['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.3201538574695587 test_loss: 0.20380194187164308
epoch: 1 training_loss 0.18082193195819854 test_loss: 0.15603284835815429
epoch: 2 training_loss 0.14979702822864055 test_loss: 0.13249988555908204
epoch: 3 training_loss 0.13492196440696716 test_loss: 0.12724871635437013
epoch: 4 training_loss 0.13233790334314108 test_loss: 0.13515440225601197
epoch: 5 training_loss 0.12569881193339824 test_loss: 0.13229037523269654
epoch: 6 training_loss 0.11353961814194918 test_loss: 0.1295575499534607
epoch: 7 training_loss 0.12220875337719918 test_loss: 0.11398708820343018
epoch: 8 training_loss 0.11258429501205683 test_loss: 0.11709308624267578
epoch: 9 training_loss 0.12062359793111682 test_loss: 0.1126847505569458
epoch: 10 training_loss 0.10799618063494563 test_loss: 0.12334362268447877
epoch: 11 training_loss 0.10513214491307736 test_loss: 0.10554500818252563
epoch: 12 training_loss 0.11249644011259079 test_loss: 0.12346529960632324
epoch: 13 training_loss 0.11111998662352562 test_loss: 0.11397664546966553
epoch: 14 training_loss 0.11076686076819897 test_loss: 0.11075172424316407
epoch: 15 training_loss 0.11480457175523043 test_loss: 0.1133196473121643
epoch: 16 training_loss 0.10418173544108868 test_loss: 0.10144494771957398
epoch: 17 training_loss 0.10492749601602554 test_loss: 0.10717246532440186
epoch: 18 training_loss 0.11436740405857564 test_loss: 0.11072200536727905
epoch: 19 training_loss 0.10864077374339104 test_loss: 0.10763266086578369
epoch: 20 training_loss 0.10754246223717928 test_loss: 0.11486746072769165
epoch: 21 training_loss 0.10382163796573878 test_loss: 0.10611089468002319
epoch: 22 training_loss 0.11109755259007216 test_loss: 0.1259470224380493
epoch: 23 training_loss 0.10818423237651587 test_loss: 0.112919282913208
epoch: 24 training_loss 0.0988459388166666 test_loss: 0.12433800697326661
epoch: 25 training_loss 0.10934254322201013 test_loss: 0.11324431896209716
epoch: 26 training_loss 0.10987225290387868 test_loss: 0.11625468730926514
epoch: 27 training_loss 0.09776711862534285 test_loss: 0.1171831488609314
epoch: 28 training_loss 0.11064835850149393 test_loss: 0.09770970940589904
epoch: 29 training_loss 0.108098039701581 test_loss: 0.10165870189666748
epoch: 30 training_loss 0.10646525423973799 test_loss: 0.11729742288589477
epoch: 31 training_loss 0.09956418255344034 test_loss: 0.11735700368881226
epoch: 32 training_loss 0.11045120783150196 test_loss: 0.11835799217224122
epoch: 33 training_loss 0.10657124081626534 test_loss: 0.09410487413406372
epoch: 34 training_loss 0.10160486359149218 test_loss: 0.12091541290283203
epoch: 35 training_loss 0.10573625382035971 test_loss: 0.11709092855453491
epoch: 36 training_loss 0.0980110214278102 test_loss: 0.1098833441734314
epoch: 37 training_loss 0.10159266222268343 test_loss: 0.12390164136886597
epoch: 38 training_loss 0.10225913634523749 test_loss: 0.1022486686706543
epoch: 39 training_loss 0.102865243665874 test_loss: 0.09901013374328613
epoch: 40 training_loss 0.11335153758525848 test_loss: 0.11117397546768189
epoch: 41 training_loss 0.101487937271595 test_loss: 0.1019004225730896
epoch: 42 training_loss 0.10156559431925416 test_loss: 0.10354576110839844
epoch: 43 training_loss 0.10407515946775675 test_loss: 0.11105033159255981
epoch: 44 training_loss 0.09653383754193783 test_loss: 0.10462578535079955
epoch: 45 training_loss 0.103341845870018 test_loss: 0.1270768642425537
epoch: 46 training_loss 0.0974992697685957 test_loss: 0.09329168200492859
epoch: 47 training_loss 0.10391436817124486 test_loss: 0.12191175222396851
epoch: 48 training_loss 0.10671586271375418 test_loss: 0.10444505214691162
epoch: 49 training_loss 0.097080662176013 test_loss: 0.10622538328170776
epoch: 50 training_loss 0.10738833783194422 test_loss: 0.11635371446609497
epoch: 51 training_loss 0.09885946210473776 test_loss: 0.1012840747833252
epoch: 52 training_loss 0.10060074217617512 test_loss: 0.0857203483581543
epoch: 53 training_loss 0.0973398045822978 test_loss: 0.09691582322120666
epoch: 54 training_loss 0.10020573135465384 test_loss: 0.11235496997833253
epoch: 55 training_loss 0.10215274259448051 test_loss: 0.10389142036437989
epoch: 56 training_loss 0.1031490002013743 test_loss: 0.09624228477478028
epoch: 57 training_loss 0.09977350313216447 test_loss: 0.12561917304992676
epoch: 58 training_loss 0.1000801220536232 test_loss: 0.10572305917739869
epoch: 59 training_loss 0.104399199411273 test_loss: 0.10240778923034669
epoch: 60 training_loss 0.10475683074444532 test_loss: 0.10085631608963012
epoch: 61 training_loss 0.10577387288212776 test_loss: 0.10998115539550782
epoch: 62 training_loss 0.1022389455139637 test_loss: 0.10506104230880738
epoch: 63 training_loss 0.09374597666785121 test_loss: 0.11196157932281495
epoch: 64 training_loss 0.09541639124974609 test_loss: 0.10561327934265137
epoch: 65 training_loss 0.10181577881798148 test_loss: 0.10460654497146607
epoch: 66 training_loss 0.10100042700767517 test_loss: 0.1076666235923767
epoch: 67 training_loss 0.09576695397496224 test_loss: 0.11336995363235473
epoch: 68 training_loss 0.10338090050965548 test_loss: 0.10778497457504273
epoch: 69 training_loss 0.09921169621869921 test_loss: 0.11007894277572632
epoch: 70 training_loss 0.10576879167929291 test_loss: 0.10143423080444336
epoch: 71 training_loss 0.09858263690024614 test_loss: 0.09491544365882873
epoch: 72 training_loss 0.09712596233934163 test_loss: 0.11309264898300171
epoch: 73 training_loss 0.09569234186783433 test_loss: 0.10797277688980103
epoch: 74 training_loss 0.10202742191031575 test_loss: 0.09777401685714722
epoch: 75 training_loss 0.09927635490894318 test_loss: 0.10074350833892823
epoch: 76 training_loss 0.09144630996510386 test_loss: 0.100093412399292
epoch: 77 training_loss 0.09810744859278202 test_loss: 0.11119638681411743
epoch: 78 training_loss 0.10105073487386107 test_loss: 0.09767999649047851
epoch: 79 training_loss 0.09897225588560105 test_loss: 0.11698487997055054
epoch: 80 training_loss 0.09895585425198078 test_loss: 0.1218523621559143
epoch: 81 training_loss 0.10758972762152552 test_loss: 0.10366537570953369
epoch: 82 training_loss 0.10505596309900284 test_loss: 0.10901293754577637
epoch: 83 training_loss 0.09801037540659309 test_loss: 0.11772720813751221
epoch: 84 training_loss 0.09979883808642626 test_loss: 0.10326967239379883
epoch: 85 training_loss 0.10000626511871814 test_loss: 0.12017505168914795
epoch: 86 training_loss 0.09477841364219784 test_loss: 0.10928618907928467
epoch: 87 training_loss 0.10038637485355138 test_loss: 0.11259902715682983
epoch: 88 training_loss 0.09951369929127395 test_loss: 0.1177977442741394
epoch: 89 training_loss 0.1035109107941389 test_loss: 0.11037681102752686
epoch: 90 training_loss 0.10032323729246855 test_loss: 0.1047550082206726
epoch: 91 training_loss 0.0856148511916399 test_loss: 0.10710572004318238
epoch: 92 training_loss 0.09528257101774215 test_loss: 0.11308835744857788
epoch: 93 training_loss 0.09923675924539566 test_loss: 0.09095398783683777
epoch: 94 training_loss 0.09359103914350271 test_loss: 0.11803792715072632
epoch: 95 training_loss 0.09675819447264075 test_loss: 0.12126857042312622
epoch: 96 training_loss 0.0995531159453094 test_loss: 0.10245091915130615
epoch: 97 training_loss 0.09686669003218412 test_loss: 0.1096393346786499
epoch: 98 training_loss 0.10492478623986244 test_loss: 0.09972440004348755
epoch: 99 training_loss 0.09199055552482605 test_loss: 0.11534595489501953
epoch: 100 training_loss 0.09213565001264215 test_loss: 0.12408456802368165
epoch: 101 training_loss 0.09694145701825618 test_loss: 0.1135666012763977
epoch: 102 training_loss 0.09282748186960817 test_loss: 0.11321718692779541
epoch: 103 training_loss 0.09282260186970234 test_loss: 0.12347086668014526
epoch: 104 training_loss 0.09900910170748829 test_loss: 0.13226985931396484
epoch: 105 training_loss 0.09863751169294119 test_loss: 0.13010478019714355
epoch: 106 training_loss 0.1037945780530572 test_loss: 0.10004498958587646
epoch: 107 training_loss 0.09289035305380822 test_loss: 0.11421371698379516
epoch: 108 training_loss 0.09545044130645693 test_loss: 0.1103639006614685
epoch: 109 training_loss 0.09548391854390502 test_loss: 0.11278126239776612
epoch: 110 training_loss 0.09682123523205519 test_loss: 0.11442058086395264
epoch: 111 training_loss 0.09901059336960316 test_loss: 0.11492654085159301
epoch: 112 training_loss 0.09805545368231833 test_loss: 0.12252571582794189
epoch: 113 training_loss 0.08991619670763612 test_loss: 0.10444017648696899
epoch: 114 training_loss 0.09147010400891303 test_loss: 0.11048622131347656
epoch: 115 training_loss 0.09188317086547614 test_loss: 0.1098724365234375
epoch: 116 training_loss 0.10065108466893434 test_loss: 0.09937836527824402
epoch: 117 training_loss 0.09712274797260761 test_loss: 0.09750802516937256
epoch: 118 training_loss 0.09672618616372347 test_loss: 0.12464687824249268
epoch: 119 training_loss 0.09507726427167654 test_loss: 0.112767493724823
epoch: 120 training_loss 0.08977777391672134 test_loss: 0.11454051733016968
epoch: 121 training_loss 0.09953995894640684 test_loss: 0.1029622197151184
epoch: 122 training_loss 0.08935068322345614 test_loss: 0.09719005227088928
epoch: 123 training_loss 0.09072847194969653 test_loss: 0.12003653049468994
epoch: 124 training_loss 0.09476621886715293 test_loss: 0.11051536798477173
epoch: 125 training_loss 0.08765217058360576 test_loss: 0.10896109342575074
epoch: 126 training_loss 0.08878435846418142 test_loss: 0.13565917015075685
epoch: 127 training_loss 0.09642702704295517 test_loss: 0.12352159023284912
epoch: 128 training_loss 0.09916336949914693 test_loss: 0.12375030517578126
epoch: 129 training_loss 0.09817741505801678 test_loss: 0.10678883790969848
epoch: 130 training_loss 0.09480560451745987 test_loss: 0.10592176914215087
epoch: 131 training_loss 0.09783624382689596 test_loss: 0.10609526634216308
epoch: 132 training_loss 0.08857006279751659 test_loss: 0.10964903831481934
epoch: 133 training_loss 0.08984088368713855 test_loss: 0.09741631150245667
epoch: 134 training_loss 0.09355245588347316 test_loss: 0.11651676893234253
epoch: 135 training_loss 0.0989233997091651 test_loss: 0.10298534631729125
epoch: 136 training_loss 0.09311343516223132 test_loss: 0.1064569115638733
epoch: 137 training_loss 0.09082077419385313 test_loss: 0.11815539598464966
epoch: 138 training_loss 0.08619132995605469 test_loss: 0.10804405212402343
epoch: 139 training_loss 0.09176800415851176 test_loss: 0.11712201833724975
epoch: 140 training_loss 0.0926943315193057 test_loss: 0.11395019292831421
epoch: 141 training_loss 0.09297652367502451 test_loss: 0.10577837228775025
epoch: 142 training_loss 0.09333228640258312 test_loss: 0.09820356965065002
epoch: 143 training_loss 0.09064416863024234 test_loss: 0.11742134094238281
epoch: 144 training_loss 0.09316554555669426 test_loss: 0.11121119260787964
epoch: 145 training_loss 0.09057465890422463 test_loss: 0.11558364629745484
epoch: 146 training_loss 0.09077265182510018 test_loss: 0.09703829884529114
epoch: 147 training_loss 0.09140653610229492 test_loss: 0.09408802986145019
epoch: 148 training_loss 0.09357879543676972 test_loss: 0.12013434171676636
epoch: 149 training_loss 0.0893110483046621 test_loss: 0.11453999280929565
epoch: 0 training_loss 0.29588523164391517 test_loss: 0.20022969245910643
epoch: 1 training_loss 0.16816828027367592 test_loss: 0.13947356939315797
epoch: 2 training_loss 0.14030734799802302 test_loss: 0.13385393619537353
epoch: 3 training_loss 0.14281726855784654 test_loss: 0.12063441276550294
epoch: 4 training_loss 0.12204733289778233 test_loss: 0.11750500202178955
epoch: 5 training_loss 0.1251262315362692 test_loss: 0.11314849853515625
epoch: 6 training_loss 0.12286638580262661 test_loss: 0.1291046142578125
epoch: 7 training_loss 0.12384049776941537 test_loss: 0.13728411197662355
epoch: 8 training_loss 0.11833538375794887 test_loss: 0.10253872871398925
epoch: 9 training_loss 0.11448694743216038 test_loss: 0.12807267904281616
epoch: 10 training_loss 0.11523196067661047 test_loss: 0.10725522041320801
epoch: 11 training_loss 0.11307026762515307 test_loss: 0.11100494861602783
epoch: 12 training_loss 0.11601852357387543 test_loss: 0.11317501068115235
epoch: 13 training_loss 0.11229046739637852 test_loss: 0.11304428577423095
epoch: 14 training_loss 0.10850446386262774 test_loss: 0.11435949802398682
epoch: 15 training_loss 0.10915264043956995 test_loss: 0.11335228681564331
epoch: 16 training_loss 0.11653369914740325 test_loss: 0.10185691118240356
epoch: 17 training_loss 0.11317711818963289 test_loss: 0.10354303121566773
epoch: 18 training_loss 0.10296433225274086 test_loss: 0.09966006875038147
epoch: 19 training_loss 0.11268390001729131 test_loss: 0.10511407852172852
epoch: 20 training_loss 0.11965869691222907 test_loss: 0.12331218719482422
epoch: 21 training_loss 0.1034587636217475 test_loss: 0.10138583183288574
epoch: 22 training_loss 0.11379194408655166 test_loss: 0.09628745317459106
epoch: 23 training_loss 0.10403081558644772 test_loss: 0.11011010408401489
epoch: 24 training_loss 0.1066204410046339 test_loss: 0.12297523021697998
epoch: 25 training_loss 0.10673265874385834 test_loss: 0.1229780912399292
epoch: 26 training_loss 0.11306218983605504 test_loss: 0.10380538702011108
epoch: 27 training_loss 0.10421684885397553 test_loss: 0.1027302622795105
epoch: 28 training_loss 0.10796712130308152 test_loss: 0.11297729015350341
epoch: 29 training_loss 0.11259535046294332 test_loss: 0.09718362689018249
epoch: 30 training_loss 0.1166607477888465 test_loss: 0.12890390157699586
epoch: 31 training_loss 0.10612099135294556 test_loss: 0.09139397144317626
epoch: 32 training_loss 0.10484011851251125 test_loss: 0.09601472616195679
epoch: 33 training_loss 0.1023628681898117 test_loss: 0.11113756895065308
epoch: 34 training_loss 0.10166983377188445 test_loss: 0.10680637359619141
epoch: 35 training_loss 0.10440527457743883 test_loss: 0.09548326134681702
epoch: 36 training_loss 0.1077510241419077 test_loss: 0.127578067779541
epoch: 37 training_loss 0.10783370196819306 test_loss: 0.10372992753982543
epoch: 38 training_loss 0.11180294437333942 test_loss: 0.10499261617660523
epoch: 39 training_loss 0.10659758001565933 test_loss: 0.09738513827323914
epoch: 40 training_loss 0.0972443514689803 test_loss: 0.11131325960159302
epoch: 41 training_loss 0.1086989899724722 test_loss: 0.11570833921432495
epoch: 42 training_loss 0.10317988624796272 test_loss: 0.11468149423599243
epoch: 43 training_loss 0.10266363272443414 test_loss: 0.11229325532913208
epoch: 44 training_loss 0.10616522390395403 test_loss: 0.10971213579177856
epoch: 45 training_loss 0.10639751080423593 test_loss: 0.1065988302230835
epoch: 46 training_loss 0.10223531998693942 test_loss: 0.12237764596939087
epoch: 47 training_loss 0.09833904284983873 test_loss: 0.11177310943603516
epoch: 48 training_loss 0.1033529395237565 test_loss: 0.10504069328308105
epoch: 49 training_loss 0.10187288941815495 test_loss: 0.11694821119308471
epoch: 50 training_loss 0.11043323904275894 test_loss: 0.11361373662948608
epoch: 51 training_loss 0.10260044068098068 test_loss: 0.10402446985244751
epoch: 52 training_loss 0.10670953962951898 test_loss: 0.1121628761291504
epoch: 53 training_loss 0.11221894703805446 test_loss: 0.1285254716873169
epoch: 54 training_loss 0.10181603349745273 test_loss: 0.11348583698272705
epoch: 55 training_loss 0.10372650127857924 test_loss: 0.09460493922233582
epoch: 56 training_loss 0.10068761833012103 test_loss: 0.10468716621398926
epoch: 57 training_loss 0.10391322426497936 test_loss: 0.11704477071762084
epoch: 58 training_loss 0.09614075902849435 test_loss: 0.11027461290359497
epoch: 59 training_loss 0.09734374936670065 test_loss: 0.11550145149230957
epoch: 60 training_loss 0.10279090445488691 test_loss: 0.11716601848602295
epoch: 61 training_loss 0.10512705504894257 test_loss: 0.11093523502349853
epoch: 62 training_loss 0.1044481648877263 test_loss: 0.12506142854690552
epoch: 63 training_loss 0.10505445938557387 test_loss: 0.11197952032089234
epoch: 64 training_loss 0.1091179347038269 test_loss: 0.1180188775062561
epoch: 65 training_loss 0.1021986386179924 test_loss: 0.11000862121582031
epoch: 66 training_loss 0.10065127292647957 test_loss: 0.0912258505821228
epoch: 67 training_loss 0.10326275896281004 test_loss: 0.1039682149887085
epoch: 68 training_loss 0.1001159168407321 test_loss: 0.1051369309425354
epoch: 69 training_loss 0.11068374754860998 test_loss: 0.11296620368957519
epoch: 70 training_loss 0.09790241658687591 test_loss: 0.09283233284950257
epoch: 71 training_loss 0.10346335541456937 test_loss: 0.1160810112953186
epoch: 72 training_loss 0.11169392053037881 test_loss: 0.117694091796875
epoch: 73 training_loss 0.10326408721506596 test_loss: 0.0993494987487793
epoch: 74 training_loss 0.10585792787373066 test_loss: 0.11010420322418213
epoch: 75 training_loss 0.09961777983233333 test_loss: 0.10674110651016236
epoch: 76 training_loss 0.10580674594268202 test_loss: 0.11229307651519775
epoch: 77 training_loss 0.09827163405716419 test_loss: 0.12543938159942628
epoch: 78 training_loss 0.09539333807304501 test_loss: 0.10240378379821777
epoch: 79 training_loss 0.10951185174286365 test_loss: 0.1173891544342041
epoch: 80 training_loss 0.10289228122681379 test_loss: 0.1026442289352417
epoch: 81 training_loss 0.09920561604201794 test_loss: 0.11629730463027954
epoch: 82 training_loss 0.1101689574867487 test_loss: 0.11058148145675659
epoch: 83 training_loss 0.1029586835205555 test_loss: 0.11850830316543579
epoch: 84 training_loss 0.099639239218086 test_loss: 0.10777615308761597
epoch: 85 training_loss 0.09638222068548202 test_loss: 0.10931034088134765
epoch: 86 training_loss 0.08778407571837306 test_loss: 0.11565052270889283
epoch: 87 training_loss 0.09899992072954773 test_loss: 0.11041309833526611
epoch: 88 training_loss 0.09968000071123242 test_loss: 0.11584454774856567
epoch: 89 training_loss 0.09340997146442533 test_loss: 0.10642801523208618
epoch: 90 training_loss 0.10046686120331287 test_loss: 0.10050528049468994
epoch: 91 training_loss 0.09908064853399992 test_loss: 0.10128587484359741
epoch: 92 training_loss 0.09833961427211761 test_loss: 0.11882331371307372
epoch: 93 training_loss 0.09466562734916806 test_loss: 0.13294782638549804
epoch: 94 training_loss 0.0913666389323771 test_loss: 0.1022186040878296
epoch: 95 training_loss 0.09893646482378245 test_loss: 0.12362051010131836
epoch: 96 training_loss 0.100671266913414 test_loss: 0.11012877225875854
epoch: 97 training_loss 0.10339824881404638 test_loss: 0.11197280883789062
epoch: 98 training_loss 0.09907517354935408 test_loss: 0.116965913772583
epoch: 99 training_loss 0.0951159918680787 test_loss: 0.10937793254852295
epoch: 100 training_loss 0.09167231433093548 test_loss: 0.09592704176902771
epoch: 101 training_loss 0.09834699189290404 test_loss: 0.12123051881790162
epoch: 102 training_loss 0.09611445341259241 test_loss: 0.113003408908844
epoch: 103 training_loss 0.09690964370965957 test_loss: 0.11903995275497437
epoch: 104 training_loss 0.10094627747312188 test_loss: 0.10520488023757935
epoch: 105 training_loss 0.10072678256779909 test_loss: 0.11195718050003052
epoch: 106 training_loss 0.10092885820195079 test_loss: 0.11128666400909423
epoch: 107 training_loss 0.09296345494687558 test_loss: 0.11282291412353515
epoch: 108 training_loss 0.09591428067535163 test_loss: 0.10997581481933594
epoch: 109 training_loss 0.09852017829194665 test_loss: 0.11233744621276856
epoch: 110 training_loss 0.09804964633658528 test_loss: 0.11324402093887329
epoch: 111 training_loss 0.08953953212592751 test_loss: 0.1094308614730835
epoch: 112 training_loss 0.09529435250908136 test_loss: 0.10122134685516357
epoch: 113 training_loss 0.10055360229685903 test_loss: 0.1023136854171753
epoch: 114 training_loss 0.0952934499643743 test_loss: 0.11871259212493897
epoch: 115 training_loss 0.0948077330365777 test_loss: 0.10585694313049317
epoch: 116 training_loss 0.09551303189247846 test_loss: 0.10883442163467408
epoch: 117 training_loss 0.09604898072779179 test_loss: 0.1042888879776001
epoch: 118 training_loss 0.10054256793111563 test_loss: 0.11710233688354492
epoch: 119 training_loss 0.0974755784869194 test_loss: 0.12688331604003905
epoch: 120 training_loss 0.09128782369196414 test_loss: 0.09927942752838134
epoch: 121 training_loss 0.09586768632754683 test_loss: 0.1084296464920044
epoch: 122 training_loss 0.08480270622298121 test_loss: 0.0964510679244995
epoch: 123 training_loss 0.08966885520145297 test_loss: 0.11237683296203613
epoch: 124 training_loss 0.09401902053505182 test_loss: 0.09809199571609498
epoch: 125 training_loss 0.1013348551094532 test_loss: 0.11360703706741333
epoch: 126 training_loss 0.09067003608681262 test_loss: 0.09962919354438782
epoch: 127 training_loss 0.0905029327981174 test_loss: 0.107691490650177
epoch: 128 training_loss 0.10106752252206207 test_loss: 0.1122409462928772
epoch: 129 training_loss 0.09693783422932029 test_loss: 0.11547390222549439
epoch: 130 training_loss 0.09702783275395632 test_loss: 0.11023000478744507
epoch: 131 training_loss 0.08705300807952882 test_loss: 0.11041121482849121
epoch: 132 training_loss 0.10273883892223239 test_loss: 0.11130200624465943
epoch: 133 training_loss 0.08692297913134098 test_loss: 0.10593479871749878
epoch: 134 training_loss 0.09035220818594097 test_loss: 0.11649106740951538
epoch: 135 training_loss 0.09261595007032156 test_loss: 0.11359031200408935
epoch: 136 training_loss 0.09318588027730584 test_loss: 0.11102895736694336
epoch: 137 training_loss 0.0991280822083354 test_loss: 0.12503405809402465
epoch: 138 training_loss 0.09298707464709878 test_loss: 0.12081695795059204
epoch: 139 training_loss 0.09832300648093223 test_loss: 0.11164155006408691
epoch: 140 training_loss 0.0974642999470234 test_loss: 0.11693384647369384
epoch: 141 training_loss 0.09209322709590197 test_loss: 0.10965744256973267
epoch: 142 training_loss 0.10400865469127893 test_loss: 0.11126784086227418
epoch: 143 training_loss 0.08956571761518717 test_loss: 0.12414486408233642
epoch: 144 training_loss 0.08858572563156486 test_loss: 0.12175079584121704
epoch: 145 training_loss 0.08944758851081133 test_loss: 0.1178172469139099
epoch: 146 training_loss 0.09268392669036984 test_loss: 0.14371969699859619
epoch: 147 training_loss 0.09607800269499421 test_loss: 0.10420105457305909
epoch: 148 training_loss 0.09905163712799549 test_loss: 0.11405429840087891
epoch: 149 training_loss 0.0916127735376358 test_loss: 0.11682987213134766
epoch: 0 training_loss 0.3256625072658062 test_loss: 0.19539819955825805
epoch: 1 training_loss 0.17237476304173469 test_loss: 0.16597853899002074
epoch: 2 training_loss 0.15372426442801954 test_loss: 0.14980847835540773
epoch: 3 training_loss 0.13474549740552902 test_loss: 0.14136970043182373
epoch: 4 training_loss 0.12034910436719656 test_loss: 0.13938825130462645
epoch: 5 training_loss 0.12488869477063418 test_loss: 0.13608059883117676
epoch: 6 training_loss 0.11617057122290135 test_loss: 0.12276251316070556
epoch: 7 training_loss 0.12171451229602098 test_loss: 0.12937666177749635
epoch: 8 training_loss 0.11761479113250971 test_loss: 0.12579401731491088
epoch: 9 training_loss 0.1109192419052124 test_loss: 0.13844075202941894
epoch: 10 training_loss 0.1111585845053196 test_loss: 0.13466153144836426
epoch: 11 training_loss 0.1115852414444089 test_loss: 0.11914705038070679
epoch: 12 training_loss 0.10422852287068964 test_loss: 0.12407987117767334
epoch: 13 training_loss 0.10806310918182134 test_loss: 0.12898674011230468
epoch: 14 training_loss 0.11830500740557909 test_loss: 0.1199834942817688
epoch: 15 training_loss 0.10794964680448174 test_loss: 0.13201828002929689
epoch: 16 training_loss 0.11302987411618233 test_loss: 0.12418769598007202
epoch: 17 training_loss 0.10829620610922575 test_loss: 0.11215264797210693
epoch: 18 training_loss 0.10667204411700368 test_loss: 0.14139885902404786
epoch: 19 training_loss 0.10771727193146945 test_loss: 0.10316128730773926
epoch: 20 training_loss 0.10457198379561305 test_loss: 0.1238093376159668
epoch: 21 training_loss 0.11261389151215553 test_loss: 0.11972991228103638
epoch: 22 training_loss 0.09953834941610694 test_loss: 0.12018572092056275
epoch: 23 training_loss 0.10599255474284291 test_loss: 0.11165112257003784
epoch: 24 training_loss 0.10276165252551436 test_loss: 0.12571251392364502
epoch: 25 training_loss 0.10680888213217259 test_loss: 0.13537026643753053
epoch: 26 training_loss 0.10526227824389935 test_loss: 0.12043513059616089
epoch: 27 training_loss 0.10629925515502692 test_loss: 0.119460129737854
epoch: 28 training_loss 0.10163086563348771 test_loss: 0.1227956771850586
epoch: 29 training_loss 0.10643216667696834 test_loss: 0.12421815395355225
epoch: 30 training_loss 0.09846318839117885 test_loss: 0.12238067388534546
epoch: 31 training_loss 0.10094352779909968 test_loss: 0.11995921134948731
epoch: 32 training_loss 0.1055477587133646 test_loss: 0.1195876121520996
epoch: 33 training_loss 0.1095308625139296 test_loss: 0.11032568216323853
epoch: 34 training_loss 0.10439811730757356 test_loss: 0.1233519434928894
epoch: 35 training_loss 0.10635930314660072 test_loss: 0.13838365077972412
epoch: 36 training_loss 0.10285272415727377 test_loss: 0.11380962133407593
epoch: 37 training_loss 0.10221981739625335 test_loss: 0.10390934944152833
epoch: 38 training_loss 0.10279585743322968 test_loss: 0.12426964044570923
epoch: 39 training_loss 0.09862655555829405 test_loss: 0.09811828136444092
epoch: 40 training_loss 0.09368367597460747 test_loss: 0.12095886468887329
epoch: 41 training_loss 0.10246407872065902 test_loss: 0.1127097487449646
epoch: 42 training_loss 0.09587112007662654 test_loss: 0.10856430530548096
epoch: 43 training_loss 0.09811660308390856 test_loss: 0.1093336820602417
epoch: 44 training_loss 0.1058605944365263 test_loss: 0.11234902143478394
epoch: 45 training_loss 0.09980230312794447 test_loss: 0.1194871187210083
epoch: 46 training_loss 0.10106884513050318 test_loss: 0.11547359228134155
epoch: 47 training_loss 0.0953038802370429 test_loss: 0.11316466331481934
epoch: 48 training_loss 0.10108362022787333 test_loss: 0.13410710096359252
epoch: 49 training_loss 0.10354929342865944 test_loss: 0.09470164775848389
epoch: 50 training_loss 0.10003118772059678 test_loss: 0.11774680614471436
epoch: 51 training_loss 0.09807288637384773 test_loss: 0.11074913740158081
epoch: 52 training_loss 0.10068839650601148 test_loss: 0.11584206819534301
epoch: 53 training_loss 0.10015165684744716 test_loss: 0.1120843768119812
epoch: 54 training_loss 0.0976339546404779 test_loss: 0.11126086711883545
epoch: 55 training_loss 0.10161393512040377 test_loss: 0.12113339900970459
epoch: 56 training_loss 0.10148919899016619 test_loss: 0.10224113464355469
epoch: 57 training_loss 0.10256253466010094 test_loss: 0.11482484340667724
epoch: 58 training_loss 0.0951184325106442 test_loss: 0.11382304430007935
epoch: 59 training_loss 0.09957869991660118 test_loss: 0.12567685842514037
epoch: 60 training_loss 0.09732237687334418 test_loss: 0.11094582080841064
epoch: 61 training_loss 0.09432800250127911 test_loss: 0.10396827459335327
epoch: 62 training_loss 0.09993378482759 test_loss: 0.11104620695114135
epoch: 63 training_loss 0.09684527518227697 test_loss: 0.10408288240432739
epoch: 64 training_loss 0.09439874354749918 test_loss: 0.11417741775512695
epoch: 65 training_loss 0.0975696837157011 test_loss: 0.1259606122970581
epoch: 66 training_loss 0.10333945324644446 test_loss: 0.11287760734558105
epoch: 67 training_loss 0.10696297235786915 test_loss: 0.12130273580551147
epoch: 68 training_loss 0.09995896656066179 test_loss: 0.12734100818634034
epoch: 69 training_loss 0.09690638404339552 test_loss: 0.10788748264312745
epoch: 70 training_loss 0.09471258467063308 test_loss: 0.10220499038696289
epoch: 71 training_loss 0.10068551510572434 test_loss: 0.11436502933502198
epoch: 72 training_loss 0.0942106824927032 test_loss: 0.11691919565200806
epoch: 73 training_loss 0.09131327886134386 test_loss: 0.10943293571472168
epoch: 74 training_loss 0.10146994242444635 test_loss: 0.13388710021972655
epoch: 75 training_loss 0.09859123591333628 test_loss: 0.11143486499786377
epoch: 76 training_loss 0.10241388956084847 test_loss: 0.11145331859588622
epoch: 77 training_loss 0.08969426499679685 test_loss: 0.13321913480758668
epoch: 78 training_loss 0.09125226514413953 test_loss: 0.11520315408706665
epoch: 79 training_loss 0.096226912625134 test_loss: 0.11619869470596314
epoch: 80 training_loss 0.09125957356765867 test_loss: 0.12630136013031007
epoch: 81 training_loss 0.09557338213548064 test_loss: 0.12607152462005616
epoch: 82 training_loss 0.09947286378592253 test_loss: 0.11845873594284058
epoch: 83 training_loss 0.09352273533120752 test_loss: 0.09736552238464355
epoch: 84 training_loss 0.10399140685796737 test_loss: 0.13240958452224733
epoch: 85 training_loss 0.08989266017451883 test_loss: 0.1267000675201416
epoch: 86 training_loss 0.09681576769798994 test_loss: 0.12765846252441407
epoch: 87 training_loss 0.08936934441328048 test_loss: 0.11997958421707153
epoch: 88 training_loss 0.09572507169097662 test_loss: 0.12079206705093384
epoch: 89 training_loss 0.09189789738506078 test_loss: 0.10847078561782837
epoch: 90 training_loss 0.09095768401399255 test_loss: 0.10114775896072388
epoch: 91 training_loss 0.09833422435447574 test_loss: 0.11115732192993164
epoch: 92 training_loss 0.09579458471387625 test_loss: 0.10656224489212036
epoch: 93 training_loss 0.09250724084675312 test_loss: 0.10694322586059571
epoch: 94 training_loss 0.09691657584160566 test_loss: 0.12970504760742188
epoch: 95 training_loss 0.09725237619131803 test_loss: 0.10025699138641357
epoch: 96 training_loss 0.091710987649858 test_loss: 0.11751153469085693
epoch: 97 training_loss 0.09071992952376604 test_loss: 0.10261482000350952
epoch: 98 training_loss 0.09633788781240582 test_loss: 0.12337585687637329
epoch: 99 training_loss 0.09086037943139673 test_loss: 0.10880413055419921
epoch: 100 training_loss 0.09094510179013014 test_loss: 0.10414085388183594
epoch: 101 training_loss 0.09566190162673592 test_loss: 0.10854920148849487
epoch: 102 training_loss 0.09643438171595335 test_loss: 0.11560571193695068
epoch: 103 training_loss 0.09743849821388721 test_loss: 0.11598008871078491
epoch: 104 training_loss 0.08939178882166743 test_loss: 0.12143814563751221
epoch: 105 training_loss 0.09559500426054 test_loss: 0.11076232194900512
epoch: 106 training_loss 0.09614836852997541 test_loss: 0.126575243473053
epoch: 107 training_loss 0.09599379561841488 test_loss: 0.12392789125442505
epoch: 108 training_loss 0.09518498491495847 test_loss: 0.10549641847610473
epoch: 109 training_loss 0.09059021852910519 test_loss: 0.1254098892211914
epoch: 110 training_loss 0.09539809497073293 test_loss: 0.1240228295326233
epoch: 111 training_loss 0.08894704382866621 test_loss: 0.13032597303390503
epoch: 112 training_loss 0.08990543499588967 test_loss: 0.12496510744094849
epoch: 113 training_loss 0.0936193217523396 test_loss: 0.13128522634506226
epoch: 114 training_loss 0.08714226815849542 test_loss: 0.10356146097183228
epoch: 115 training_loss 0.09235332608222961 test_loss: 0.11283724308013916
epoch: 116 training_loss 0.09191840564832092 test_loss: 0.14091887474060058
epoch: 117 training_loss 0.09281983954831957 test_loss: 0.11185649633407593
epoch: 118 training_loss 0.08871439516544342 test_loss: 0.12347402572631835
epoch: 119 training_loss 0.08628095243126153 test_loss: 0.11792491674423218
epoch: 120 training_loss 0.09639334469102323 test_loss: 0.1416313886642456
epoch: 121 training_loss 0.08884571418166161 test_loss: 0.12468305826187134
epoch: 122 training_loss 0.0889149316214025 test_loss: 0.11806931495666503
epoch: 123 training_loss 0.08380812995135783 test_loss: 0.12390494346618652
epoch: 124 training_loss 0.09442880790680647 test_loss: 0.11180695295333862
epoch: 125 training_loss 0.09874201511964202 test_loss: 0.12046360969543457
epoch: 126 training_loss 0.09288478940725327 test_loss: 0.11897412538528443
epoch: 127 training_loss 0.08525521317496895 test_loss: 0.10889909267425538
epoch: 128 training_loss 0.08590420620515943 test_loss: 0.11620246171951294
epoch: 129 training_loss 0.09158432211726904 test_loss: 0.10935231447219848
epoch: 130 training_loss 0.08784363925457 test_loss: 0.12883267402648926
epoch: 131 training_loss 0.09391993260011076 test_loss: 0.12054201364517211
epoch: 132 training_loss 0.08774125592783094 test_loss: 0.12674187421798705
epoch: 133 training_loss 0.08680492511019111 test_loss: 0.14285532236099244
epoch: 134 training_loss 0.0948301712423563 test_loss: 0.12194684743881226
epoch: 135 training_loss 0.08649822352454066 test_loss: 0.11312943696975708
epoch: 136 training_loss 0.0949715937115252 test_loss: 0.13030765056610108
epoch: 137 training_loss 0.08649772573262453 test_loss: 0.10954904556274414
epoch: 138 training_loss 0.08562417786568403 test_loss: 0.12904388904571534
epoch: 139 training_loss 0.08890988986939191 test_loss: 0.12238184213638306
epoch: 140 training_loss 0.08839199775829912 test_loss: 0.10999449491500854
epoch: 141 training_loss 0.09085461165755987 test_loss: 0.13107086420059205
epoch: 142 training_loss 0.09011537360027433 test_loss: 0.11272040605545045
epoch: 143 training_loss 0.08474213434383274 test_loss: 0.10973691940307617
epoch: 144 training_loss 0.088610910018906 test_loss: 0.12400238513946533
epoch: 145 training_loss 0.08886319329962135 test_loss: 0.10520981550216675
epoch: 146 training_loss 0.08607364816591144 test_loss: 0.13016300201416015
epoch: 147 training_loss 0.08191348822787405 test_loss: 0.10467745065689087
epoch: 148 training_loss 0.09147144749760627 test_loss: 0.12669397592544557
epoch: 149 training_loss 0.08671219971962273 test_loss: 0.1392387866973877
epoch: 0 training_loss 0.3056240327656269 test_loss: 0.19966620206832886
epoch: 1 training_loss 0.17185777314007283 test_loss: 0.1791400909423828
epoch: 2 training_loss 0.146108709089458 test_loss: 0.15277911424636842
epoch: 3 training_loss 0.12955876737833022 test_loss: 0.15777419805526732
epoch: 4 training_loss 0.12579061310738326 test_loss: 0.13525201082229615
epoch: 5 training_loss 0.11119699954986573 test_loss: 0.1567375659942627
epoch: 6 training_loss 0.12126760497689247 test_loss: 0.11240812540054321
epoch: 7 training_loss 0.1169680330529809 test_loss: 0.14158093929290771
epoch: 8 training_loss 0.11641345508396625 test_loss: 0.11515564918518066
epoch: 9 training_loss 0.11514323778450489 test_loss: 0.13543895483016968
epoch: 10 training_loss 0.11169839583337307 test_loss: 0.11482187509536743
epoch: 11 training_loss 0.1129682369157672 test_loss: 0.13580851554870604
epoch: 12 training_loss 0.11563348768278957 test_loss: 0.1399033784866333
epoch: 13 training_loss 0.1159038259088993 test_loss: 0.13580377101898194
epoch: 14 training_loss 0.10452056916430592 test_loss: 0.12916288375854493
epoch: 15 training_loss 0.10592319380491971 test_loss: 0.13382349014282227
epoch: 16 training_loss 0.10308537656441331 test_loss: 0.10686959028244018
epoch: 17 training_loss 0.1063136561959982 test_loss: 0.1208569884300232
epoch: 18 training_loss 0.11525642037391663 test_loss: 0.12450470924377441
epoch: 19 training_loss 0.09895567316561937 test_loss: 0.11749500036239624
epoch: 20 training_loss 0.10233243811875582 test_loss: 0.11067078113555909
epoch: 21 training_loss 0.10239796888083219 test_loss: 0.10753335952758789
epoch: 22 training_loss 0.10016675043851136 test_loss: 0.11445809602737426
epoch: 23 training_loss 0.11099861960858107 test_loss: 0.11869151592254638
epoch: 24 training_loss 0.10226293796673418 test_loss: 0.13949029445648192
epoch: 25 training_loss 0.10935616969130933 test_loss: 0.12432764768600464
epoch: 26 training_loss 0.1036285399273038 test_loss: 0.11750086545944213
epoch: 27 training_loss 0.10908234735950828 test_loss: 0.11289701461791993
epoch: 28 training_loss 0.10232856124639511 test_loss: 0.12509111166000367
epoch: 29 training_loss 0.10555963352322578 test_loss: 0.10582352876663208
epoch: 30 training_loss 0.09866756174713373 test_loss: 0.11550503969192505
epoch: 31 training_loss 0.10165227893739939 test_loss: 0.13872182369232178
epoch: 32 training_loss 0.10251556972041725 test_loss: 0.11393885612487793
epoch: 33 training_loss 0.09597458766773342 test_loss: 0.12347574234008789
epoch: 34 training_loss 0.09974586373195052 test_loss: 0.13958731889724732
epoch: 35 training_loss 0.09946963293477892 test_loss: 0.13340426683425904
epoch: 36 training_loss 0.10655225794762373 test_loss: 0.12504820823669432
epoch: 37 training_loss 0.10731859352439642 test_loss: 0.12428003549575806
epoch: 38 training_loss 0.1027962601557374 test_loss: 0.10219719409942626
epoch: 39 training_loss 0.09696103114634752 test_loss: 0.12097829580307007
epoch: 40 training_loss 0.10651535488665104 test_loss: 0.12069008350372315
epoch: 41 training_loss 0.09626175915822387 test_loss: 0.11085829734802247
epoch: 42 training_loss 0.10132586214691401 test_loss: 0.12567148208618165
epoch: 43 training_loss 0.10294852687045931 test_loss: 0.12317830324172974
epoch: 44 training_loss 0.10398057304322719 test_loss: 0.11560616493225098
epoch: 45 training_loss 0.09557289019227028 test_loss: 0.11485950946807862
epoch: 46 training_loss 0.09932859219610692 test_loss: 0.12195320129394531
epoch: 47 training_loss 0.09526577908545733 test_loss: 0.10953933000564575
epoch: 48 training_loss 0.09680141359567643 test_loss: 0.1161973237991333
epoch: 49 training_loss 0.09738170329481363 test_loss: 0.11762944459915162
epoch: 50 training_loss 0.09650588653981686 test_loss: 0.1141655683517456
epoch: 51 training_loss 0.09614404808729887 test_loss: 0.11657549142837524
epoch: 52 training_loss 0.10088782846927642 test_loss: 0.11379997730255127
epoch: 53 training_loss 0.10432437971234322 test_loss: 0.11041193008422852
epoch: 54 training_loss 0.10102591890841722 test_loss: 0.12146153450012206
epoch: 55 training_loss 0.1134136281721294 test_loss: 0.12349417209625244
epoch: 56 training_loss 0.09759735723957419 test_loss: 0.1192885160446167
epoch: 57 training_loss 0.10425456862896681 test_loss: 0.12040644884109497
epoch: 58 training_loss 0.1062316370010376 test_loss: 0.12166672945022583
epoch: 59 training_loss 0.09988760996609926 test_loss: 0.11340571641921997
epoch: 60 training_loss 0.10009710799902677 test_loss: 0.12329685688018799
epoch: 61 training_loss 0.09680557571351528 test_loss: 0.11473150253295898
epoch: 62 training_loss 0.10026512714102864 test_loss: 0.11896182298660278
epoch: 63 training_loss 0.09717396672815085 test_loss: 0.11871079206466675
epoch: 64 training_loss 0.09146523650735616 test_loss: 0.11301801204681397
epoch: 65 training_loss 0.10441733751446008 test_loss: 0.11670870780944824
epoch: 66 training_loss 0.1051817624270916 test_loss: 0.11648626327514648
epoch: 67 training_loss 0.09575276827439666 test_loss: 0.139252769947052
epoch: 68 training_loss 0.09430586099624634 test_loss: 0.10952930450439453
epoch: 69 training_loss 0.10088320279493929 test_loss: 0.12705812454223633
epoch: 70 training_loss 0.09679624401032924 test_loss: 0.12442160844802856
epoch: 71 training_loss 0.0976239651069045 test_loss: 0.12030950784683228
epoch: 72 training_loss 0.09190985590219497 test_loss: 0.10968296527862549
epoch: 73 training_loss 0.10095448814332485 test_loss: 0.11905039548873901
epoch: 74 training_loss 0.10283009642735123 test_loss: 0.10236061811447143
epoch: 75 training_loss 0.10098285853862762 test_loss: 0.1011210560798645
epoch: 76 training_loss 0.09466792596504092 test_loss: 0.12077434062957763
epoch: 77 training_loss 0.09857756363227964 test_loss: 0.1266472339630127
epoch: 78 training_loss 0.09229487471282483 test_loss: 0.12104145288467408
epoch: 79 training_loss 0.09846870597451925 test_loss: 0.13075708150863646
epoch: 80 training_loss 0.09846860878169536 test_loss: 0.12121421098709106
epoch: 81 training_loss 0.09471941880881786 test_loss: 0.13257120847702025
epoch: 82 training_loss 0.09808867570012808 test_loss: 0.11646432876586914
epoch: 83 training_loss 0.10029071692377328 test_loss: 0.11088728904724121
epoch: 84 training_loss 0.09676628712564707 test_loss: 0.1104694962501526
epoch: 85 training_loss 0.10012408355250954 test_loss: 0.12026699781417846
epoch: 86 training_loss 0.09222095295786857 test_loss: 0.10402295589447022
epoch: 87 training_loss 0.09490573523566127 test_loss: 0.11270043849945069
epoch: 88 training_loss 0.09765170766040683 test_loss: 0.11036347150802613
epoch: 89 training_loss 0.09646981209516525 test_loss: 0.1309797167778015
epoch: 90 training_loss 0.09640950324013829 test_loss: 0.1075509786605835
epoch: 91 training_loss 0.10132815418764948 test_loss: 0.1103359341621399
epoch: 92 training_loss 0.09343886759132147 test_loss: 0.10172245502471924
epoch: 93 training_loss 0.09122398912906647 test_loss: 0.1346613883972168
epoch: 94 training_loss 0.09359070414677262 test_loss: 0.12255212068557739
epoch: 95 training_loss 0.10232997482642531 test_loss: 0.11171829700469971
epoch: 96 training_loss 0.09403126021847129 test_loss: 0.12229673862457276
epoch: 97 training_loss 0.10593003261834383 test_loss: 0.11362332105636597
epoch: 98 training_loss 0.09942997269332408 test_loss: 0.13633981943130494
epoch: 99 training_loss 0.09534325744956731 test_loss: 0.13251054286956787
epoch: 100 training_loss 0.09560942217707634 test_loss: 0.1269811987876892
epoch: 101 training_loss 0.09502224342897535 test_loss: 0.11283701658248901
epoch: 102 training_loss 0.08921166777610778 test_loss: 0.11357835531234742
epoch: 103 training_loss 0.09972998021170497 test_loss: 0.12206888198852539
epoch: 104 training_loss 0.09919024674221873 test_loss: 0.10757889747619628
epoch: 105 training_loss 0.09025417003780603 test_loss: 0.10392810106277466
epoch: 106 training_loss 0.0940499171987176 test_loss: 0.11588135957717896
epoch: 107 training_loss 0.09389053296297789 test_loss: 0.12509247064590454
epoch: 108 training_loss 0.09845373265445233 test_loss: 0.11627874374389649
epoch: 109 training_loss 0.08898240124806761 test_loss: 0.11612126827239991
epoch: 110 training_loss 0.09780941769480705 test_loss: 0.11421278715133668
epoch: 111 training_loss 0.09488528497517108 test_loss: 0.11829217672348022
epoch: 112 training_loss 0.09265302976593376 test_loss: 0.1210450530052185
epoch: 113 training_loss 0.09445825271308422 test_loss: 0.11528186798095703
epoch: 114 training_loss 0.0945349046960473 test_loss: 0.10839231014251709
epoch: 115 training_loss 0.09862981755286455 test_loss: 0.1303722858428955
epoch: 116 training_loss 0.09458667246624827 test_loss: 0.10936141014099121
epoch: 117 training_loss 0.0960287693887949 test_loss: 0.13652247190475464
epoch: 118 training_loss 0.09311165194958448 test_loss: 0.10295170545578003
epoch: 119 training_loss 0.08974466029554605 test_loss: 0.12218198776245118
epoch: 120 training_loss 0.09735237196087837 test_loss: 0.1257961869239807
epoch: 121 training_loss 0.08719827009364962 test_loss: 0.10718809366226197
epoch: 122 training_loss 0.1037257232889533 test_loss: 0.10408273935317994
epoch: 123 training_loss 0.09302680535241961 test_loss: 0.10576602220535278
epoch: 124 training_loss 0.09004459876567125 test_loss: 0.10672123432159424
epoch: 125 training_loss 0.08346514559350908 test_loss: 0.1097288727760315
epoch: 126 training_loss 0.09068468853831291 test_loss: 0.10973894596099854
epoch: 127 training_loss 0.089555205963552 test_loss: 0.11184234619140625
epoch: 128 training_loss 0.09090257162228227 test_loss: 0.12189953327178955
epoch: 129 training_loss 0.08902670534327627 test_loss: 0.10985850095748902
epoch: 130 training_loss 0.09386779770255088 test_loss: 0.12072298526763917
epoch: 131 training_loss 0.08714883212000131 test_loss: 0.12564482688903808
epoch: 132 training_loss 0.0879480211623013 test_loss: 0.11825377941131592
epoch: 133 training_loss 0.09217348352074622 test_loss: 0.11332696676254272
epoch: 134 training_loss 0.09684864137321711 test_loss: 0.11995606422424317
epoch: 135 training_loss 0.09728643339127302 test_loss: 0.12264578342437744
epoch: 136 training_loss 0.09469616799615324 test_loss: 0.12917253971099854
epoch: 137 training_loss 0.0865482045803219 test_loss: 0.13459761142730714
epoch: 138 training_loss 0.09608347803354263 test_loss: 0.12226744890213012
epoch: 139 training_loss 0.09039350727573037 test_loss: 0.12458003759384155
epoch: 140 training_loss 0.0924425682425499 test_loss: 0.09925802350044251
epoch: 141 training_loss 0.09301489796489477 test_loss: 0.12821775674819946
epoch: 142 training_loss 0.09269582470878959 test_loss: 0.11189415454864501
epoch: 143 training_loss 0.08230210594832897 test_loss: 0.13740923404693603
epoch: 144 training_loss 0.09750985570251941 test_loss: 0.12845486402511597
epoch: 145 training_loss 0.08461654977872968 test_loss: 0.12349295616149902
epoch: 146 training_loss 0.09495234755799174 test_loss: 0.13337208032608033
epoch: 147 training_loss 0.08630593832582235 test_loss: 0.13662338256835938
epoch: 148 training_loss 0.09119008461013436 test_loss: 0.12146328687667847
epoch: 149 training_loss 0.08459666149690748 test_loss: 0.11895023584365845
episode: 0 training return: -999.9496784089855
episode: 1 training return: -999.9671366924761
episode: 2 training return: -999.9641364617174
episode: 3 training return: -999.9742019869067
epoch: 1 test_true_pfm: -0.5608614717657797 sim_pfm: -999.9315730520115
episode: 4 training return: -999.9592523740323
episode: 5 training return: -999.9473815372351
episode: 6 training return: -999.9598867242581
episode: 7 training return: -999.9766239287062
epoch: 2 test_true_pfm: -0.9566680837827289 sim_pfm: -999.9317715332822
episode: 8 training return: -999.9778829832229
episode: 9 training return: -999.9710516179277
episode: 10 training return: -999.9517238089239
episode: 11 training return: -999.9816719937528
epoch: 3 test_true_pfm: 0.20258011454356492 sim_pfm: -999.9316317666422
episode: 12 training return: -999.9770031792827
episode: 13 training return: -999.9558894670388
episode: 14 training return: -999.9767568644645
episode: 15 training return: -999.967178470221
epoch: 4 test_true_pfm: 0.03371846288438909 sim_pfm: -999.9320198331824
episode: 16 training return: -999.9515318597279
episode: 17 training return: -999.9693932626022
episode: 18 training return: -999.9537689104803
episode: 19 training return: -999.980100280056
epoch: 5 test_true_pfm: -0.7061275795316808 sim_pfm: -999.9320397167218
episode: 20 training return: -999.9446894418887
episode: 21 training return: -999.9732416681244
episode: 22 training return: -999.9800576829093
episode: 23 training return: -999.9781204530609
epoch: 6 test_true_pfm: -0.056273118039933456 sim_pfm: -999.9315264432256
episode: 24 training return: -999.9786786594918
episode: 25 training return: -999.968586353693
episode: 26 training return: -999.9731722397399
episode: 27 training return: -999.9708757097496
epoch: 7 test_true_pfm: 0.13490579480583217 sim_pfm: -999.9316023555402
episode: 28 training return: -999.9814037828719
episode: 29 training return: -999.97152381007
episode: 30 training return: -999.9740474116412
episode: 31 training return: -999.9727812066194
epoch: 8 test_true_pfm: 0.0711678973986279 sim_pfm: -999.9319038944205
episode: 32 training return: -999.9554494794598
episode: 33 training return: -999.9737857370495
episode: 34 training return: -999.9671618772475
episode: 35 training return: -999.9751018574776
epoch: 9 test_true_pfm: -1.1519873106063736 sim_pfm: -999.9317618376573
episode: 36 training return: -999.9640342134863
episode: 37 training return: -999.9777945158622
episode: 38 training return: -999.9750265752839
episode: 39 training return: -999.9762678833767
epoch: 10 test_true_pfm: -0.40065053667994466 sim_pfm: -999.9320825711638
episode: 40 training return: -999.9721657275622
episode: 41 training return: -999.9817047437645
episode: 42 training return: -999.9713141676663
episode: 43 training return: -999.97972586029
epoch: 11 test_true_pfm: -0.3783693497353557 sim_pfm: -999.931279541846
episode: 44 training return: -999.9751858349231
episode: 45 training return: -999.9718810801317
episode: 46 training return: -999.9773575372268
episode: 47 training return: -999.9722929984151
epoch: 12 test_true_pfm: 0.0682135035872435 sim_pfm: -999.9327130154588
episode: 48 training return: -999.9747465905633
episode: 49 training return: -999.9735712296075
episode: 50 training return: -999.9761865679753
episode: 51 training return: -999.979889753668
epoch: 13 test_true_pfm: -0.26972401796999795 sim_pfm: -999.9323291102097
episode: 52 training return: -999.9736732719402
episode: 53 training return: -999.9744188365379
episode: 54 training return: -999.9800787931342
episode: 55 training return: -999.9747728179083
epoch: 14 test_true_pfm: -0.4587572027842029 sim_pfm: -999.9320500200943
episode: 56 training return: -999.9703831710123
episode: 57 training return: -999.9768425421638
episode: 58 training return: -999.9783132989648
episode: 59 training return: -999.9774001012486
epoch: 15 test_true_pfm: -0.29951556784161076 sim_pfm: -999.9314320543712
episode: 60 training return: -999.9766540138497
episode: 61 training return: -999.9766479935539
episode: 62 training return: -999.9785541162504
episode: 63 training return: -999.9796641637305
epoch: 16 test_true_pfm: -0.004375048002884426 sim_pfm: -999.9311934806525
episode: 64 training return: -999.9717435830167
episode: 65 training return: -999.979661478201
episode: 66 training return: -999.9686055068785
episode: 67 training return: -999.9750777703989
epoch: 17 test_true_pfm: -0.46434571606176744 sim_pfm: -999.9325398618844
episode: 68 training return: -999.9753372545481
episode: 69 training return: -999.973317297961
episode: 70 training return: -999.978771262658
episode: 71 training return: -999.9680333138023
epoch: 18 test_true_pfm: -0.28950466345077697 sim_pfm: -999.9322508813061
episode: 72 training return: -999.9793677237134
episode: 73 training return: -999.9759728992591
episode: 74 training return: -999.9770034245241
episode: 75 training return: -999.979881222815
epoch: 19 test_true_pfm: -0.0173877684983292 sim_pfm: -999.9321165932497
episode: 76 training return: -999.9778068207261
episode: 77 training return: -999.9785349045806
episode: 78 training return: -999.978497635391
episode: 79 training return: -999.9535404300638
epoch: 20 test_true_pfm: -0.9735797047096355 sim_pfm: -999.9314235304179
episode: 80 training return: -999.9818610183016
episode: 81 training return: -999.9812337156889
episode: 82 training return: -999.9749641366654
episode: 83 training return: -999.9515074345012
epoch: 21 test_true_pfm: 0.07068505404199693 sim_pfm: -999.9325199669065
episode: 84 training return: -999.9743264022061
episode: 85 training return: -999.9765632561457
episode: 86 training return: -999.9752231361883
episode: 87 training return: -999.9311118747446
epoch: 22 test_true_pfm: 0.035531889010186335 sim_pfm: -999.9317323509772
episode: 88 training return: -999.9740059408272
episode: 89 training return: -999.9770111200145
episode: 90 training return: -999.975761864651
episode: 91 training return: -999.9702589605756
epoch: 23 test_true_pfm: -0.04686806043079711 sim_pfm: -999.9316139798416
episode: 92 training return: -999.9236547936396
episode: 93 training return: -999.9730792494578
episode: 94 training return: -999.9518276118579
episode: 95 training return: -999.9781671619888
epoch: 24 test_true_pfm: -0.7317610829731634 sim_pfm: -999.9333547287728
episode: 96 training return: -999.9822125074833
episode: 97 training return: -999.9702955338358
episode: 98 training return: -999.9788989827241
episode: 99 training return: -999.9755980792513
epoch: 25 test_true_pfm: -0.32894626080775136 sim_pfm: -999.9322010280981
episode: 100 training return: -999.977314255706
episode: 101 training return: -999.9655758788556
episode: 102 training return: -999.9547550491299
episode: 103 training return: -999.9762430967372
epoch: 26 test_true_pfm: -0.14013642321412914 sim_pfm: -999.9319466444667
episode: 104 training return: -999.9442172799203
episode: 105 training return: -999.9764457715249
episode: 106 training return: -999.9817230094283
episode: 107 training return: -999.9762943576413
epoch: 27 test_true_pfm: 0.11700809332581082 sim_pfm: -999.9326585589273
episode: 108 training return: -999.9755805428747
episode: 109 training return: -999.9748922988606
episode: 110 training return: -999.9653132563824
episode: 111 training return: -999.9772901802814
epoch: 28 test_true_pfm: -0.3598078126963576 sim_pfm: -999.9320697105713
episode: 112 training return: -999.9796520728105
episode: 113 training return: -999.9808990350457
episode: 114 training return: -999.9801381876931
episode: 115 training return: -999.9775398656626
epoch: 29 test_true_pfm: -0.7002892128282571 sim_pfm: -999.9314405830761
episode: 116 training return: -999.9779843447742
episode: 117 training return: -999.9817444523444
episode: 118 training return: -999.9793112181195
episode: 119 training return: -999.9803338183505
epoch: 30 test_true_pfm: -0.5029343219596402 sim_pfm: -999.9314841929913
episode: 120 training return: -999.9548762928247
episode: 121 training return: -999.9747551186946
episode: 122 training return: -999.9793635678724
episode: 123 training return: -999.9680779992693
epoch: 31 test_true_pfm: -0.43444314317141713 sim_pfm: -999.9317585599055
episode: 124 training return: -999.9759899190614
episode: 125 training return: -999.9744322883146
episode: 126 training return: -999.9782386349422
episode: 127 training return: -999.9744049559152
epoch: 32 test_true_pfm: -0.3627144267354308 sim_pfm: -999.932100917447
episode: 128 training return: -999.9798417998308
episode: 129 training return: -1000.3858224109929
episode: 130 training return: -999.9749973251426
episode: 131 training return: -999.9768515336116
epoch: 33 test_true_pfm: 0.5121186037077406 sim_pfm: -999.9308597165185
episode: 132 training return: -999.9371399594971
episode: 133 training return: -999.9810984918196
episode: 134 training return: -999.9737017795323
episode: 135 training return: -999.9794078802845
epoch: 34 test_true_pfm: -0.1432372791208892 sim_pfm: -999.931702608144
episode: 136 training return: -999.9518842433005
episode: 137 training return: -999.9310600339268
episode: 138 training return: -999.9766310345942
episode: 139 training return: -999.9714802614885
epoch: 35 test_true_pfm: 0.1812035915654243 sim_pfm: -999.9316452499779
episode: 140 training return: -999.9634224368076
episode: 141 training return: -999.9818009476006
episode: 142 training return: -999.926429266625
episode: 143 training return: -999.9740940887258
epoch: 36 test_true_pfm: -0.6326230791663426 sim_pfm: -999.932141768786
episode: 144 training return: -999.9641781172008
episode: 145 training return: -999.9722411452718
episode: 146 training return: -999.9640011112832
episode: 147 training return: -999.9789576112246
epoch: 37 test_true_pfm: -0.13443781230518226 sim_pfm: -999.9321690795151
episode: 148 training return: -999.9639357798446
episode: 149 training return: -999.9757173000814
episode: 150 training return: -999.9763614979122
episode: 151 training return: -999.9760603213351
epoch: 38 test_true_pfm: -0.09255362718527431 sim_pfm: -999.9301916840072
episode: 152 training return: -999.9527457226699
episode: 153 training return: -999.9766325770383
episode: 154 training return: -999.9719336910782
episode: 155 training return: -999.9756456477095
epoch: 39 test_true_pfm: -0.6170819785538739 sim_pfm: -999.9319457226835
episode: 156 training return: -999.9776185008109
episode: 157 training return: -999.9487865507044
episode: 158 training return: -999.975877940494
episode: 159 training return: -999.9749458942151
epoch: 40 test_true_pfm: -0.3406787031113135 sim_pfm: -999.932137316719
episode: 160 training return: -999.9691415157363
episode: 161 training return: -999.9715627277402
episode: 162 training return: -999.9608849290366
episode: 163 training return: -999.9817380360995
epoch: 41 test_true_pfm: -0.232635597629495 sim_pfm: -999.9324536795084
episode: 164 training return: -999.9708201411204
episode: 165 training return: -999.9770934958808
episode: 166 training return: -999.9771403416776
episode: 167 training return: -999.9630844310329
epoch: 42 test_true_pfm: -0.03269451490759773 sim_pfm: -999.9314193312453
episode: 168 training return: -999.9751619553642
episode: 169 training return: -999.9782542142055
episode: 170 training return: -999.9586778129632
episode: 171 training return: -999.969578923531
epoch: 43 test_true_pfm: -0.08153158248507342 sim_pfm: -999.9319425297746
episode: 172 training return: -999.978219140293
episode: 173 training return: -999.9755697141612
episode: 174 training return: -999.945918382099
episode: 175 training return: -999.9790648207364
epoch: 44 test_true_pfm: 0.11192569952846165 sim_pfm: -999.9323301095113
episode: 176 training return: -999.9783150764393
episode: 177 training return: -999.9808872724453
episode: 178 training return: -999.9768291673323
episode: 179 training return: -999.9663094275445
epoch: 45 test_true_pfm: -0.09933871253132852 sim_pfm: -999.9319842534142
episode: 180 training return: -999.924498841975
episode: 181 training return: -999.9483253680406
episode: 182 training return: -999.9754558771659
episode: 183 training return: -999.978923921403
epoch: 46 test_true_pfm: 0.31543994721195406 sim_pfm: -999.9317164232289
episode: 184 training return: -999.9794826734567
episode: 185 training return: -999.9759073601209
episode: 186 training return: -999.9743985798092
episode: 187 training return: -999.9780482359529
epoch: 47 test_true_pfm: 0.4754479820383349 sim_pfm: -999.9321311320729
episode: 188 training return: -999.9770249397404
episode: 189 training return: -999.972463523309
episode: 190 training return: -999.9622654945075
episode: 191 training return: -999.9601930326583
epoch: 48 test_true_pfm: -0.5311206723834673 sim_pfm: -999.931836153232
episode: 192 training return: -999.9720217730779
episode: 193 training return: -999.9809289552878
episode: 194 training return: -999.98057044266
episode: 195 training return: -999.9577103925083
epoch: 49 test_true_pfm: -0.5196894135074075 sim_pfm: -999.9323014283219
episode: 196 training return: -999.9804477730926
episode: 197 training return: -999.9417110152685
episode: 198 training return: -999.9509761400736
episode: 199 training return: -999.9695748215507
epoch: 50 test_true_pfm: 0.4012624769243638 sim_pfm: -999.9318917385045
episode: 200 training return: -999.9829226209399
episode: 201 training return: -999.9462564225046
episode: 202 training return: -999.9703101468906
episode: 203 training return: -999.9802519769252
epoch: 51 test_true_pfm: -0.5442719266753252 sim_pfm: -999.9312653575353
episode: 204 training return: -999.9438675628446
episode: 205 training return: -999.9510430373633
episode: 206 training return: -999.9832008254649
episode: 207 training return: -999.9584477828645
epoch: 52 test_true_pfm: 0.10234233965102946 sim_pfm: -999.93182949038
episode: 208 training return: -999.9733268913832
episode: 209 training return: -999.9690339993174
episode: 210 training return: -999.9696619183072
episode: 211 training return: -999.974328273248
epoch: 53 test_true_pfm: 0.03734146647058904 sim_pfm: -999.931690733818
episode: 212 training return: -999.9529918760871
episode: 213 training return: -999.9779475494798
episode: 214 training return: -999.9786909572588
episode: 215 training return: -999.9660203415125
epoch: 54 test_true_pfm: -0.6311429995713388 sim_pfm: -999.9319518865004
episode: 216 training return: -999.973026495841
episode: 217 training return: -999.9800146257619
episode: 218 training return: -999.9743796446298
episode: 219 training return: -999.9769295627663
epoch: 55 test_true_pfm: -0.4257079595978282 sim_pfm: -999.9318901215021
episode: 220 training return: -999.9753625045558
episode: 221 training return: -999.9771875325351
episode: 222 training return: -999.970269661521
episode: 223 training return: -999.9593507831352
epoch: 56 test_true_pfm: -0.34356411398795855 sim_pfm: -999.9307265065218
episode: 224 training return: -999.9783310325008
episode: 225 training return: -999.8464583718884
episode: 226 training return: -999.9811762045753
episode: 227 training return: -999.9259406334443
epoch: 57 test_true_pfm: -1.0477506769412097 sim_pfm: -999.9321793712417
episode: 228 training return: -999.971902375106
episode: 229 training return: -999.9673214295032
episode: 230 training return: -999.9792026111891
episode: 231 training return: -999.927349142274
epoch: 58 test_true_pfm: -0.5972613804041886 sim_pfm: -999.9316765853777
episode: 232 training return: -999.9766190799966
episode: 233 training return: -999.9545182398406
episode: 234 training return: -999.9726802101553
episode: 235 training return: -999.9472712013905
epoch: 59 test_true_pfm: -0.3762137293471672 sim_pfm: -999.9319863617035
episode: 236 training return: -999.9806113815538
episode: 237 training return: -999.9583101328262
episode: 238 training return: -999.9718248940786
episode: 239 training return: -999.9794493586078
epoch: 60 test_true_pfm: -0.24159675916541037 sim_pfm: -999.9324836161827
episode: 240 training return: -999.9779573293663
episode: 241 training return: -999.9019885523703
episode: 242 training return: -999.9434489175325
episode: 243 training return: -999.9697283931744
epoch: 61 test_true_pfm: -0.08743905493489627 sim_pfm: -999.9313725151319
episode: 244 training return: -999.9619625332427
episode: 245 training return: -999.9576889822279
episode: 246 training return: -999.9743989607176
episode: 247 training return: -999.9766778533356
epoch: 62 test_true_pfm: -0.3572151565094386 sim_pfm: -999.9318114927055
episode: 248 training return: -999.9734952709322
episode: 249 training return: -999.9793820076256
episode: 250 training return: -999.9784891479405
episode: 251 training return: -999.9746107283795
epoch: 63 test_true_pfm: -0.7272745588103381 sim_pfm: -999.9321831563069
episode: 252 training return: -999.8972943786964
episode: 253 training return: -999.9832141153387
episode: 254 training return: -999.9795775572151
episode: 255 training return: -999.9759941161122
epoch: 64 test_true_pfm: -0.3020826157077827 sim_pfm: -999.9320104984563
episode: 256 training return: -999.9927536509085
episode: 257 training return: -999.9777742140311
episode: 258 training return: -999.9470542794993
episode: 259 training return: -999.9801132075632
epoch: 65 test_true_pfm: -0.12843182538527909 sim_pfm: -999.9320176292814
episode: 260 training return: -999.9751061250644
episode: 261 training return: -999.9661648074059
episode: 262 training return: -999.9741477392835
episode: 263 training return: -999.9310149685671
epoch: 66 test_true_pfm: 0.03575783793931588 sim_pfm: -999.9319481445285
episode: 264 training return: -999.9719779257584
episode: 265 training return: -999.9379582667177
episode: 266 training return: -999.9792281942501
episode: 267 training return: -999.9765764309604
epoch: 67 test_true_pfm: 0.07305705931944313 sim_pfm: -999.9319837439461
episode: 268 training return: -999.9571250922709
episode: 269 training return: -999.9775204008021
episode: 270 training return: -999.9590091076375
episode: 271 training return: -999.9817249111553
epoch: 68 test_true_pfm: -0.16140606329235643 sim_pfm: -999.9316524865507
episode: 272 training return: -999.9670601834601
episode: 273 training return: -999.9618446147726
episode: 274 training return: -999.977245684233
episode: 275 training return: -999.9742940314595
epoch: 69 test_true_pfm: -0.009917610814177377 sim_pfm: -999.9313365875478
episode: 276 training return: -999.9738249259857
episode: 277 training return: -999.9599077288862
episode: 278 training return: -999.9801933779978
episode: 279 training return: -1000.100164347169
epoch: 70 test_true_pfm: -0.18067455825491904 sim_pfm: -999.9322782489003
episode: 280 training return: -999.9806251358518
episode: 281 training return: -999.975143338318
episode: 282 training return: -999.9718334288262
episode: 283 training return: -999.9569778175911
epoch: 71 test_true_pfm: -0.6067691329544772 sim_pfm: -999.9319827717077
episode: 284 training return: -999.9755677303459
episode: 285 training return: -999.9769898200293
episode: 286 training return: -999.9765719607781
episode: 287 training return: -999.9792622347965
epoch: 72 test_true_pfm: -0.19922480091144576 sim_pfm: -999.9322487704968
episode: 288 training return: -999.9756613882994
episode: 289 training return: -999.9729213642519
episode: 290 training return: -999.9770746108745
episode: 291 training return: -999.907562293735
epoch: 73 test_true_pfm: -0.48792890492475927 sim_pfm: -999.9324730698805
episode: 292 training return: -999.9330446931846
episode: 293 training return: -999.96995209449
episode: 294 training return: -999.9791378286208
episode: 295 training return: -999.9744101889042
epoch: 74 test_true_pfm: 0.01795249848490595 sim_pfm: -999.9316793968761
episode: 296 training return: -999.9736750862427
episode: 297 training return: -999.9749112577446
episode: 298 training return: -999.9345863639991
episode: 299 training return: -999.9408630659543
epoch: 75 test_true_pfm: -0.26211782538860323 sim_pfm: -999.9323114926625
episode: 300 training return: -999.9639768791037
episode: 301 training return: -999.9586836681182
episode: 302 training return: -999.9752274475562
episode: 303 training return: -999.9689993255204
epoch: 76 test_true_pfm: -0.5336010520212338 sim_pfm: -999.9320735195752
episode: 304 training return: -999.9765495209957
episode: 305 training return: -999.978341765604
episode: 306 training return: -999.9768116972408
episode: 307 training return: -999.9784757332401
epoch: 77 test_true_pfm: -0.6002129713144465 sim_pfm: -999.9313574169355
episode: 308 training return: -999.9765474575754
episode: 309 training return: -999.9801364792263
episode: 310 training return: -999.9789029987046
episode: 311 training return: -999.9728850363313
epoch: 78 test_true_pfm: -0.012557527650883507 sim_pfm: -999.9325804887239
episode: 312 training return: -999.9766289131383
episode: 313 training return: -999.9649584633424
episode: 314 training return: -999.9766080833866
episode: 315 training return: -999.9698954718466
epoch: 79 test_true_pfm: -0.4140006928306277 sim_pfm: -999.9326487270049
episode: 316 training return: -999.968650377545
episode: 317 training return: -999.9279362215146
episode: 318 training return: -999.9603367395468
episode: 319 training return: -999.9593024344541
epoch: 80 test_true_pfm: 0.29677972793752216 sim_pfm: -999.9308687236189
episode: 320 training return: -999.9755282186921
episode: 321 training return: -999.9695720791906
episode: 322 training return: -999.9774442214585
episode: 323 training return: -999.9798401032115
epoch: 81 test_true_pfm: -0.9111355756715315 sim_pfm: -999.9316502224401
episode: 324 training return: -999.9784901285544
episode: 325 training return: -999.957747110626
episode: 326 training return: -999.9659473839324
episode: 327 training return: -999.9650983841964
epoch: 82 test_true_pfm: -0.1680019847612678 sim_pfm: -999.9319686609145
episode: 328 training return: -999.9545647294965
episode: 329 training return: -999.9766920332615
episode: 330 training return: -999.9668151259071
episode: 331 training return: -999.9062160747037
epoch: 83 test_true_pfm: -0.7958172407856813 sim_pfm: -999.93176147046
episode: 332 training return: -999.9796091642398
episode: 333 training return: -999.9694892330366
episode: 334 training return: -999.9796841592951
episode: 335 training return: -999.954911281465
epoch: 84 test_true_pfm: -0.15720809148032658 sim_pfm: -999.9320204131946
episode: 336 training return: -999.9776277172482
episode: 337 training return: -999.9734410336346
episode: 338 training return: -999.9751264104813
episode: 339 training return: -999.9723435535304
epoch: 85 test_true_pfm: -0.27477878044521914 sim_pfm: -999.9316101072286
episode: 340 training return: -999.9798432613901
episode: 341 training return: -999.9741594695109
episode: 342 training return: -999.9761661862865
episode: 343 training return: -999.9779556186539
epoch: 86 test_true_pfm: 0.18768178982569972 sim_pfm: -999.9321549748355
episode: 344 training return: -999.9781994056881
episode: 345 training return: -999.9761192409284
episode: 346 training return: -999.9762935990518
episode: 347 training return: -999.9509602041006
epoch: 87 test_true_pfm: 0.13787540141082102 sim_pfm: -999.9322617559842
episode: 348 training return: -999.9550593676801
episode: 349 training return: -999.9787634897721
episode: 350 training return: -999.9803596038363
episode: 351 training return: -999.9719147913315
epoch: 88 test_true_pfm: -0.25684694904369915 sim_pfm: -999.9325269404176
episode: 352 training return: -999.9629082498773
episode: 353 training return: -999.9278645062747
episode: 354 training return: -999.9794122538576
episode: 355 training return: -999.97244434323
epoch: 89 test_true_pfm: 0.0722999390260212 sim_pfm: -999.9316778646768
episode: 356 training return: -999.9745371187053
episode: 357 training return: -999.9611172343957
episode: 358 training return: -999.960325182551
episode: 359 training return: -999.9790997844093
epoch: 90 test_true_pfm: -0.31054875605720483 sim_pfm: -999.9312424845515
episode: 360 training return: -999.9811688316539
episode: 361 training return: -999.9747814408524
episode: 362 training return: -999.9810448978009
episode: 363 training return: -999.9736905162786
epoch: 91 test_true_pfm: -0.6953198728745136 sim_pfm: -999.9323247594343
episode: 364 training return: -999.9788631545846
episode: 365 training return: -999.9788553683467
episode: 366 training return: -999.9552398313728
episode: 367 training return: -999.9441594355508
epoch: 92 test_true_pfm: -0.7367742946213215 sim_pfm: -999.9317722320029
episode: 368 training return: -999.9545714456472
episode: 369 training return: -999.9794967448097
episode: 370 training return: -999.9548540678699
episode: 371 training return: -999.9775606465458
epoch: 93 test_true_pfm: -1.1699852721675252 sim_pfm: -999.9310998869547
episode: 372 training return: -999.9734016605917
episode: 373 training return: -999.9739633263804
episode: 374 training return: -999.9773045944447
episode: 375 training return: -999.9223849303285
epoch: 94 test_true_pfm: 0.5084205804121474 sim_pfm: -999.931495118614
episode: 376 training return: -999.979040008834
episode: 377 training return: -999.9785538488964
episode: 378 training return: -999.9365807523385
episode: 379 training return: -999.9771136324268
epoch: 95 test_true_pfm: -0.11353267658597364 sim_pfm: -999.9319667504925
episode: 380 training return: -999.9788963985367
episode: 381 training return: -999.9775776862887
episode: 382 training return: -999.9282293598276
episode: 383 training return: -999.9748270119319
epoch: 96 test_true_pfm: -0.6285470353353549 sim_pfm: -999.9320682079266
episode: 384 training return: -999.9421303095921
episode: 385 training return: -999.9623864568471
episode: 386 training return: -999.9346731348919
episode: 387 training return: -999.9052967336365
epoch: 97 test_true_pfm: -0.41625025860506887 sim_pfm: -999.9303327001179
episode: 388 training return: -999.9769154462085
episode: 389 training return: -999.9712916732116
episode: 390 training return: -999.9736970848297
episode: 391 training return: -999.9065143992216
epoch: 98 test_true_pfm: -0.4798433152972869 sim_pfm: -999.9321126406671
episode: 392 training return: -999.9803855916364
episode: 393 training return: -999.9744989484965
episode: 394 training return: -999.9210572296762
episode: 395 training return: -999.9031403188711
epoch: 99 test_true_pfm: 0.6126794310592167 sim_pfm: -999.9320408167911
episode: 396 training return: -999.9523376932568
episode: 397 training return: -999.9773233209102
episode: 398 training return: -999.9754686221216
episode: 399 training return: -999.9112591215448
epoch: 100 test_true_pfm: 0.18340750976699413 sim_pfm: -999.9319951321914
episode: 400 training return: -999.9787172426959
episode: 401 training return: -999.9592526968878
episode: 402 training return: -999.9701210465096
episode: 403 training return: -999.9797504294713
epoch: 101 test_true_pfm: -0.33976669452535274 sim_pfm: -999.9316695792319
episode: 404 training return: -999.9772601618752
episode: 405 training return: -999.9817194837731
episode: 406 training return: -999.9660539419435
episode: 407 training return: -999.9815974364896
epoch: 102 test_true_pfm: -0.5891977818768677 sim_pfm: -999.9316880267314
episode: 408 training return: -999.9743302262823
episode: 409 training return: -999.9743744973368
episode: 410 training return: -999.9590040549316
episode: 411 training return: -999.9639641015516
epoch: 103 test_true_pfm: -0.02076272505674796 sim_pfm: -999.9312372160933
episode: 412 training return: -999.9737661426763
episode: 413 training return: -999.9784763601822
episode: 414 training return: -999.9607870965601
episode: 415 training return: -999.9761533335325
epoch: 104 test_true_pfm: 0.03324103046008799 sim_pfm: -999.9316900742651
episode: 416 training return: -999.9789019759211
episode: 417 training return: -999.9773282444859
episode: 418 training return: -999.8910245722805
episode: 419 training return: -999.9661760504587
epoch: 105 test_true_pfm: 0.3656752318096624 sim_pfm: -999.9315471343233
episode: 420 training return: -999.9257071378561
episode: 421 training return: -999.9371789618858
episode: 422 training return: -999.9691531027624
episode: 423 training return: -999.975728037378
epoch: 106 test_true_pfm: -0.22893504351154725 sim_pfm: -999.932290449111
episode: 424 training return: -999.9766837372689
episode: 425 training return: -999.930425334913
episode: 426 training return: -999.9649182600081
episode: 427 training return: -999.9814604728504
epoch: 107 test_true_pfm: -0.23325536960341778 sim_pfm: -999.9312605736092
episode: 428 training return: -999.9499993645309
episode: 429 training return: -999.9815504453015
episode: 430 training return: -999.9765815972953
episode: 431 training return: -999.9539637399779
epoch: 108 test_true_pfm: -0.7835830467592698 sim_pfm: -999.9320486915764
episode: 432 training return: -999.948299129745
episode: 433 training return: -999.9402510280554
episode: 434 training return: -999.9446525134621
episode: 435 training return: -999.97579333821
epoch: 109 test_true_pfm: 0.24736007865178422 sim_pfm: -999.9314057357601
episode: 436 training return: -999.9755762177924
episode: 437 training return: -999.9758808529247
episode: 438 training return: -999.9775745849391
episode: 439 training return: -999.9226974115237
epoch: 110 test_true_pfm: -0.5046309368544221 sim_pfm: -999.9319760345984
episode: 440 training return: -999.9644818971602
episode: 441 training return: -999.9800177711387
episode: 442 training return: -999.9768722834167
episode: 443 training return: -999.9220823626025
epoch: 111 test_true_pfm: -0.5221602012143092 sim_pfm: -999.9319217451075
episode: 444 training return: -999.9723772732137
episode: 445 training return: -999.9781451196826
episode: 446 training return: -999.9744702698102
episode: 447 training return: -999.9808879446081
epoch: 112 test_true_pfm: 0.20376294429105 sim_pfm: -999.9323262996668
episode: 448 training return: -999.9762799843962
episode: 449 training return: -999.9603687754878
episode: 450 training return: -999.9800312293142
episode: 451 training return: -999.9665091394105
epoch: 113 test_true_pfm: 0.08358020211982449 sim_pfm: -999.9321476780224
episode: 452 training return: -999.9079022601687
episode: 453 training return: -999.9749914576757
episode: 454 training return: -999.9513466229864
episode: 455 training return: -999.9751586589302
epoch: 114 test_true_pfm: -0.42807749930011135 sim_pfm: -999.931780170069
episode: 456 training return: -999.9778735329061
episode: 457 training return: -999.9483856862353
episode: 458 training return: -999.9577174793089
episode: 459 training return: -999.9772255485608
epoch: 115 test_true_pfm: -0.004123745356748902 sim_pfm: -999.9321549903624
episode: 460 training return: -999.9456675717964
episode: 461 training return: -999.9764329226279
episode: 462 training return: -999.9586810780547
episode: 463 training return: -999.9573962670809
epoch: 116 test_true_pfm: -0.7357452016337903 sim_pfm: -999.931795608361
episode: 464 training return: -999.9792965490426
episode: 465 training return: -999.9775423716095
episode: 466 training return: -999.9662911266705
episode: 467 training return: -999.920275094637
epoch: 117 test_true_pfm: -0.1834509727306858 sim_pfm: -999.9321089529
episode: 468 training return: -999.9735388118203
episode: 469 training return: -999.9738551150373
episode: 470 training return: -999.9730668420675
episode: 471 training return: -999.974015757795
epoch: 118 test_true_pfm: -0.24078142362454377 sim_pfm: -999.9317681651925
episode: 472 training return: -999.9747307014051
episode: 473 training return: -999.964003919711
episode: 474 training return: -999.9710897275393
episode: 475 training return: -999.9701391407452
epoch: 119 test_true_pfm: -0.3605090714131225 sim_pfm: -999.932478830504
episode: 476 training return: -999.9780997714281
episode: 477 training return: -999.9813656929658
episode: 478 training return: -999.9367776796131
episode: 479 training return: -999.9735695242997
epoch: 120 test_true_pfm: -0.31963267081910335 sim_pfm: -999.9317139349631
episode: 480 training return: -999.9767034900743
episode: 481 training return: -999.9774193539307
episode: 482 training return: -999.9746781826586
episode: 483 training return: -999.954261960548
epoch: 121 test_true_pfm: -0.5044293961624349 sim_pfm: -999.9315994473055
episode: 484 training return: -999.9777745543117
episode: 485 training return: -999.9713968444703
episode: 486 training return: -999.9757946150391
episode: 487 training return: -999.9762057820867
epoch: 122 test_true_pfm: 0.2587309857211171 sim_pfm: -999.9315755972863
episode: 488 training return: -999.9715602389822
episode: 489 training return: -999.9761862906207
episode: 490 training return: -999.9751008500716
episode: 491 training return: -999.9808764304785
epoch: 123 test_true_pfm: -0.18431373214009916 sim_pfm: -999.9327664011344
episode: 492 training return: -999.9662782233746
episode: 493 training return: -999.9684143849457
episode: 494 training return: -999.9700216904512
episode: 495 training return: -999.973192135209
epoch: 124 test_true_pfm: 0.4666630126506499 sim_pfm: -999.9324869065985
episode: 496 training return: -999.9695825893955
episode: 497 training return: -999.9732128074122
episode: 498 training return: -999.9793101104282
episode: 499 training return: -999.950975890478
epoch: 125 test_true_pfm: 0.010330607973416783 sim_pfm: -999.9321562780788
episode: 500 training return: -999.9537226326817
episode: 501 training return: -999.9749653667471
episode: 502 training return: -999.9815709381958
episode: 503 training return: -999.9646653778502
epoch: 126 test_true_pfm: -1.560086923715694 sim_pfm: -999.9316883760151
episode: 504 training return: -999.9788079102509
episode: 505 training return: -999.9829018489713
episode: 506 training return: -999.9733335455045
episode: 507 training return: -999.96340464135
epoch: 127 test_true_pfm: -0.04753046749693435 sim_pfm: -999.9318450152347
episode: 508 training return: -999.9787028814611
episode: 509 training return: -999.962515150052
episode: 510 training return: -999.9501466532481
episode: 511 training return: -999.966960975288
epoch: 128 test_true_pfm: -0.07275540091338652 sim_pfm: -999.9320687795658
episode: 512 training return: -999.9780731010178
episode: 513 training return: -999.978062776873
episode: 514 training return: -999.9721846848723
episode: 515 training return: -999.9523214936776
epoch: 129 test_true_pfm: -0.3998225113543976 sim_pfm: -999.931977122068
episode: 516 training return: -999.9775397182236
episode: 517 training return: -999.9751323178532
episode: 518 training return: -999.949876856964
episode: 519 training return: -999.8903845593611
epoch: 130 test_true_pfm: -0.46551582746436265 sim_pfm: -999.9311536746286
episode: 520 training return: -999.9789703708228
episode: 521 training return: -999.9767692640554
episode: 522 training return: -999.9714564217984
episode: 523 training return: -999.9675355916542
epoch: 131 test_true_pfm: 0.42984370875736205 sim_pfm: -999.9320201475903
episode: 524 training return: -999.9739613588026
episode: 525 training return: -999.977163805201
episode: 526 training return: -999.9793920513239
episode: 527 training return: -999.955196748855
epoch: 132 test_true_pfm: -0.25631461716438436 sim_pfm: -999.9311068873243
episode: 528 training return: -999.9295186175342
episode: 529 training return: -999.9661580201334
episode: 530 training return: -999.9816525228512
episode: 531 training return: -999.9784091659354
epoch: 133 test_true_pfm: -0.2924371110190657 sim_pfm: -999.9316775580027
episode: 532 training return: -999.9676070341884
episode: 533 training return: -999.9774177349376
episode: 534 training return: -999.9766461313219
episode: 535 training return: -999.9679255579948
epoch: 134 test_true_pfm: 0.6286019627973516 sim_pfm: -999.9317751548223
episode: 536 training return: -999.9788975777637
episode: 537 training return: -999.9801823627008
episode: 538 training return: -999.9513658985348
episode: 539 training return: -999.9780503983253
epoch: 135 test_true_pfm: -0.07603710127994602 sim_pfm: -999.9316403054163
episode: 540 training return: -999.9747898506204
episode: 541 training return: -999.962072350029
episode: 542 training return: -999.9589883037347
episode: 543 training return: -999.9803417263735
epoch: 136 test_true_pfm: -0.05088374253206788 sim_pfm: -999.9315551358728
episode: 544 training return: -999.9491368418599
episode: 545 training return: -999.948777434776
episode: 546 training return: -999.9709730021735
episode: 547 training return: -999.9794447010702
epoch: 137 test_true_pfm: -0.3788800682473015 sim_pfm: -999.9320323923527
episode: 548 training return: -999.8955822100614
episode: 549 training return: -999.9787332830656
episode: 550 training return: -999.9759588005011
episode: 551 training return: -999.9808135287976
epoch: 138 test_true_pfm: 0.4122984898446791 sim_pfm: -999.9313348334518
episode: 552 training return: -999.9782912640671
episode: 553 training return: -999.9591165384884
episode: 554 training return: -999.9779710438227
episode: 555 training return: -999.9671914749466
epoch: 139 test_true_pfm: -0.08607199135679751 sim_pfm: -999.931741234057
episode: 556 training return: -999.972750282363
episode: 557 training return: -999.9799320802011
episode: 558 training return: -999.9506153599467
episode: 559 training return: -999.9593986950669
epoch: 140 test_true_pfm: 0.44258843602194925 sim_pfm: -999.9312907564062
episode: 560 training return: -999.9795833677374
episode: 561 training return: -999.974899826023
episode: 562 training return: -999.9791937557469
episode: 563 training return: -999.9734603461275
epoch: 141 test_true_pfm: -0.07078329343857244 sim_pfm: -999.931406161055
episode: 564 training return: -999.9797828812996
episode: 565 training return: -999.978662195727
episode: 566 training return: -999.976376575429
episode: 567 training return: -999.9631612671135
epoch: 142 test_true_pfm: -0.5404489379550502 sim_pfm: -999.9324963521589
episode: 568 training return: -999.9464977180273
episode: 569 training return: -999.9755332511833
episode: 570 training return: -999.952915773785
episode: 571 training return: -999.9725552283289
epoch: 143 test_true_pfm: -0.4458503336112838 sim_pfm: -999.9326263811059
episode: 572 training return: -999.9788269605441
episode: 573 training return: -999.9771061756418
episode: 574 training return: -999.9582463057731
episode: 575 training return: -999.9716595036681
epoch: 144 test_true_pfm: 0.47478294800951915 sim_pfm: -999.9319311333619
episode: 576 training return: -999.906509690994
episode: 577 training return: -999.9810232414463
episode: 578 training return: -999.9792067833316
episode: 579 training return: -999.954529708399
epoch: 145 test_true_pfm: -0.5702686722804615 sim_pfm: -999.9319741808157
episode: 580 training return: -999.9771339482423
episode: 581 training return: -999.9792961873246
episode: 582 training return: -999.9806753448863
episode: 583 training return: -999.9771298737589
epoch: 146 test_true_pfm: 0.4709656066541254 sim_pfm: -999.931532417433
episode: 584 training return: -999.9802306129343
episode: 585 training return: -999.9786583670932
episode: 586 training return: -999.9773294271657
episode: 587 training return: -999.9761533646899
epoch: 147 test_true_pfm: 0.06650489486842438 sim_pfm: -999.9319748325369
episode: 588 training return: -999.9529038044287
episode: 589 training return: -999.9767201810619
episode: 590 training return: -999.9646955498775
episode: 591 training return: -999.977953833294
epoch: 148 test_true_pfm: -0.5166087883046959 sim_pfm: -999.9317107735769
episode: 592 training return: -999.9621303923042
episode: 593 training return: -999.9745796599008
episode: 594 training return: -999.9764796764108
episode: 595 training return: -999.9635341177511
epoch: 149 test_true_pfm: -0.8788849100959718 sim_pfm: -999.9305330948
episode: 596 training return: -999.9437333605542
episode: 597 training return: -999.9800793905889
episode: 598 training return: -999.9821409905994
episode: 599 training return: -999.9765022376607
epoch: 150 test_true_pfm: -0.6152735136066443 sim_pfm: -999.9319195119896
