['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.32733997717499735 test_loss: 0.21133115291595458
epoch: 1 training_loss 0.16860642723739147 test_loss: 0.16396105289459229
epoch: 2 training_loss 0.14678535632789136 test_loss: 0.15020233392715454
epoch: 3 training_loss 0.13184019308537245 test_loss: 0.1442469358444214
epoch: 4 training_loss 0.13149007491767406 test_loss: 0.109611976146698
epoch: 5 training_loss 0.12290905855596065 test_loss: 0.13061672449111938
epoch: 6 training_loss 0.12688390266150237 test_loss: 0.12889374494552613
epoch: 7 training_loss 0.11518389564007521 test_loss: 0.13673319816589355
epoch: 8 training_loss 0.11154760595411062 test_loss: 0.11945872306823731
epoch: 9 training_loss 0.12068623464554548 test_loss: 0.12063332796096801
epoch: 10 training_loss 0.10486758070066571 test_loss: 0.13747116327285766
epoch: 11 training_loss 0.11200440606102348 test_loss: 0.12735896110534667
epoch: 12 training_loss 0.10464572383090853 test_loss: 0.11072802543640137
epoch: 13 training_loss 0.11122388230636716 test_loss: 0.1320735454559326
epoch: 14 training_loss 0.11174726501107216 test_loss: 0.10835275650024415
epoch: 15 training_loss 0.1093060664087534 test_loss: 0.11641262769699097
epoch: 16 training_loss 0.10713210966438055 test_loss: 0.12415429353713989
epoch: 17 training_loss 0.10853735079988837 test_loss: 0.1195941686630249
epoch: 18 training_loss 0.10728869907557964 test_loss: 0.13105274438858033
epoch: 19 training_loss 0.10435228008776903 test_loss: 0.12037242650985717
epoch: 20 training_loss 0.1077755231782794 test_loss: 0.1254824161529541
epoch: 21 training_loss 0.10216458613052964 test_loss: 0.10873188972473144
epoch: 22 training_loss 0.10672489939257503 test_loss: 0.11738097667694092
epoch: 23 training_loss 0.10597332892939448 test_loss: 0.11904594898223878
epoch: 24 training_loss 0.10300365418195724 test_loss: 0.1132322072982788
epoch: 25 training_loss 0.10335012152791023 test_loss: 0.12949439287185668
epoch: 26 training_loss 0.10883667860180139 test_loss: 0.12775235176086425
epoch: 27 training_loss 0.1088579229079187 test_loss: 0.11159182786941528
epoch: 28 training_loss 0.1030757562071085 test_loss: 0.12813332080841064
epoch: 29 training_loss 0.1075181582570076 test_loss: 0.11048146486282348
epoch: 30 training_loss 0.10519129304215312 test_loss: 0.10486824512481689
epoch: 31 training_loss 0.10897663496434688 test_loss: 0.1210514783859253
epoch: 32 training_loss 0.10423354182392358 test_loss: 0.11644006967544555
epoch: 33 training_loss 0.10337771572172642 test_loss: 0.11864176988601685
epoch: 34 training_loss 0.09449372243136167 test_loss: 0.10053374767303466
epoch: 35 training_loss 0.09992710812017322 test_loss: 0.11189665794372558
epoch: 36 training_loss 0.09721711181104183 test_loss: 0.11853282451629639
epoch: 37 training_loss 0.10930675534531474 test_loss: 0.1018274188041687
epoch: 38 training_loss 0.10163486817851662 test_loss: 0.12985749244689943
epoch: 39 training_loss 0.10619629446417093 test_loss: 0.13743599653244018
epoch: 40 training_loss 0.100796715952456 test_loss: 0.12371435165405273
epoch: 41 training_loss 0.09678675509989261 test_loss: 0.12088900804519653
epoch: 42 training_loss 0.10260181020945311 test_loss: 0.10092931985855103
epoch: 43 training_loss 0.09354011526331306 test_loss: 0.10832724571228028
epoch: 44 training_loss 0.10068467669188977 test_loss: 0.10187734365463257
epoch: 45 training_loss 0.09633488297462463 test_loss: 0.11107230186462402
epoch: 46 training_loss 0.10262128077447415 test_loss: 0.11060680150985717
epoch: 47 training_loss 0.10033609172329307 test_loss: 0.10307414531707763
epoch: 48 training_loss 0.09880098380148411 test_loss: 0.11536189317703247
epoch: 49 training_loss 0.0997434252500534 test_loss: 0.11389766931533814
epoch: 50 training_loss 0.09845948463305831 test_loss: 0.11303552389144897
epoch: 51 training_loss 0.10299346428364516 test_loss: 0.10675673484802246
epoch: 52 training_loss 0.10254234855994582 test_loss: 0.10157434940338135
epoch: 53 training_loss 0.10123506385833025 test_loss: 0.10946371555328369
epoch: 54 training_loss 0.09657620750367642 test_loss: 0.10837584733963013
epoch: 55 training_loss 0.09926616271957756 test_loss: 0.10857205390930176
epoch: 56 training_loss 0.09199569024145603 test_loss: 0.11337321996688843
epoch: 57 training_loss 0.09794283017516137 test_loss: 0.12103499174118042
epoch: 58 training_loss 0.10018539417535066 test_loss: 0.10406074523925782
epoch: 59 training_loss 0.10387913189828396 test_loss: 0.09525827169418336
epoch: 60 training_loss 0.1005867426469922 test_loss: 0.11253300905227662
epoch: 61 training_loss 0.09443602707237005 test_loss: 0.08834763765335082
epoch: 62 training_loss 0.09826044242829085 test_loss: 0.11605397462844849
epoch: 63 training_loss 0.099205511957407 test_loss: 0.11765894889831544
epoch: 64 training_loss 0.09367302004247904 test_loss: 0.10871837139129639
epoch: 65 training_loss 0.08759826477617025 test_loss: 0.11858311891555787
epoch: 66 training_loss 0.10889564301818609 test_loss: 0.11115714311599731
epoch: 67 training_loss 0.10100815936923027 test_loss: 0.11546655893325805
epoch: 68 training_loss 0.09114167858846485 test_loss: 0.11917996406555176
epoch: 69 training_loss 0.09762825105339289 test_loss: 0.11070339679718018
epoch: 70 training_loss 0.10057296367362141 test_loss: 0.11251249313354492
epoch: 71 training_loss 0.09964991491287947 test_loss: 0.11272532939910888
epoch: 72 training_loss 0.09845074532553554 test_loss: 0.11170146465301514
epoch: 73 training_loss 0.1019382268935442 test_loss: 0.12228440046310425
epoch: 74 training_loss 0.0994601083919406 test_loss: 0.09949274063110351
epoch: 75 training_loss 0.09759408356621861 test_loss: 0.11443186998367309
epoch: 76 training_loss 0.09049813017249107 test_loss: 0.10280408859252929
epoch: 77 training_loss 0.09428398281335831 test_loss: 0.11007344722747803
epoch: 78 training_loss 0.0959807750582695 test_loss: 0.10423974990844727
epoch: 79 training_loss 0.09593466291204095 test_loss: 0.11182562112808228
epoch: 80 training_loss 0.10098179474473 test_loss: 0.12376265525817871
epoch: 81 training_loss 0.09575950616970658 test_loss: 0.11404727697372437
epoch: 82 training_loss 0.09399373851716518 test_loss: 0.13536118268966674
epoch: 83 training_loss 0.10025704378262162 test_loss: 0.11041984558105469
epoch: 84 training_loss 0.09615887828171253 test_loss: 0.097538423538208
epoch: 85 training_loss 0.0946632275916636 test_loss: 0.11081972122192382
epoch: 86 training_loss 0.09486865954473615 test_loss: 0.10064767599105835
epoch: 87 training_loss 0.09656713718548417 test_loss: 0.10227923393249512
epoch: 88 training_loss 0.08889870829880238 test_loss: 0.10957682132720947
epoch: 89 training_loss 0.102419195137918 test_loss: 0.10487756729125977
epoch: 90 training_loss 0.0996411469206214 test_loss: 0.09716131091117859
epoch: 91 training_loss 0.09594747060909867 test_loss: 0.13459631204605102
epoch: 92 training_loss 0.10036470863968133 test_loss: 0.11722114086151122
epoch: 93 training_loss 0.09974600674584508 test_loss: 0.11299502849578857
epoch: 94 training_loss 0.09555119439959527 test_loss: 0.1039425253868103
epoch: 95 training_loss 0.09184070764109492 test_loss: 0.10716443061828614
epoch: 96 training_loss 0.1006813089735806 test_loss: 0.10419059991836548
epoch: 97 training_loss 0.09828741777688264 test_loss: 0.10120742321014405
epoch: 98 training_loss 0.09264878567308188 test_loss: 0.10291477441787719
epoch: 99 training_loss 0.09789065603166819 test_loss: 0.10512697696685791
epoch: 100 training_loss 0.10288866385817527 test_loss: 0.10010621547698975
epoch: 101 training_loss 0.09737086199223995 test_loss: 0.09520304799079896
epoch: 102 training_loss 0.09756138633936644 test_loss: 0.11420983076095581
epoch: 103 training_loss 0.09025593617931009 test_loss: 0.11335146427154541
epoch: 104 training_loss 0.09084142895415426 test_loss: 0.1111606240272522
epoch: 105 training_loss 0.09490813814103603 test_loss: 0.09021695256233216
epoch: 106 training_loss 0.09551916096359492 test_loss: 0.10646330118179322
epoch: 107 training_loss 0.09968987464904785 test_loss: 0.10165292024612427
epoch: 108 training_loss 0.09851518595591187 test_loss: 0.11782886981964111
epoch: 109 training_loss 0.09687652621418237 test_loss: 0.10869542360305787
epoch: 110 training_loss 0.09092493623495101 test_loss: 0.11442910432815552
epoch: 111 training_loss 0.09327231513336301 test_loss: 0.10609951019287109
epoch: 112 training_loss 0.09265206310898065 test_loss: 0.1020621657371521
epoch: 113 training_loss 0.09770985079929233 test_loss: 0.09772025942802429
epoch: 114 training_loss 0.0950654536485672 test_loss: 0.10783381462097168
epoch: 115 training_loss 0.09874112695455552 test_loss: 0.1178774118423462
epoch: 116 training_loss 0.09490921398624778 test_loss: 0.10866177082061768
epoch: 117 training_loss 0.08942747943103313 test_loss: 0.11080828905105591
epoch: 118 training_loss 0.09411604348570109 test_loss: 0.1220130443572998
epoch: 119 training_loss 0.09414046924561262 test_loss: 0.1087113857269287
epoch: 120 training_loss 0.09343367425724863 test_loss: 0.13210316896438598
epoch: 121 training_loss 0.09658846247941255 test_loss: 0.12494767904281616
epoch: 122 training_loss 0.09834769014269114 test_loss: 0.0980542004108429
epoch: 123 training_loss 0.09994599089026451 test_loss: 0.11748521327972412
epoch: 124 training_loss 0.08503102688118816 test_loss: 0.10279667377471924
epoch: 125 training_loss 0.0925884641893208 test_loss: 0.1075998067855835
epoch: 126 training_loss 0.09151814641430973 test_loss: 0.1299458146095276
epoch: 127 training_loss 0.09112553868442774 test_loss: 0.1257344126701355
epoch: 128 training_loss 0.09667280204594135 test_loss: 0.11382894515991211
epoch: 129 training_loss 0.08761003371328116 test_loss: 0.1305290699005127
epoch: 130 training_loss 0.08737508255057037 test_loss: 0.11224006414413452
epoch: 131 training_loss 0.08830800907686352 test_loss: 0.09937348365783691
epoch: 132 training_loss 0.08982551641762257 test_loss: 0.11819020509719849
epoch: 133 training_loss 0.09981681181117892 test_loss: 0.10786247253417969
epoch: 134 training_loss 0.0863686629012227 test_loss: 0.09520562887191772
epoch: 135 training_loss 0.10125950379297137 test_loss: 0.11144514083862304
epoch: 136 training_loss 0.09109812691807746 test_loss: 0.1157188057899475
epoch: 137 training_loss 0.08787205032072962 test_loss: 0.11196779012680054
epoch: 138 training_loss 0.09469854805618524 test_loss: 0.0981545865535736
epoch: 139 training_loss 0.09228252090513706 test_loss: 0.1031480073928833
epoch: 140 training_loss 0.09389678426086903 test_loss: 0.10911147594451905
epoch: 141 training_loss 0.0964323447830975 test_loss: 0.11845208406448364
epoch: 142 training_loss 0.09349619327113032 test_loss: 0.10219038724899292
epoch: 143 training_loss 0.09010875664651394 test_loss: 0.11138356924057007
epoch: 144 training_loss 0.08740637348964811 test_loss: 0.12010133266448975
epoch: 145 training_loss 0.09059714606031775 test_loss: 0.10462833642959594
epoch: 146 training_loss 0.09025450617074966 test_loss: 0.11396851539611816
epoch: 147 training_loss 0.09166463367640972 test_loss: 0.10345498323440552
epoch: 148 training_loss 0.08946483446285129 test_loss: 0.10771726369857788
epoch: 149 training_loss 0.08999342415481806 test_loss: 0.10119258165359497
epoch: 0 training_loss 0.302070621624589 test_loss: 0.18166881799697876
epoch: 1 training_loss 0.17920527294278144 test_loss: 0.1488014578819275
epoch: 2 training_loss 0.14671520184725523 test_loss: 0.15533733367919922
epoch: 3 training_loss 0.13816616017371416 test_loss: 0.1376744508743286
epoch: 4 training_loss 0.12697817783802748 test_loss: 0.12204114198684693
epoch: 5 training_loss 0.11871912378817796 test_loss: 0.11953545808792114
epoch: 6 training_loss 0.12365253370255232 test_loss: 0.12314324378967285
epoch: 7 training_loss 0.11698341380804778 test_loss: 0.1211808443069458
epoch: 8 training_loss 0.11234855577349663 test_loss: 0.12538750171661378
epoch: 9 training_loss 0.12271472308784723 test_loss: 0.11971139907836914
epoch: 10 training_loss 0.11964964602142572 test_loss: 0.11255419254302979
epoch: 11 training_loss 0.11578384902328252 test_loss: 0.12218834161758423
epoch: 12 training_loss 0.11956185415387154 test_loss: 0.11403547525405884
epoch: 13 training_loss 0.11679718468338252 test_loss: 0.11890307664871216
epoch: 14 training_loss 0.1112841417081654 test_loss: 0.1142689824104309
epoch: 15 training_loss 0.11658839870244264 test_loss: 0.11950031518936158
epoch: 16 training_loss 0.10985468465834856 test_loss: 0.11891121864318847
epoch: 17 training_loss 0.11088288312777878 test_loss: 0.12025394439697265
epoch: 18 training_loss 0.11455963879823684 test_loss: 0.10088281631469727
epoch: 19 training_loss 0.10505493013188243 test_loss: 0.11224106550216675
epoch: 20 training_loss 0.1049040823802352 test_loss: 0.11035581827163696
epoch: 21 training_loss 0.12006616163998843 test_loss: 0.08863771557807923
epoch: 22 training_loss 0.11321533650159836 test_loss: 0.11078132390975952
epoch: 23 training_loss 0.11018872357904912 test_loss: 0.0990288257598877
epoch: 24 training_loss 0.10039997756481171 test_loss: 0.11117448806762695
epoch: 25 training_loss 0.10934500532224774 test_loss: 0.10354747772216796
epoch: 26 training_loss 0.10464241411536931 test_loss: 0.11252868175506592
epoch: 27 training_loss 0.1071242162771523 test_loss: 0.11067278385162353
epoch: 28 training_loss 0.11515349306166173 test_loss: 0.11314756870269775
epoch: 29 training_loss 0.11231940474361181 test_loss: 0.10859589576721192
epoch: 30 training_loss 0.10646939178928733 test_loss: 0.11301814317703247
epoch: 31 training_loss 0.10571382325142623 test_loss: 0.10895388126373291
epoch: 32 training_loss 0.10541531775146723 test_loss: 0.1148659348487854
epoch: 33 training_loss 0.098994159437716 test_loss: 0.10931082963943481
epoch: 34 training_loss 0.10841463414952159 test_loss: 0.10161033868789673
epoch: 35 training_loss 0.09980524562299252 test_loss: 0.11600328683853149
epoch: 36 training_loss 0.10769365396350622 test_loss: 0.11604249477386475
epoch: 37 training_loss 0.11758358854800463 test_loss: 0.11147421598434448
epoch: 38 training_loss 0.11585617516189814 test_loss: 0.09597017168998719
epoch: 39 training_loss 0.10407620925456286 test_loss: 0.11323577165603638
epoch: 40 training_loss 0.11022399082779884 test_loss: 0.11237640380859375
epoch: 41 training_loss 0.10147017115727067 test_loss: 0.10596342086791992
epoch: 42 training_loss 0.10773856688290834 test_loss: 0.1005474328994751
epoch: 43 training_loss 0.09998768731951714 test_loss: 0.10583771467208862
epoch: 44 training_loss 0.1011536630988121 test_loss: 0.1213732123374939
epoch: 45 training_loss 0.09724984455853701 test_loss: 0.1188477635383606
epoch: 46 training_loss 0.11052868951112033 test_loss: 0.11023237705230712
epoch: 47 training_loss 0.1066711832024157 test_loss: 0.09805408120155334
epoch: 48 training_loss 0.1075363102927804 test_loss: 0.10501933097839355
epoch: 49 training_loss 0.10239120589569212 test_loss: 0.11256661415100097
epoch: 50 training_loss 0.10121138056740164 test_loss: 0.11342511177062989
epoch: 51 training_loss 0.10230570420622825 test_loss: 0.10338997840881348
epoch: 52 training_loss 0.09776226375252009 test_loss: 0.10915448665618896
epoch: 53 training_loss 0.09931250279769302 test_loss: 0.10972630977630615
epoch: 54 training_loss 0.11101519286632539 test_loss: 0.10116194486618042
epoch: 55 training_loss 0.10479004306718707 test_loss: 0.11826744079589843
epoch: 56 training_loss 0.09791834959760308 test_loss: 0.10200279951095581
epoch: 57 training_loss 0.10448652997612953 test_loss: 0.09766197800636292
epoch: 58 training_loss 0.10553066322579979 test_loss: 0.09039846062660217
epoch: 59 training_loss 0.09643708048388362 test_loss: 0.1266900658607483
epoch: 60 training_loss 0.10512740282341838 test_loss: 0.10158429145812989
epoch: 61 training_loss 0.10750773133710027 test_loss: 0.11290215253829956
epoch: 62 training_loss 0.10529787670820952 test_loss: 0.12753491401672362
epoch: 63 training_loss 0.09254138886928559 test_loss: 0.09436709880828857
epoch: 64 training_loss 0.10054496651515364 test_loss: 0.10045627355575562
epoch: 65 training_loss 0.10022791307419539 test_loss: 0.1112529993057251
epoch: 66 training_loss 0.10494735445827245 test_loss: 0.09446324706077576
epoch: 67 training_loss 0.10744248831644654 test_loss: 0.11610392332077027
epoch: 68 training_loss 0.09759849961847067 test_loss: 0.12942056655883788
epoch: 69 training_loss 0.09948876971378923 test_loss: 0.11102547645568847
epoch: 70 training_loss 0.09882468551397323 test_loss: 0.09857661128044129
epoch: 71 training_loss 0.09904749494045972 test_loss: 0.09528685212135315
epoch: 72 training_loss 0.09716675780713559 test_loss: 0.10215356349945068
epoch: 73 training_loss 0.09761352179571986 test_loss: 0.10431562662124634
epoch: 74 training_loss 0.10557358004152775 test_loss: 0.11797058582305908
epoch: 75 training_loss 0.09871202945709229 test_loss: 0.10875875949859619
epoch: 76 training_loss 0.10438382117077709 test_loss: 0.10301196575164795
epoch: 77 training_loss 0.10282374044880271 test_loss: 0.11889384984970093
epoch: 78 training_loss 0.10559526344761253 test_loss: 0.11103957891464233
epoch: 79 training_loss 0.10040838569402695 test_loss: 0.11612825393676758
epoch: 80 training_loss 0.10294385647401214 test_loss: 0.09559187293052673
epoch: 81 training_loss 0.10233395835384726 test_loss: 0.10310695171356202
epoch: 82 training_loss 0.09936605831608177 test_loss: 0.1309174418449402
epoch: 83 training_loss 0.09360517954453826 test_loss: 0.12033008337020874
epoch: 84 training_loss 0.10216858461499215 test_loss: 0.10774236917495728
epoch: 85 training_loss 0.09714885799214244 test_loss: 0.10530211925506591
epoch: 86 training_loss 0.09598065067082644 test_loss: 0.11539199352264404
epoch: 87 training_loss 0.10605941662564873 test_loss: 0.10689125061035157
epoch: 88 training_loss 0.0974339746683836 test_loss: 0.12577242851257325
epoch: 89 training_loss 0.10699401054531336 test_loss: 0.0962806761264801
epoch: 90 training_loss 0.10161171032115818 test_loss: 0.106201434135437
epoch: 91 training_loss 0.09919314792379737 test_loss: 0.1147118091583252
epoch: 92 training_loss 0.10669798502698541 test_loss: 0.10550051927566528
epoch: 93 training_loss 0.09859391366131603 test_loss: 0.12110257148742676
epoch: 94 training_loss 0.09200116526335478 test_loss: 0.12026901245117187
epoch: 95 training_loss 0.10263875994831323 test_loss: 0.11126861572265626
epoch: 96 training_loss 0.09971779489889741 test_loss: 0.12842872142791747
epoch: 97 training_loss 0.09956447915174067 test_loss: 0.09658107161521912
epoch: 98 training_loss 0.09710802368819714 test_loss: 0.1022464394569397
epoch: 99 training_loss 0.0991048363596201 test_loss: 0.0912087082862854
epoch: 100 training_loss 0.09791211917996406 test_loss: 0.11721738576889038
epoch: 101 training_loss 0.09817168843001127 test_loss: 0.09786074757575988
epoch: 102 training_loss 0.10208000387996435 test_loss: 0.1092877984046936
epoch: 103 training_loss 0.09858831046149134 test_loss: 0.10566648244857788
epoch: 104 training_loss 0.09462578689679503 test_loss: 0.11743841171264649
epoch: 105 training_loss 0.09824110094457865 test_loss: 0.10675339698791504
epoch: 106 training_loss 0.10512717867270112 test_loss: 0.11742010116577148
epoch: 107 training_loss 0.09635270340368152 test_loss: 0.09605527520179749
epoch: 108 training_loss 0.09922251516021788 test_loss: 0.10832685232162476
epoch: 109 training_loss 0.09578189773485064 test_loss: 0.12661702632904054
epoch: 110 training_loss 0.09692501187324525 test_loss: 0.10354951620101929
epoch: 111 training_loss 0.09963161382824183 test_loss: 0.11796852350234985
epoch: 112 training_loss 0.10159105483442545 test_loss: 0.11200435161590576
epoch: 113 training_loss 0.0978115339577198 test_loss: 0.1264404296875
epoch: 114 training_loss 0.10000524392351508 test_loss: 0.09689252376556397
epoch: 115 training_loss 0.09969300264492631 test_loss: 0.11047575473785401
epoch: 116 training_loss 0.0967989246174693 test_loss: 0.12975128889083862
epoch: 117 training_loss 0.0969314919412136 test_loss: 0.12299323081970215
epoch: 118 training_loss 0.0913750469405204 test_loss: 0.09767131209373474
epoch: 119 training_loss 0.09179420541971922 test_loss: 0.11762473583221436
epoch: 120 training_loss 0.09570403680205346 test_loss: 0.10779300928115845
epoch: 121 training_loss 0.09865909561514855 test_loss: 0.1310235381126404
epoch: 122 training_loss 0.10178276078775525 test_loss: 0.11634587049484253
epoch: 123 training_loss 0.09883852574974299 test_loss: 0.0946759283542633
epoch: 124 training_loss 0.09399305073544383 test_loss: 0.11319248676300049
epoch: 125 training_loss 0.10001815699040889 test_loss: 0.11069458723068237
epoch: 126 training_loss 0.09398101355880499 test_loss: 0.10759443044662476
epoch: 127 training_loss 0.09684967257082462 test_loss: 0.13318017721176148
epoch: 128 training_loss 0.09289292082190513 test_loss: 0.08680286407470703
epoch: 129 training_loss 0.09089890852570534 test_loss: 0.08951813578605652
epoch: 130 training_loss 0.10753105539828539 test_loss: 0.11789923906326294
epoch: 131 training_loss 0.10662888012826442 test_loss: 0.11537119150161743
epoch: 132 training_loss 0.1000589420646429 test_loss: 0.10707948207855225
epoch: 133 training_loss 0.09379955120384693 test_loss: 0.11791290044784546
epoch: 134 training_loss 0.09545325174927712 test_loss: 0.11087719202041627
epoch: 135 training_loss 0.0979821359552443 test_loss: 0.11058660745620727
epoch: 136 training_loss 0.09563855031505226 test_loss: 0.1123547911643982
epoch: 137 training_loss 0.09229415148496628 test_loss: 0.11128059625625611
epoch: 138 training_loss 0.09780280396342278 test_loss: 0.13875104188919068
epoch: 139 training_loss 0.09485426414757966 test_loss: 0.11511063575744629
epoch: 140 training_loss 0.09638248682022095 test_loss: 0.10400913953781128
epoch: 141 training_loss 0.10190047591924667 test_loss: 0.09548645615577697
epoch: 142 training_loss 0.09834477756172419 test_loss: 0.10151829719543456
epoch: 143 training_loss 0.09450094021856785 test_loss: 0.1078067660331726
epoch: 144 training_loss 0.09454068638384343 test_loss: 0.12376775741577148
epoch: 145 training_loss 0.09634187582880259 test_loss: 0.0982793390750885
epoch: 146 training_loss 0.0938037734478712 test_loss: 0.11166690587997437
epoch: 147 training_loss 0.09696518013253808 test_loss: 0.1196130633354187
epoch: 148 training_loss 0.09048023022711277 test_loss: 0.11503064632415771
epoch: 149 training_loss 0.09541168781463057 test_loss: 0.11795008182525635
epoch: 0 training_loss 0.34268915399909017 test_loss: 0.23393378257751465
epoch: 1 training_loss 0.1816109897941351 test_loss: 0.1708090901374817
epoch: 2 training_loss 0.1514231312274933 test_loss: 0.13655030727386475
epoch: 3 training_loss 0.1347673036530614 test_loss: 0.14288455247879028
epoch: 4 training_loss 0.128169578127563 test_loss: 0.13268152475357056
epoch: 5 training_loss 0.12727439131587745 test_loss: 0.13287127017974854
epoch: 6 training_loss 0.12139406949281692 test_loss: 0.14050971269607543
epoch: 7 training_loss 0.12352939866483212 test_loss: 0.134981906414032
epoch: 8 training_loss 0.11668848734349012 test_loss: 0.14334708452224731
epoch: 9 training_loss 0.11951552677899599 test_loss: 0.13934704065322875
epoch: 10 training_loss 0.10894560171291232 test_loss: 0.13336671590805055
epoch: 11 training_loss 0.10662207305431366 test_loss: 0.11859952211380005
epoch: 12 training_loss 0.1133509399369359 test_loss: 0.1304451584815979
epoch: 13 training_loss 0.10625473100692034 test_loss: 0.12937910556793214
epoch: 14 training_loss 0.10861383903771639 test_loss: 0.1316632866859436
epoch: 15 training_loss 0.10903008941560983 test_loss: 0.13337503671646117
epoch: 16 training_loss 0.11668700424954295 test_loss: 0.13872684240341188
epoch: 17 training_loss 0.10205184353515506 test_loss: 0.13720327615737915
epoch: 18 training_loss 0.10865229535847902 test_loss: 0.12445735931396484
epoch: 19 training_loss 0.0995684738084674 test_loss: 0.12494070529937744
epoch: 20 training_loss 0.10275602485984564 test_loss: 0.13905621767044068
epoch: 21 training_loss 0.10616294145584107 test_loss: 0.1272905945777893
epoch: 22 training_loss 0.1090126582980156 test_loss: 0.12469950914382935
epoch: 23 training_loss 0.09983278090134264 test_loss: 0.13184386491775513
epoch: 24 training_loss 0.0965468443185091 test_loss: 0.1306618332862854
epoch: 25 training_loss 0.09644964398816228 test_loss: 0.1149646520614624
epoch: 26 training_loss 0.10012025956064463 test_loss: 0.12976405620574952
epoch: 27 training_loss 0.10147364109754563 test_loss: 0.1336933732032776
epoch: 28 training_loss 0.10466679433360696 test_loss: 0.12436597347259522
epoch: 29 training_loss 0.1119970959238708 test_loss: 0.12406949996948242
epoch: 30 training_loss 0.10906133694574237 test_loss: 0.12560124397277833
epoch: 31 training_loss 0.10166247103363275 test_loss: 0.13298085927963257
epoch: 32 training_loss 0.10485893892124296 test_loss: 0.10227272510528565
epoch: 33 training_loss 0.10983916770666838 test_loss: 0.14066109657287598
epoch: 34 training_loss 0.0945864912495017 test_loss: 0.131693434715271
epoch: 35 training_loss 0.10734438020735979 test_loss: 0.1170569896697998
epoch: 36 training_loss 0.10585320517420768 test_loss: 0.12131661176681519
epoch: 37 training_loss 0.09763001000508666 test_loss: 0.10893125534057617
epoch: 38 training_loss 0.10573335770517588 test_loss: 0.13192532062530518
epoch: 39 training_loss 0.10928555382415653 test_loss: 0.11643390655517578
epoch: 40 training_loss 0.10094199001789093 test_loss: 0.12913904190063477
epoch: 41 training_loss 0.10249012771993876 test_loss: 0.10831968784332276
epoch: 42 training_loss 0.09413161050528288 test_loss: 0.12687764167785645
epoch: 43 training_loss 0.09875599917024375 test_loss: 0.12510137557983397
epoch: 44 training_loss 0.10675451766699552 test_loss: 0.11489183902740478
epoch: 45 training_loss 0.09890413679182529 test_loss: 0.1227375864982605
epoch: 46 training_loss 0.09628361707553268 test_loss: 0.10919915437698365
epoch: 47 training_loss 0.09772256266325713 test_loss: 0.1147391676902771
epoch: 48 training_loss 0.10347739316523075 test_loss: 0.10074422359466553
epoch: 49 training_loss 0.09660591816529632 test_loss: 0.10075267553329467
epoch: 50 training_loss 0.09619569174945354 test_loss: 0.13088847398757936
epoch: 51 training_loss 0.09203016545623541 test_loss: 0.10933541059494019
epoch: 52 training_loss 0.09487585194408893 test_loss: 0.11861920356750488
epoch: 53 training_loss 0.09751286011189222 test_loss: 0.12701867818832396
epoch: 54 training_loss 0.10015998005867005 test_loss: 0.12886626720428468
epoch: 55 training_loss 0.09797790732234717 test_loss: 0.10511538982391358
epoch: 56 training_loss 0.09800802560523153 test_loss: 0.1358274221420288
epoch: 57 training_loss 0.10276906611397862 test_loss: 0.11932264566421509
epoch: 58 training_loss 0.1003567230515182 test_loss: 0.10889197587966919
epoch: 59 training_loss 0.09678911780938507 test_loss: 0.11582703590393066
epoch: 60 training_loss 0.09697703907266259 test_loss: 0.11078623533248902
epoch: 61 training_loss 0.09835506247356535 test_loss: 0.12714757919311523
epoch: 62 training_loss 0.10272519530728459 test_loss: 0.11294548511505127
epoch: 63 training_loss 0.0978679483756423 test_loss: 0.13545624017715455
epoch: 64 training_loss 0.09707780107855797 test_loss: 0.13972309827804566
epoch: 65 training_loss 0.10032582309097052 test_loss: 0.1364848017692566
epoch: 66 training_loss 0.0926780368387699 test_loss: 0.11954373121261597
epoch: 67 training_loss 0.09738042298704386 test_loss: 0.11654685735702515
epoch: 68 training_loss 0.10599358595907687 test_loss: 0.1344359278678894
epoch: 69 training_loss 0.10341776071116328 test_loss: 0.13099924325942994
epoch: 70 training_loss 0.10015071518719196 test_loss: 0.12540030479431152
epoch: 71 training_loss 0.10335180750116706 test_loss: 0.12246586084365844
epoch: 72 training_loss 0.09368961673229932 test_loss: 0.11741973161697387
epoch: 73 training_loss 0.09885459695011377 test_loss: 0.1250307559967041
epoch: 74 training_loss 0.09677859729155898 test_loss: 0.12251697778701783
epoch: 75 training_loss 0.09662584189325571 test_loss: 0.11187291145324707
epoch: 76 training_loss 0.09652493068948388 test_loss: 0.12502126693725585
epoch: 77 training_loss 0.10408920880407095 test_loss: 0.11093032360076904
epoch: 78 training_loss 0.09859888970851899 test_loss: 0.11631357669830322
epoch: 79 training_loss 0.09793921489268541 test_loss: 0.1273511290550232
epoch: 80 training_loss 0.0958977040834725 test_loss: 0.1146570086479187
epoch: 81 training_loss 0.09493850337341428 test_loss: 0.10888811349868774
epoch: 82 training_loss 0.08748647274449467 test_loss: 0.108588707447052
epoch: 83 training_loss 0.08810065200552344 test_loss: 0.11767324209213256
epoch: 84 training_loss 0.09810727102681994 test_loss: 0.12852555513381958
epoch: 85 training_loss 0.09575825452804565 test_loss: 0.11562619209289551
epoch: 86 training_loss 0.09599531587213278 test_loss: 0.1051068902015686
epoch: 87 training_loss 0.09683128884062171 test_loss: 0.11956274509429932
epoch: 88 training_loss 0.09941640764474868 test_loss: 0.13692615032196045
epoch: 89 training_loss 0.09324270345270634 test_loss: 0.10801858901977539
epoch: 90 training_loss 0.09335382547229529 test_loss: 0.12478699684143066
epoch: 91 training_loss 0.09023044783622026 test_loss: 0.14321883916854858
epoch: 92 training_loss 0.09068530531600118 test_loss: 0.12701467275619507
epoch: 93 training_loss 0.09502167161554098 test_loss: 0.1046958327293396
epoch: 94 training_loss 0.08862035922706127 test_loss: 0.13037186861038208
epoch: 95 training_loss 0.09598587524145842 test_loss: 0.11444867849349975
epoch: 96 training_loss 0.09011293722316623 test_loss: 0.1205257534980774
epoch: 97 training_loss 0.09254174780100584 test_loss: 0.12430477142333984
epoch: 98 training_loss 0.09126677505671978 test_loss: 0.11896680593490601
epoch: 99 training_loss 0.08953266648575664 test_loss: 0.12376039028167725
epoch: 100 training_loss 0.09472699522972107 test_loss: 0.12780396938323973
epoch: 101 training_loss 0.09741096217185259 test_loss: 0.12583482265472412
epoch: 102 training_loss 0.09659584993496537 test_loss: 0.11541212797164917
epoch: 103 training_loss 0.09618406590074301 test_loss: 0.11678485870361328
epoch: 104 training_loss 0.09275589058175683 test_loss: 0.12220709323883057
epoch: 105 training_loss 0.0967501969449222 test_loss: 0.13634244203567505
epoch: 106 training_loss 0.10101938616484403 test_loss: 0.12764482498168944
epoch: 107 training_loss 0.09484585396945476 test_loss: 0.12714954614639282
epoch: 108 training_loss 0.09136732580140233 test_loss: 0.11780441999435425
epoch: 109 training_loss 0.09567539153620601 test_loss: 0.1135101556777954
epoch: 110 training_loss 0.09582811672240496 test_loss: 0.11871567964553834
epoch: 111 training_loss 0.09218902042135596 test_loss: 0.11979193687438965
epoch: 112 training_loss 0.09022724019363522 test_loss: 0.13410199880599977
epoch: 113 training_loss 0.08993637535721064 test_loss: 0.13825002908706666
epoch: 114 training_loss 0.09871359473094345 test_loss: 0.13133199214935304
epoch: 115 training_loss 0.0888973193615675 test_loss: 0.13061288595199586
epoch: 116 training_loss 0.09421662364155053 test_loss: 0.12524971961975098
epoch: 117 training_loss 0.09515710826963186 test_loss: 0.12501924037933348
epoch: 118 training_loss 0.0872816620208323 test_loss: 0.12086344957351684
epoch: 119 training_loss 0.09238753590732812 test_loss: 0.1376513123512268
epoch: 120 training_loss 0.09418675933033228 test_loss: 0.14122159481048585
epoch: 121 training_loss 0.09310906378552318 test_loss: 0.12339941263198853
epoch: 122 training_loss 0.08982424132525921 test_loss: 0.11500861644744872
epoch: 123 training_loss 0.09511668724939228 test_loss: 0.11989848613739014
epoch: 124 training_loss 0.0933767162449658 test_loss: 0.13505603075027467
epoch: 125 training_loss 0.09199589913710952 test_loss: 0.11829087734222413
epoch: 126 training_loss 0.09096685960888863 test_loss: 0.13618532419204712
epoch: 127 training_loss 0.09359230753034353 test_loss: 0.11823885440826416
epoch: 128 training_loss 0.0933460345864296 test_loss: 0.11707491874694824
epoch: 129 training_loss 0.09116922667250037 test_loss: 0.10001311302185059
epoch: 130 training_loss 0.09223927000537514 test_loss: 0.11488666534423828
epoch: 131 training_loss 0.08797808017581701 test_loss: 0.10680959224700928
epoch: 132 training_loss 0.0859196224436164 test_loss: 0.14158306121826172
epoch: 133 training_loss 0.09474896896630526 test_loss: 0.12284501791000366
epoch: 134 training_loss 0.08411130426451564 test_loss: 0.1351436972618103
epoch: 135 training_loss 0.09027501422911882 test_loss: 0.13261666297912597
epoch: 136 training_loss 0.0888446432724595 test_loss: 0.10886862277984619
epoch: 137 training_loss 0.08923886194825173 test_loss: 0.13429938554763793
epoch: 138 training_loss 0.09060055552050471 test_loss: 0.14716055393218994
epoch: 139 training_loss 0.08563266176730394 test_loss: 0.11687700748443604
epoch: 140 training_loss 0.09031980220228433 test_loss: 0.11492010354995727
epoch: 141 training_loss 0.09219863079488277 test_loss: 0.12870266437530517
epoch: 142 training_loss 0.08552376966923475 test_loss: 0.12431286573410034
epoch: 143 training_loss 0.08475300939753652 test_loss: 0.14459555149078368
epoch: 144 training_loss 0.09306832259520888 test_loss: 0.10815545320510864
epoch: 145 training_loss 0.08336478393524885 test_loss: 0.11178557872772217
epoch: 146 training_loss 0.08834268381819128 test_loss: 0.10205644369125366
epoch: 147 training_loss 0.08925839133560658 test_loss: 0.11144390106201171
epoch: 148 training_loss 0.08267350370064377 test_loss: 0.13300455808639527
epoch: 149 training_loss 0.08455395275726914 test_loss: 0.0896386206150055
epoch: 0 training_loss 0.3025954642891884 test_loss: 0.20331196784973143
epoch: 1 training_loss 0.16978298775851727 test_loss: 0.16671521663665773
epoch: 2 training_loss 0.14764829508960248 test_loss: 0.14299458265304565
epoch: 3 training_loss 0.12997905395925044 test_loss: 0.15689439773559571
epoch: 4 training_loss 0.12588862232863904 test_loss: 0.11991683244705201
epoch: 5 training_loss 0.12362210821360349 test_loss: 0.11180952787399293
epoch: 6 training_loss 0.13221658397465944 test_loss: 0.13887253999710084
epoch: 7 training_loss 0.11078146070241929 test_loss: 0.13362481594085693
epoch: 8 training_loss 0.12805186171084643 test_loss: 0.13502954244613646
epoch: 9 training_loss 0.12275801455602049 test_loss: 0.12019280195236207
epoch: 10 training_loss 0.11044208280742168 test_loss: 0.13197859525680541
epoch: 11 training_loss 0.1109599893912673 test_loss: 0.13632084131240846
epoch: 12 training_loss 0.11265274003148079 test_loss: 0.12474098205566406
epoch: 13 training_loss 0.11174245806410908 test_loss: 0.11218854188919067
epoch: 14 training_loss 0.11040500720962881 test_loss: 0.10272736549377441
epoch: 15 training_loss 0.09938313672319055 test_loss: 0.12707232236862182
epoch: 16 training_loss 0.1085179553925991 test_loss: 0.11779341697692872
epoch: 17 training_loss 0.10463893733918667 test_loss: 0.11668001413345337
epoch: 18 training_loss 0.1030357589200139 test_loss: 0.11967787742614747
epoch: 19 training_loss 0.10223333965986967 test_loss: 0.11809130907058715
epoch: 20 training_loss 0.10457210119813681 test_loss: 0.1363736629486084
epoch: 21 training_loss 0.11924138922244311 test_loss: 0.12476979494094849
epoch: 22 training_loss 0.10339034209027886 test_loss: 0.11504806280136108
epoch: 23 training_loss 0.11186764553189278 test_loss: 0.1245656967163086
epoch: 24 training_loss 0.10758286252617837 test_loss: 0.11742721796035767
epoch: 25 training_loss 0.10551811806857586 test_loss: 0.1329113721847534
epoch: 26 training_loss 0.10070551631972194 test_loss: 0.11361637115478515
epoch: 27 training_loss 0.10654949782416225 test_loss: 0.12327126264572144
epoch: 28 training_loss 0.10518832063302398 test_loss: 0.11890453100204468
epoch: 29 training_loss 0.09903642299585044 test_loss: 0.11540782451629639
epoch: 30 training_loss 0.113768550157547 test_loss: 0.1157801866531372
epoch: 31 training_loss 0.11333810199052095 test_loss: 0.1148525357246399
epoch: 32 training_loss 0.10207354042679072 test_loss: 0.11100281476974487
epoch: 33 training_loss 0.10804488968104124 test_loss: 0.09976459741592407
epoch: 34 training_loss 0.10163942459970712 test_loss: 0.12062216997146606
epoch: 35 training_loss 0.09845035530626774 test_loss: 0.13527970314025878
epoch: 36 training_loss 0.10713006185367703 test_loss: 0.11971812248229981
epoch: 37 training_loss 0.10209997717291117 test_loss: 0.10827703475952148
epoch: 38 training_loss 0.10920829072594643 test_loss: 0.11107702255249023
epoch: 39 training_loss 0.10246092177927495 test_loss: 0.10799700021743774
epoch: 40 training_loss 0.10325288600288332 test_loss: 0.11879382133483887
epoch: 41 training_loss 0.11062251426279544 test_loss: 0.11022434234619141
epoch: 42 training_loss 0.10306350287050009 test_loss: 0.09996094107627869
epoch: 43 training_loss 0.10971100311726331 test_loss: 0.11328350305557251
epoch: 44 training_loss 0.09577974654734135 test_loss: 0.11663265228271484
epoch: 45 training_loss 0.10180799530819058 test_loss: 0.1373318314552307
epoch: 46 training_loss 0.09787619575858116 test_loss: 0.12582656145095825
epoch: 47 training_loss 0.10485123101621867 test_loss: 0.09301797747611999
epoch: 48 training_loss 0.0968705753982067 test_loss: 0.09994914531707763
epoch: 49 training_loss 0.10426134599372744 test_loss: 0.13992499113082885
epoch: 50 training_loss 0.10121218917891384 test_loss: 0.10898029804229736
epoch: 51 training_loss 0.09965057166293263 test_loss: 0.10073229074478149
epoch: 52 training_loss 0.0986154954135418 test_loss: 0.10523977279663085
epoch: 53 training_loss 0.10165464516729117 test_loss: 0.1273086428642273
epoch: 54 training_loss 0.09768115440383554 test_loss: 0.10515575408935547
epoch: 55 training_loss 0.10115720696747303 test_loss: 0.10124324560165406
epoch: 56 training_loss 0.10230957275256515 test_loss: 0.11623694896697997
epoch: 57 training_loss 0.10320325199514628 test_loss: 0.10970238447189332
epoch: 58 training_loss 0.0985689340159297 test_loss: 0.1125463604927063
epoch: 59 training_loss 0.09987420443445444 test_loss: 0.13899288177490235
epoch: 60 training_loss 0.1015115095116198 test_loss: 0.11547549962997436
epoch: 61 training_loss 0.0967129959538579 test_loss: 0.11161096096038818
epoch: 62 training_loss 0.10592835210263729 test_loss: 0.10321234464645386
epoch: 63 training_loss 0.10225396318361163 test_loss: 0.11515780687332153
epoch: 64 training_loss 0.10141940403729677 test_loss: 0.12295485734939575
epoch: 65 training_loss 0.10840630698949098 test_loss: 0.109649395942688
epoch: 66 training_loss 0.10104707019403576 test_loss: 0.11549866199493408
epoch: 67 training_loss 0.10203777715563773 test_loss: 0.11490002870559693
epoch: 68 training_loss 0.09797149764373898 test_loss: 0.1066817045211792
epoch: 69 training_loss 0.10028752259910106 test_loss: 0.11961692571640015
epoch: 70 training_loss 0.09895888634026051 test_loss: 0.11903681755065917
epoch: 71 training_loss 0.09872837189584971 test_loss: 0.10539447069168091
epoch: 72 training_loss 0.10081116346642376 test_loss: 0.11104264259338378
epoch: 73 training_loss 0.10773233275860548 test_loss: 0.10232747793197632
epoch: 74 training_loss 0.10676871061325073 test_loss: 0.10777332782745361
epoch: 75 training_loss 0.1004267866909504 test_loss: 0.1229812502861023
epoch: 76 training_loss 0.09448988858610391 test_loss: 0.11194748878479004
epoch: 77 training_loss 0.09665777631103993 test_loss: 0.11700276136398316
epoch: 78 training_loss 0.09993715774267913 test_loss: 0.11221128702163696
epoch: 79 training_loss 0.10021683126688004 test_loss: 0.11110897064208984
epoch: 80 training_loss 0.09963699098676443 test_loss: 0.11041020154953003
epoch: 81 training_loss 0.10223283547908067 test_loss: 0.11202769279479981
epoch: 82 training_loss 0.09957012372091413 test_loss: 0.10125117301940918
epoch: 83 training_loss 0.10077629573643207 test_loss: 0.1207961082458496
epoch: 84 training_loss 0.10426430527120828 test_loss: 0.12425316572189331
epoch: 85 training_loss 0.09618018789216876 test_loss: 0.11428662538528442
epoch: 86 training_loss 0.10111479457467794 test_loss: 0.11382763385772705
epoch: 87 training_loss 0.1049582950025797 test_loss: 0.09125927686691285
epoch: 88 training_loss 0.0962158914655447 test_loss: 0.10740394592285156
epoch: 89 training_loss 0.09912776621058583 test_loss: 0.10302585363388062
epoch: 90 training_loss 0.09635560531169177 test_loss: 0.11587834358215332
epoch: 91 training_loss 0.09739465311169625 test_loss: 0.11666595935821533
epoch: 92 training_loss 0.10010959206148982 test_loss: 0.12855974435806275
epoch: 93 training_loss 0.09714510077610612 test_loss: 0.11491312980651855
epoch: 94 training_loss 0.09794091586023569 test_loss: 0.10918905735015869
epoch: 95 training_loss 0.09708971658721566 test_loss: 0.11422487497329711
epoch: 96 training_loss 0.09718445057049394 test_loss: 0.12055801153182984
epoch: 97 training_loss 0.10586602523922921 test_loss: 0.09544100165367127
epoch: 98 training_loss 0.09184762999415398 test_loss: 0.11742540597915649
epoch: 99 training_loss 0.09426557272672653 test_loss: 0.10629382133483886
epoch: 100 training_loss 0.09094537556171417 test_loss: 0.10770647525787354
epoch: 101 training_loss 0.09998613487929106 test_loss: 0.09954835176467895
epoch: 102 training_loss 0.10134688939899206 test_loss: 0.11496306657791137
epoch: 103 training_loss 0.09979439999908209 test_loss: 0.12671432495117188
epoch: 104 training_loss 0.09202381195500493 test_loss: 0.1270210862159729
epoch: 105 training_loss 0.09019527560099959 test_loss: 0.11556495428085327
epoch: 106 training_loss 0.09116561520844697 test_loss: 0.12077177762985229
epoch: 107 training_loss 0.09569907877594233 test_loss: 0.12298709154129028
epoch: 108 training_loss 0.09485369723290205 test_loss: 0.13303385972976683
epoch: 109 training_loss 0.09799650177359581 test_loss: 0.12155778408050537
epoch: 110 training_loss 0.10039713121950626 test_loss: 0.11082997322082519
epoch: 111 training_loss 0.09267914356663823 test_loss: 0.11585334539413453
epoch: 112 training_loss 0.09329473592340946 test_loss: 0.10876020193099975
epoch: 113 training_loss 0.08882566928863525 test_loss: 0.1327236533164978
epoch: 114 training_loss 0.0923736041225493 test_loss: 0.12516580820083617
epoch: 115 training_loss 0.09498545767739415 test_loss: 0.11793878078460693
epoch: 116 training_loss 0.08642367351800204 test_loss: 0.10725724697113037
epoch: 117 training_loss 0.09377808032557368 test_loss: 0.11067266464233398
epoch: 118 training_loss 0.09276666115969419 test_loss: 0.11240172386169434
epoch: 119 training_loss 0.09477527752518654 test_loss: 0.12038781642913818
epoch: 120 training_loss 0.09076517110690474 test_loss: 0.09898174405097962
epoch: 121 training_loss 0.09172893712297082 test_loss: 0.11811144351959228
epoch: 122 training_loss 0.0969092326797545 test_loss: 0.12736716270446777
epoch: 123 training_loss 0.0914164124801755 test_loss: 0.12365821599960328
epoch: 124 training_loss 0.09291992401704192 test_loss: 0.09933262467384338
epoch: 125 training_loss 0.10142993476241827 test_loss: 0.10878627300262451
epoch: 126 training_loss 0.09315764663740993 test_loss: 0.10872728824615478
epoch: 127 training_loss 0.09065918439999222 test_loss: 0.12777286767959595
epoch: 128 training_loss 0.09425927635282277 test_loss: 0.10746482610702515
epoch: 129 training_loss 0.08226087737828493 test_loss: 0.12047566175460815
epoch: 130 training_loss 0.0958153660967946 test_loss: 0.12410683631896972
epoch: 131 training_loss 0.08569122093729675 test_loss: 0.12478461265563964
epoch: 132 training_loss 0.09072352057322859 test_loss: 0.1185100793838501
epoch: 133 training_loss 0.08963464964181185 test_loss: 0.11776458024978638
epoch: 134 training_loss 0.0939290409348905 test_loss: 0.12617384195327758
epoch: 135 training_loss 0.09082609534263611 test_loss: 0.1051485300064087
epoch: 136 training_loss 0.09600532377138733 test_loss: 0.1275923728942871
epoch: 137 training_loss 0.09229966811835766 test_loss: 0.09662861227989197
epoch: 138 training_loss 0.08938523080199957 test_loss: 0.12639569044113158
epoch: 139 training_loss 0.09666819486767053 test_loss: 0.11736010313034058
epoch: 140 training_loss 0.0950695268623531 test_loss: 0.11848270893096924
epoch: 141 training_loss 0.09885219600051641 test_loss: 0.11677355766296386
epoch: 142 training_loss 0.08933607246726752 test_loss: 0.11466307640075683
epoch: 143 training_loss 0.09499639358371496 test_loss: 0.12068380117416382
epoch: 144 training_loss 0.09088203309103847 test_loss: 0.1337818384170532
epoch: 145 training_loss 0.09433395531028509 test_loss: 0.12359119653701782
epoch: 146 training_loss 0.09762521266937256 test_loss: 0.11087884902954101
epoch: 147 training_loss 0.0996690303273499 test_loss: 0.11486958265304566
epoch: 148 training_loss 0.0973377001285553 test_loss: 0.11510049104690552
epoch: 149 training_loss 0.10047493785619736 test_loss: 0.13206675052642822
episode: 0 training return: -999.9733272180531
episode: 1 training return: -999.9713721555286
episode: 2 training return: -999.9790735759416
episode: 3 training return: -999.9800005601853
epoch: 1 test_true_pfm: 0.3995517177852892 sim_pfm: -999.9485354162715
episode: 4 training return: -999.9596689471176
episode: 5 training return: -999.9758996178996
episode: 6 training return: -999.9608151107018
episode: 7 training return: -999.9617867199607
epoch: 2 test_true_pfm: -0.7854950477941912 sim_pfm: -999.94776823746
episode: 8 training return: -999.9675563650022
episode: 9 training return: -999.9681346964867
episode: 10 training return: -999.9693125219445
episode: 11 training return: -999.9794755794826
epoch: 3 test_true_pfm: 0.6246116758994701 sim_pfm: -999.9475626739613
episode: 12 training return: -999.9767419847492
episode: 13 training return: -999.9632228257551
episode: 14 training return: -999.9586490116144
episode: 15 training return: -999.9664978502633
epoch: 4 test_true_pfm: -0.0541778386614582 sim_pfm: -999.9478316643936
episode: 16 training return: -999.953798927842
episode: 17 training return: -999.9691133787378
episode: 18 training return: -999.9704730850092
episode: 19 training return: -999.9687656296394
epoch: 5 test_true_pfm: -0.12338582663922713 sim_pfm: -999.9472478771295
episode: 20 training return: -999.9819927896318
episode: 21 training return: -999.9735109993421
episode: 22 training return: -999.9685866536321
episode: 23 training return: -999.9792424185117
epoch: 6 test_true_pfm: -0.6789959157397227 sim_pfm: -999.9472436278505
episode: 24 training return: -999.9692875756805
episode: 25 training return: -999.975275269573
episode: 26 training return: -999.9737620625231
episode: 27 training return: -999.9716923182868
epoch: 7 test_true_pfm: -0.11590990619446928 sim_pfm: -999.947724522974
episode: 28 training return: -999.971049354393
episode: 29 training return: -999.9713849754883
episode: 30 training return: -999.9617265869466
episode: 31 training return: -999.9679350533513
epoch: 8 test_true_pfm: -0.7560455231006656 sim_pfm: -999.9479162330944
episode: 32 training return: -999.9681822644161
episode: 33 training return: -999.9543400184052
episode: 34 training return: -999.970067683577
episode: 35 training return: -999.970958621805
epoch: 9 test_true_pfm: -0.319621152327858 sim_pfm: -999.9473397956987
episode: 36 training return: -999.9732045650926
episode: 37 training return: -999.9741450366662
episode: 38 training return: -999.9643244145478
episode: 39 training return: -999.968277430082
epoch: 10 test_true_pfm: -0.10880464975632463 sim_pfm: -999.9478965667267
episode: 40 training return: -999.9687266449985
episode: 41 training return: -999.9746482816936
episode: 42 training return: -999.9651472215817
episode: 43 training return: -999.9738906540914
epoch: 11 test_true_pfm: -0.721839653825615 sim_pfm: -999.947437037595
episode: 44 training return: -999.9780437478278
episode: 45 training return: -999.9763296520292
episode: 46 training return: -999.9726184361175
episode: 47 training return: -999.9709178016429
epoch: 12 test_true_pfm: -0.33000728251864725 sim_pfm: -999.9476982569113
episode: 48 training return: -999.9713943996975
episode: 49 training return: -999.9819547128785
episode: 50 training return: -999.968743955594
episode: 51 training return: -999.9729312217693
epoch: 13 test_true_pfm: -0.4207558173750238 sim_pfm: -999.947835093555
episode: 52 training return: -999.9624626689822
episode: 53 training return: -999.9588083037429
episode: 54 training return: -999.9762612168023
episode: 55 training return: -999.9672948838464
epoch: 14 test_true_pfm: -0.3447136586137743 sim_pfm: -999.9472687923277
episode: 56 training return: -999.9739763231032
episode: 57 training return: -999.9618440972984
episode: 58 training return: -999.9786581265698
episode: 59 training return: -999.9676653323729
epoch: 15 test_true_pfm: -0.5653667526288803 sim_pfm: -999.9478117257769
episode: 60 training return: -999.9678026588455
episode: 61 training return: -999.9733218622536
episode: 62 training return: -999.9813556304501
episode: 63 training return: -999.9690019648497
epoch: 16 test_true_pfm: -0.25540117558679115 sim_pfm: -999.9480274807728
episode: 64 training return: -999.9747999950054
episode: 65 training return: -999.9733928355438
episode: 66 training return: -1000.7496087279188
episode: 67 training return: -999.9649479389577
epoch: 17 test_true_pfm: -0.5824588531428669 sim_pfm: -999.9482708165357
episode: 68 training return: -999.9713375769456
episode: 69 training return: -999.9827281435192
episode: 70 training return: -999.9718628638832
episode: 71 training return: -999.9744907223615
epoch: 18 test_true_pfm: -0.5647979862670701 sim_pfm: -999.9476063507926
episode: 72 training return: -999.9708444042012
episode: 73 training return: -999.9696344879457
episode: 74 training return: -999.9758560465641
episode: 75 training return: -999.9674930567558
epoch: 19 test_true_pfm: -0.14993019120929477 sim_pfm: -999.9478086566231
episode: 76 training return: -999.9631526404722
episode: 77 training return: -999.977948904494
episode: 78 training return: -999.9748785304146
episode: 79 training return: -999.9721373771591
epoch: 20 test_true_pfm: -0.3305549465747183 sim_pfm: -999.9478234061813
episode: 80 training return: -999.9742255641388
episode: 81 training return: -999.9683174961749
episode: 82 training return: -999.9687240243363
episode: 83 training return: -999.973734561585
epoch: 21 test_true_pfm: -0.3663003127666998 sim_pfm: -999.9476886413322
episode: 84 training return: -999.9261328778708
episode: 85 training return: -999.9704258069477
episode: 86 training return: -999.9700320237603
episode: 87 training return: -999.9767251311034
epoch: 22 test_true_pfm: -0.4725122840462557 sim_pfm: -999.9480630628065
episode: 88 training return: -999.964209021012
episode: 89 training return: -999.9618686665158
episode: 90 training return: -999.9721127817712
episode: 91 training return: -999.9719162880904
epoch: 23 test_true_pfm: -0.07688923749469305 sim_pfm: -999.9477563433848
episode: 92 training return: -999.9756385391457
episode: 93 training return: -999.9733575200995
episode: 94 training return: -999.971559299911
episode: 95 training return: -999.974793600282
epoch: 24 test_true_pfm: 0.1516378228868477 sim_pfm: -999.9480499930329
episode: 96 training return: -999.9713353670427
episode: 97 training return: -999.971013857711
episode: 98 training return: -999.9589440625828
episode: 99 training return: -999.9765216666075
epoch: 25 test_true_pfm: -0.7722955067595404 sim_pfm: -999.94778606416
episode: 100 training return: -999.9627728678737
episode: 101 training return: -999.9723606319576
episode: 102 training return: -999.9687946411863
episode: 103 training return: -999.9671581818333
epoch: 26 test_true_pfm: -0.16225889586464662 sim_pfm: -999.9478535096792
episode: 104 training return: -999.9722424428713
episode: 105 training return: -999.9692676849292
episode: 106 training return: -999.964766771353
episode: 107 training return: -999.9574510124756
epoch: 27 test_true_pfm: -0.39337001111980524 sim_pfm: -999.9472030776369
episode: 108 training return: -999.9691413631399
episode: 109 training return: -999.964735475159
episode: 110 training return: -999.9771233471326
episode: 111 training return: -999.9707364121937
epoch: 28 test_true_pfm: 0.047518817208846244 sim_pfm: -999.9480471225417
episode: 112 training return: -999.9751522763921
episode: 113 training return: -999.9813721697624
episode: 114 training return: -999.9800743398038
episode: 115 training return: -999.9729709271539
epoch: 29 test_true_pfm: -0.09832020931220575 sim_pfm: -999.9480942247322
episode: 116 training return: -999.9539698467927
episode: 117 training return: -999.9686800397806
episode: 118 training return: -999.9739134674948
episode: 119 training return: -999.9744645944293
epoch: 30 test_true_pfm: -0.16465576384783764 sim_pfm: -999.9479068407735
episode: 120 training return: -999.9726440245282
episode: 121 training return: -999.9032989252439
episode: 122 training return: -999.9680906727598
episode: 123 training return: -999.9711848072518
epoch: 31 test_true_pfm: -1.0062057767952073 sim_pfm: -999.9476681669762
episode: 124 training return: -999.9585617745731
episode: 125 training return: -999.9671273391458
episode: 126 training return: -999.9749763650416
episode: 127 training return: -999.9731409937681
epoch: 32 test_true_pfm: -0.6136207552131502 sim_pfm: -999.947752710796
episode: 128 training return: -999.96404750014
episode: 129 training return: -999.9853782539852
episode: 130 training return: -999.9623522279792
episode: 131 training return: -999.9738768745832
epoch: 33 test_true_pfm: -0.3787267165394373 sim_pfm: -999.9481109219054
episode: 132 training return: -999.9740946998816
episode: 133 training return: -999.9698606934331
episode: 134 training return: -1002.7271093082978
episode: 135 training return: -999.9666349850061
epoch: 34 test_true_pfm: -0.13235408081723266 sim_pfm: -999.9472964736196
episode: 136 training return: -999.9734058318962
episode: 137 training return: -999.975927809963
episode: 138 training return: -999.9621774544462
episode: 139 training return: -999.9765383004669
epoch: 35 test_true_pfm: 0.3591333750295504 sim_pfm: -999.9481606450831
episode: 140 training return: -999.9686075806381
episode: 141 training return: -999.9728240773737
episode: 142 training return: -999.9549417686271
episode: 143 training return: -999.9670254861225
epoch: 36 test_true_pfm: -0.20639248400079815 sim_pfm: -999.9468592069501
episode: 144 training return: -999.973852870962
episode: 145 training return: -999.9520131584924
episode: 146 training return: -999.9707039848555
episode: 147 training return: -999.9689338818895
epoch: 37 test_true_pfm: -0.6006600044338873 sim_pfm: -999.9468132367518
episode: 148 training return: -999.9738469544129
episode: 149 training return: -999.9750411774991
episode: 150 training return: -999.97042730973
episode: 151 training return: -999.9717305417989
epoch: 38 test_true_pfm: -0.13672460001340198 sim_pfm: -999.9478670575867
episode: 152 training return: -999.9669901705408
episode: 153 training return: -999.9639579070582
episode: 154 training return: -999.9700395367832
episode: 155 training return: -999.9811683855874
epoch: 39 test_true_pfm: -0.03382243798708722 sim_pfm: -999.9475143650315
episode: 156 training return: -999.9791269187698
episode: 157 training return: -999.972404468006
episode: 158 training return: -999.9678451538229
episode: 159 training return: -999.9737113278225
epoch: 40 test_true_pfm: -0.6280173179906019 sim_pfm: -999.9478670381028
episode: 160 training return: -999.9460544845231
episode: 161 training return: -999.9669776792756
episode: 162 training return: -999.9658636949721
episode: 163 training return: -999.9666987335175
epoch: 41 test_true_pfm: -0.12793960284599806 sim_pfm: -999.9478622693665
episode: 164 training return: -999.9615040865333
episode: 165 training return: -999.9718618688752
episode: 166 training return: -999.9649940840758
episode: 167 training return: -999.9711767933313
epoch: 42 test_true_pfm: -0.569855492700564 sim_pfm: -999.9476426422107
episode: 168 training return: -999.97014267439
episode: 169 training return: -999.9733507441948
episode: 170 training return: -999.9729098833457
episode: 171 training return: -999.9708229457093
epoch: 43 test_true_pfm: -0.36494377521420424 sim_pfm: -999.9478694695648
episode: 172 training return: -999.9715113330642
episode: 173 training return: -999.9613590800933
episode: 174 training return: -999.9602745394097
episode: 175 training return: -999.9515562798654
epoch: 44 test_true_pfm: -0.5277229248889542 sim_pfm: -999.9475731939091
episode: 176 training return: -999.9711024525523
episode: 177 training return: -999.9541156920211
episode: 178 training return: -999.9757031844708
episode: 179 training return: -999.9705081057562
epoch: 45 test_true_pfm: -0.8714618725475148 sim_pfm: -999.9468008421851
episode: 180 training return: -999.9821979502926
episode: 181 training return: -999.9642228867579
episode: 182 training return: -999.9734459708279
episode: 183 training return: -999.9780243566976
epoch: 46 test_true_pfm: -0.3155986542107332 sim_pfm: -999.9472690820227
episode: 184 training return: -999.9658565048359
episode: 185 training return: -999.9718582661259
episode: 186 training return: -999.960014047422
episode: 187 training return: -999.966998426087
epoch: 47 test_true_pfm: 0.2765564973787162 sim_pfm: -999.9473083899708
episode: 188 training return: -999.9757330146526
episode: 189 training return: -999.9713973735849
episode: 190 training return: -999.9706367532297
episode: 191 training return: -999.9680808066005
epoch: 48 test_true_pfm: -0.432381641872583 sim_pfm: -999.9474703441965
episode: 192 training return: -999.9753772923293
episode: 193 training return: -999.976032622223
episode: 194 training return: -999.9686871217561
episode: 195 training return: -999.961153018601
epoch: 49 test_true_pfm: -0.5270603500332635 sim_pfm: -999.9476835721686
episode: 196 training return: -999.967847366712
episode: 197 training return: -999.9664187712907
episode: 198 training return: -999.9698093624172
episode: 199 training return: -999.9676795269919
epoch: 50 test_true_pfm: -0.2725935806687339 sim_pfm: -999.9477357792217
episode: 200 training return: -999.9798677934796
episode: 201 training return: -999.9791034857807
episode: 202 training return: -999.9738210530113
episode: 203 training return: -999.9731980489603
epoch: 51 test_true_pfm: -0.2910289705374786 sim_pfm: -999.9480988909344
episode: 204 training return: -999.9684447250364
episode: 205 training return: -999.9753365131007
episode: 206 training return: -999.9582704174808
episode: 207 training return: -999.9718931408561
epoch: 52 test_true_pfm: -0.3891372026286093 sim_pfm: -999.9476978542226
episode: 208 training return: -999.9709233444478
episode: 209 training return: -999.9708876798735
episode: 210 training return: -999.981346608007
episode: 211 training return: -999.9639333081714
epoch: 53 test_true_pfm: -0.14503017016649378 sim_pfm: -999.9473419846394
episode: 212 training return: -999.9735675956069
episode: 213 training return: -999.9750064590779
episode: 214 training return: -999.9790646855729
episode: 215 training return: -999.9754855125316
epoch: 54 test_true_pfm: -0.13490161005263257 sim_pfm: -999.9473877201108
episode: 216 training return: -999.9540130113402
episode: 217 training return: -999.9719174893621
episode: 218 training return: -999.9717967622068
episode: 219 training return: -999.9697896001072
epoch: 55 test_true_pfm: -0.557004897361299 sim_pfm: -999.9478236668377
episode: 220 training return: -999.9571605848797
episode: 221 training return: -999.9686435160954
episode: 222 training return: -999.9735949670657
episode: 223 training return: -999.9720768840004
epoch: 56 test_true_pfm: -1.0290155401837737 sim_pfm: -999.9476306747982
episode: 224 training return: -999.971595064317
episode: 225 training return: -999.9721016789243
episode: 226 training return: -999.9789864062716
episode: 227 training return: -999.9756686569499
epoch: 57 test_true_pfm: -0.41654365302753177 sim_pfm: -999.9476449768063
episode: 228 training return: -999.9646656294085
episode: 229 training return: -999.9753947383217
episode: 230 training return: -999.9552831041742
episode: 231 training return: -999.9675124136264
epoch: 58 test_true_pfm: 0.060207198954874086 sim_pfm: -999.9477212398712
episode: 232 training return: -999.9659785368514
episode: 233 training return: -999.9718201212135
episode: 234 training return: -999.9826806574132
episode: 235 training return: -999.9689923086111
epoch: 59 test_true_pfm: -0.0258889197939068 sim_pfm: -999.946840214855
episode: 236 training return: -999.976171323888
episode: 237 training return: -999.9787780206215
episode: 238 training return: -999.9718630266482
episode: 239 training return: -999.9839269947785
epoch: 60 test_true_pfm: 0.4877943817498422 sim_pfm: -999.947505997081
episode: 240 training return: -999.9609315661124
episode: 241 training return: -999.9748882914977
episode: 242 training return: -999.9674295809477
episode: 243 training return: -999.9769687860071
epoch: 61 test_true_pfm: -1.0402193387823913 sim_pfm: -999.9477231463787
episode: 244 training return: -999.9649961486068
episode: 245 training return: -999.9685449192316
episode: 246 training return: -999.9727055122905
episode: 247 training return: -999.9580200839963
epoch: 62 test_true_pfm: -1.069394654751173 sim_pfm: -999.9477634403806
episode: 248 training return: -999.9707089598238
episode: 249 training return: -999.9719575765769
episode: 250 training return: -999.972985767541
episode: 251 training return: -999.9699276019694
epoch: 63 test_true_pfm: 0.2759602958755351 sim_pfm: -999.9480882115239
episode: 252 training return: -999.9685428802487
episode: 253 training return: -999.9680462055614
episode: 254 training return: -999.9612218863319
episode: 255 training return: -999.9602606565769
epoch: 64 test_true_pfm: 0.05780027101648224 sim_pfm: -999.9481037622012
episode: 256 training return: -999.959556475408
episode: 257 training return: -999.9804776906304
episode: 258 training return: -999.9683762102692
episode: 259 training return: -999.9761689860127
epoch: 65 test_true_pfm: 0.024208597659799953 sim_pfm: -999.9477350942616
episode: 260 training return: -999.9727322262615
episode: 261 training return: -999.9762297629496
episode: 262 training return: -999.9745369663949
episode: 263 training return: -999.967059796308
epoch: 66 test_true_pfm: 0.1760732960667576 sim_pfm: -999.9478447761088
episode: 264 training return: -999.9742849386039
episode: 265 training return: -999.9670817763545
episode: 266 training return: -999.9683540537192
episode: 267 training return: -999.9679248344286
epoch: 67 test_true_pfm: -0.05299152027491316 sim_pfm: -999.9472467782922
episode: 268 training return: -999.9422587108695
episode: 269 training return: -999.9771573452239
episode: 270 training return: -999.9365247611952
episode: 271 training return: -999.9720872607584
epoch: 68 test_true_pfm: 0.5010692859552343 sim_pfm: -999.947767197198
episode: 272 training return: -999.9678572179356
episode: 273 training return: -999.9621035361332
episode: 274 training return: -999.9832381174836
episode: 275 training return: -999.9636005098145
epoch: 69 test_true_pfm: 0.4070032397520332 sim_pfm: -999.947772293389
episode: 276 training return: -999.9710896546088
episode: 277 training return: -999.958721638767
episode: 278 training return: -999.9678189676484
episode: 279 training return: -1000.2727249952837
epoch: 70 test_true_pfm: 0.30497139184789906 sim_pfm: -999.9474593597585
episode: 280 training return: -999.9786302580169
episode: 281 training return: -999.9607004954592
episode: 282 training return: -999.9659719362826
episode: 283 training return: -999.9692196095615
epoch: 71 test_true_pfm: 0.31078347205648127 sim_pfm: -999.9480949863831
episode: 284 training return: -999.9578445821865
episode: 285 training return: -999.9738711414325
episode: 286 training return: -999.9618915875094
episode: 287 training return: -999.9700527506005
epoch: 72 test_true_pfm: -0.43987193740587 sim_pfm: -999.9467894843843
episode: 288 training return: -999.9571510876189
episode: 289 training return: -999.9750595191848
episode: 290 training return: -999.9675830267176
episode: 291 training return: -999.974371784385
epoch: 73 test_true_pfm: 0.33051101420338624 sim_pfm: -999.9476669523223
episode: 292 training return: -999.9784712191313
episode: 293 training return: -999.9691604201888
episode: 294 training return: -999.9752537182889
episode: 295 training return: -999.958619307694
epoch: 74 test_true_pfm: -0.47270207390375546 sim_pfm: -999.9480826053973
episode: 296 training return: -999.9690063589893
episode: 297 training return: -999.9681882779741
episode: 298 training return: -999.958161639744
episode: 299 training return: -999.9713658612543
epoch: 75 test_true_pfm: -0.41775935826498256 sim_pfm: -999.9475455706051
episode: 300 training return: -999.9727495143491
episode: 301 training return: -999.969335576651
episode: 302 training return: -999.9644087132154
episode: 303 training return: -999.9576158596313
epoch: 76 test_true_pfm: -0.07456624086524201 sim_pfm: -999.94774830199
episode: 304 training return: -999.9676750769029
episode: 305 training return: -999.9510867415033
episode: 306 training return: -999.9685276652914
episode: 307 training return: -999.9718968077051
epoch: 77 test_true_pfm: -0.6034760220185355 sim_pfm: -999.9475390214897
episode: 308 training return: -999.9603777733281
episode: 309 training return: -999.9693646465117
episode: 310 training return: -999.9754790454828
episode: 311 training return: -999.962362982897
epoch: 78 test_true_pfm: -0.08575400584996917 sim_pfm: -999.9477096865861
episode: 312 training return: -999.9716589063103
episode: 313 training return: -999.9733016454215
episode: 314 training return: -999.9706963681002
episode: 315 training return: -999.9763516093152
epoch: 79 test_true_pfm: -0.22778736582476836 sim_pfm: -999.947665456859
episode: 316 training return: -999.9749521046815
episode: 317 training return: -999.9666467326216
episode: 318 training return: -999.9750246877629
episode: 319 training return: -999.9094156978314
epoch: 80 test_true_pfm: -0.21289668951760798 sim_pfm: -999.9479789009151
episode: 320 training return: -999.9746140152098
episode: 321 training return: -999.9728563816482
episode: 322 training return: -999.9753420550182
episode: 323 training return: -999.9695029512515
epoch: 81 test_true_pfm: 0.24713566729507377 sim_pfm: -999.947442020065
episode: 324 training return: -999.9759514764347
episode: 325 training return: -999.9612439620225
episode: 326 training return: -999.9720796455038
episode: 327 training return: -999.9730842346453
epoch: 82 test_true_pfm: -0.4222306582549979 sim_pfm: -999.9471881647955
episode: 328 training return: -999.9638650790366
episode: 329 training return: -999.9694138055722
episode: 330 training return: -999.9710803222389
episode: 331 training return: -999.9679901647239
epoch: 83 test_true_pfm: -0.6611443949014054 sim_pfm: -999.9482027023572
episode: 332 training return: -999.9761833891132
episode: 333 training return: -999.9813986602145
episode: 334 training return: -999.9614225471958
episode: 335 training return: -999.9693359329932
epoch: 84 test_true_pfm: -0.5664894416614122 sim_pfm: -999.947410409202
episode: 336 training return: -999.9717959440025
episode: 337 training return: -999.970952105631
episode: 338 training return: -999.9756173693706
episode: 339 training return: -999.9743904207738
epoch: 85 test_true_pfm: -0.5001602124273729 sim_pfm: -999.9474343577898
episode: 340 training return: -999.9692522547729
episode: 341 training return: -999.9781374697034
episode: 342 training return: -999.968872510637
episode: 343 training return: -999.9703020499217
epoch: 86 test_true_pfm: -0.3168180012414962 sim_pfm: -999.9471178788972
episode: 344 training return: -999.9776452870973
episode: 345 training return: -999.9685487428975
episode: 346 training return: -999.9796820215322
episode: 347 training return: -999.9535673750347
epoch: 87 test_true_pfm: -0.9608304872101242 sim_pfm: -999.9485040148597
episode: 348 training return: -999.9686682589848
episode: 349 training return: -999.9642706547717
episode: 350 training return: -999.9672237462739
episode: 351 training return: -999.9725564405574
epoch: 88 test_true_pfm: -0.5941446592696119 sim_pfm: -999.9474603305995
episode: 352 training return: -999.9818427192815
episode: 353 training return: -999.9679372053872
episode: 354 training return: -999.9720957187914
episode: 355 training return: -999.9418392364781
epoch: 89 test_true_pfm: -0.3570931909178632 sim_pfm: -999.9473880936108
episode: 356 training return: -999.9685783011231
episode: 357 training return: -999.9793320657775
episode: 358 training return: -999.9674332006813
episode: 359 training return: -999.9754287351011
epoch: 90 test_true_pfm: 0.21770541603051521 sim_pfm: -999.9477332512023
episode: 360 training return: -999.965294343772
episode: 361 training return: -999.9722641628123
episode: 362 training return: -999.9755351499047
episode: 363 training return: -999.9805908184992
epoch: 91 test_true_pfm: 0.21526559117033486 sim_pfm: -999.9477024989898
episode: 364 training return: -999.9731865581728
episode: 365 training return: -999.9647873437078
episode: 366 training return: -999.9727527694994
episode: 367 training return: -999.9732244370772
epoch: 92 test_true_pfm: -0.3892762041288669 sim_pfm: -999.9480440342495
episode: 368 training return: -999.9651590618442
episode: 369 training return: -999.977628408273
episode: 370 training return: -999.9780069045352
episode: 371 training return: -999.9688957160099
epoch: 93 test_true_pfm: 0.0940325782823056 sim_pfm: -999.9478658432523
episode: 372 training return: -999.9635232098112
episode: 373 training return: -999.9669286514747
episode: 374 training return: -999.9701564740618
episode: 375 training return: -999.9793273827319
epoch: 94 test_true_pfm: -1.6797453738772676 sim_pfm: -999.9476790140785
episode: 376 training return: -999.965430980143
episode: 377 training return: -999.9687397868906
episode: 378 training return: -999.9720434090592
episode: 379 training return: -999.9695184479646
epoch: 95 test_true_pfm: -0.1492529129080973 sim_pfm: -999.947677315601
episode: 380 training return: -999.9755788754385
episode: 381 training return: -999.9611497028159
episode: 382 training return: -999.9701150558955
episode: 383 training return: -999.9738337055109
epoch: 96 test_true_pfm: -0.8135297423171249 sim_pfm: -999.9476375016096
episode: 384 training return: -999.9241197806615
episode: 385 training return: -999.9633957000261
episode: 386 training return: -999.9391969917891
episode: 387 training return: -1000.0272834936715
epoch: 97 test_true_pfm: -0.2813195906795967 sim_pfm: -999.94741855963
episode: 388 training return: -999.9634528823675
episode: 389 training return: -999.9762135600135
episode: 390 training return: -999.9691023019122
episode: 391 training return: -999.9809009098673
epoch: 98 test_true_pfm: -0.03012487104697344 sim_pfm: -999.9470391239738
episode: 392 training return: -999.9825581279052
episode: 393 training return: -999.968560376393
episode: 394 training return: -999.9692095088446
episode: 395 training return: -999.9687048907521
epoch: 99 test_true_pfm: 0.3446302202454577 sim_pfm: -999.9476515548571
episode: 396 training return: -999.9788916718819
episode: 397 training return: -999.977930221648
episode: 398 training return: -999.9812860774168
episode: 399 training return: -999.9725163046062
epoch: 100 test_true_pfm: -0.30174612073280116 sim_pfm: -999.9479567104522
episode: 400 training return: -999.9672725214483
episode: 401 training return: -999.9656490834014
episode: 402 training return: -999.9658083908263
episode: 403 training return: -999.9713133835414
epoch: 101 test_true_pfm: -0.1455457170403793 sim_pfm: -999.9472446440963
episode: 404 training return: -999.9784969182396
episode: 405 training return: -999.9659463927187
episode: 406 training return: -999.9698739613286
episode: 407 training return: -999.9669072624864
epoch: 102 test_true_pfm: -0.168735160564388 sim_pfm: -999.9474612161894
episode: 408 training return: -999.9755096735427
episode: 409 training return: -999.9728254955884
episode: 410 training return: -999.9713574974538
episode: 411 training return: -999.9665249401162
epoch: 103 test_true_pfm: -0.4580414531899435 sim_pfm: -999.947682025535
episode: 412 training return: -999.9675199750558
episode: 413 training return: -999.9766071762982
episode: 414 training return: -999.9753367169895
episode: 415 training return: -999.9577863014182
epoch: 104 test_true_pfm: 0.13645622960462253 sim_pfm: -999.9476346994156
episode: 416 training return: -999.9749915925792
episode: 417 training return: -1001.0261396506658
episode: 418 training return: -999.9506678493238
episode: 419 training return: -999.9719486326986
epoch: 105 test_true_pfm: 0.04484240221327166 sim_pfm: -999.9478756883924
episode: 420 training return: -999.967670867306
episode: 421 training return: -999.9630515879791
episode: 422 training return: -999.9678048680573
episode: 423 training return: -999.9680829622288
epoch: 106 test_true_pfm: 0.2268917506031023 sim_pfm: -999.9478895925581
episode: 424 training return: -999.9692709694709
episode: 425 training return: -999.9790292494705
episode: 426 training return: -999.9680561434009
episode: 427 training return: -999.9630234305204
epoch: 107 test_true_pfm: -0.1473762256683807 sim_pfm: -999.9479609798769
episode: 428 training return: -999.9751121563107
episode: 429 training return: -999.9770558534904
episode: 430 training return: -999.9666014460585
episode: 431 training return: -999.9755084920812
epoch: 108 test_true_pfm: -0.629164584169226 sim_pfm: -999.9482053288651
episode: 432 training return: -999.9736440440685
episode: 433 training return: -999.972961308867
episode: 434 training return: -999.9735724200266
episode: 435 training return: -999.9826026684652
epoch: 109 test_true_pfm: -0.7335705119795234 sim_pfm: -999.947941641331
episode: 436 training return: -999.9819370207032
episode: 437 training return: -999.971177614318
episode: 438 training return: -999.9690853457292
episode: 439 training return: -999.9727086469387
epoch: 110 test_true_pfm: -0.49524682176270113 sim_pfm: -999.9479740584619
episode: 440 training return: -999.9634510025204
episode: 441 training return: -999.9837845343197
episode: 442 training return: -999.9710990911601
episode: 443 training return: -999.9657679508451
epoch: 111 test_true_pfm: -0.8286416787118579 sim_pfm: -999.9484223970634
episode: 444 training return: -999.9742324710143
episode: 445 training return: -999.9710123057979
episode: 446 training return: -999.9707102175284
episode: 447 training return: -999.9749658753263
epoch: 112 test_true_pfm: 0.08718136639180823 sim_pfm: -999.947866852272
episode: 448 training return: -999.9764402961089
episode: 449 training return: -999.9717075674563
episode: 450 training return: -999.9748500858027
episode: 451 training return: -999.9742792415951
epoch: 113 test_true_pfm: 0.2543246394374691 sim_pfm: -999.9474662108347
episode: 452 training return: -999.9623815133215
episode: 453 training return: -999.967802047526
episode: 454 training return: -999.9698404072984
episode: 455 training return: -999.9638994147163
epoch: 114 test_true_pfm: -0.6624916516195992 sim_pfm: -999.9481972592909
episode: 456 training return: -999.9713509676693
episode: 457 training return: -999.9691282456638
episode: 458 training return: -999.975182336208
episode: 459 training return: -999.9688780454889
epoch: 115 test_true_pfm: -0.02767926029754697 sim_pfm: -999.9476404159981
episode: 460 training return: -999.9739047132524
episode: 461 training return: -999.9812424957233
episode: 462 training return: -999.9749182734548
episode: 463 training return: -999.9707919605358
epoch: 116 test_true_pfm: -0.001991628024291217 sim_pfm: -999.9476416429837
episode: 464 training return: -999.9654721166835
episode: 465 training return: -999.9750073400417
episode: 466 training return: -999.9690101235585
episode: 467 training return: -999.9526162295653
epoch: 117 test_true_pfm: 0.13172261293466678 sim_pfm: -999.9483616332938
episode: 468 training return: -999.9679393353182
episode: 469 training return: -999.9715935154338
episode: 470 training return: -999.9747089408646
episode: 471 training return: -999.9739533241072
epoch: 118 test_true_pfm: -0.008209158282792431 sim_pfm: -999.9478745281931
episode: 472 training return: -999.9720815260268
episode: 473 training return: -999.9481324648223
episode: 474 training return: -999.9762656515306
episode: 475 training return: -999.971543671504
epoch: 119 test_true_pfm: 0.41106083123382064 sim_pfm: -999.9476883917645
episode: 476 training return: -999.9717158188101
episode: 477 training return: -999.9757675762886
episode: 478 training return: -999.9711993094832
episode: 479 training return: -999.973576141124
epoch: 120 test_true_pfm: 0.14229611840684556 sim_pfm: -999.9471209692812
episode: 480 training return: -999.970037371799
episode: 481 training return: -999.9679128585434
episode: 482 training return: -999.9796686717328
episode: 483 training return: -999.970642605693
epoch: 121 test_true_pfm: 0.23517789989221882 sim_pfm: -999.9477382671075
episode: 484 training return: -999.9538495471838
episode: 485 training return: -999.963326450723
episode: 486 training return: -999.9614299544213
episode: 487 training return: -999.959948456809
epoch: 122 test_true_pfm: -0.7462242367370365 sim_pfm: -999.9476309099003
episode: 488 training return: -999.9671286575222
episode: 489 training return: -999.9756264445558
episode: 490 training return: -999.9749773455397
episode: 491 training return: -999.9730097404321
epoch: 123 test_true_pfm: -0.5837134031508567 sim_pfm: -999.9473367712548
episode: 492 training return: -999.9641094264434
episode: 493 training return: -999.9820833064502
episode: 494 training return: -999.9762704884718
episode: 495 training return: -999.9572433588064
epoch: 124 test_true_pfm: 0.005238870164695653 sim_pfm: -999.9474543598486
episode: 496 training return: -999.9671157098193
episode: 497 training return: -999.9697133612893
episode: 498 training return: -999.9808004281061
episode: 499 training return: -999.9773602703975
epoch: 125 test_true_pfm: 0.554420192952418 sim_pfm: -999.9474715249077
episode: 500 training return: -999.977644698738
episode: 501 training return: -999.9700985565981
episode: 502 training return: -999.9664592015785
episode: 503 training return: -999.9693815369835
epoch: 126 test_true_pfm: -0.7869034637120321 sim_pfm: -999.9474985302973
episode: 504 training return: -999.9744665261675
episode: 505 training return: -999.9778086966837
episode: 506 training return: -999.9664368833965
episode: 507 training return: -999.9775961046988
epoch: 127 test_true_pfm: -0.2070212404003318 sim_pfm: -999.9480277513461
episode: 508 training return: -999.9646221841081
episode: 509 training return: -999.9703998129951
episode: 510 training return: -999.9737287947124
episode: 511 training return: -999.9663564410278
epoch: 128 test_true_pfm: -1.0452381771534942 sim_pfm: -999.9474815342531
episode: 512 training return: -999.9853900354963
episode: 513 training return: -999.9777049463072
episode: 514 training return: -999.9683369402851
episode: 515 training return: -999.9728121757533
epoch: 129 test_true_pfm: 0.06251406078366571 sim_pfm: -999.9478587848392
episode: 516 training return: -999.9646331970273
episode: 517 training return: -999.964170563812
episode: 518 training return: -999.9567317136865
episode: 519 training return: -999.9691031187091
epoch: 130 test_true_pfm: -0.271253020255245 sim_pfm: -999.9480350476791
episode: 520 training return: -999.9766155546599
episode: 521 training return: -999.9653288859154
episode: 522 training return: -999.9692570673112
episode: 523 training return: -999.9696958348624
epoch: 131 test_true_pfm: 0.09336633666518479 sim_pfm: -999.9470273399035
episode: 524 training return: -999.9720368737336
episode: 525 training return: -999.9684369022506
episode: 526 training return: -999.9740653581182
episode: 527 training return: -999.9709269160936
epoch: 132 test_true_pfm: -0.9610385361203395 sim_pfm: -999.9479096901192
episode: 528 training return: -999.9680746173347
episode: 529 training return: -999.9693645596498
episode: 530 training return: -999.9602453471482
episode: 531 training return: -999.9689402214691
epoch: 133 test_true_pfm: -0.0502634732641107 sim_pfm: -999.9480354530642
episode: 532 training return: -999.9567633614654
episode: 533 training return: -999.9720204319398
episode: 534 training return: -999.9763825406934
episode: 535 training return: -999.9719939470151
epoch: 134 test_true_pfm: -0.3604274306538566 sim_pfm: -999.9475406265623
episode: 536 training return: -999.9697844756523
episode: 537 training return: -999.9720640842166
episode: 538 training return: -999.9721606622335
episode: 539 training return: -999.9533348243505
epoch: 135 test_true_pfm: -0.025090175187922375 sim_pfm: -999.9480326253606
episode: 540 training return: -999.9733242816773
episode: 541 training return: -999.9755253570052
episode: 542 training return: -999.9542737719411
episode: 543 training return: -999.9699723332742
epoch: 136 test_true_pfm: -0.49198480678583084 sim_pfm: -999.9473633126237
episode: 544 training return: -999.9743095945599
episode: 545 training return: -999.9841905623891
episode: 546 training return: -999.9741249996114
episode: 547 training return: -999.9779246057386
epoch: 137 test_true_pfm: 0.22989223456457153 sim_pfm: -999.9480027325603
episode: 548 training return: -999.9765898047846
episode: 549 training return: -999.9438167510635
episode: 550 training return: -999.9715299458927
episode: 551 training return: -999.985764457151
epoch: 138 test_true_pfm: -0.039637691180404645 sim_pfm: -999.947829616271
episode: 552 training return: -999.9724001734008
episode: 553 training return: -999.9714321354221
episode: 554 training return: -999.9763272317336
episode: 555 training return: -1001.9047660586042
epoch: 139 test_true_pfm: -0.6948774602034536 sim_pfm: -999.9478167535958
episode: 556 training return: -999.9617841010445
episode: 557 training return: -999.9713028425749
episode: 558 training return: -999.975821330707
episode: 559 training return: -999.9750746658973
epoch: 140 test_true_pfm: -0.6797921235023104 sim_pfm: -999.9477970716413
episode: 560 training return: -999.9620122164586
episode: 561 training return: -999.9673092252063
episode: 562 training return: -999.9636682166534
episode: 563 training return: -999.9796265733474
epoch: 141 test_true_pfm: -0.7942695448061333 sim_pfm: -999.9476209404446
episode: 564 training return: -999.9746956401524
episode: 565 training return: -999.9754422006174
episode: 566 training return: -999.9756012574546
episode: 567 training return: -999.9564352546247
epoch: 142 test_true_pfm: -0.14632490639013251 sim_pfm: -999.9480707037277
episode: 568 training return: -1000.0112525963841
episode: 569 training return: -999.9703444542631
episode: 570 training return: -999.9762494538008
episode: 571 training return: -999.9757155906151
epoch: 143 test_true_pfm: -0.7611265033213219 sim_pfm: -999.9477909659557
episode: 572 training return: -999.9710463251897
episode: 573 training return: -999.9730024235593
episode: 574 training return: -999.9690513917305
episode: 575 training return: -999.9584685351748
epoch: 144 test_true_pfm: 0.5897934563701057 sim_pfm: -999.9481640324793
episode: 576 training return: -999.9721992032089
episode: 577 training return: -999.9691374347549
episode: 578 training return: -999.9790146507589
episode: 579 training return: -999.9758709162909
epoch: 145 test_true_pfm: -0.11099683876142429 sim_pfm: -999.9476920126987
episode: 580 training return: -999.9690060548813
episode: 581 training return: -999.9759007136007
episode: 582 training return: -999.9771820973953
episode: 583 training return: -999.9602782540238
epoch: 146 test_true_pfm: 0.0695892599042559 sim_pfm: -999.9478781365706
episode: 584 training return: -999.9650847366784
episode: 585 training return: -999.9636315804898
episode: 586 training return: -999.9813313177601
episode: 587 training return: -999.9667113611781
epoch: 147 test_true_pfm: -0.08042342049163628 sim_pfm: -999.9479974393558
episode: 588 training return: -999.9502699288204
episode: 589 training return: -999.9676612769933
episode: 590 training return: -999.9775547369215
episode: 591 training return: -999.964849307568
epoch: 148 test_true_pfm: -0.3074872439499239 sim_pfm: -999.9471000897311
episode: 592 training return: -999.9698537614144
episode: 593 training return: -999.9730183652823
episode: 594 training return: -999.9711233990993
episode: 595 training return: -999.9630530359216
epoch: 149 test_true_pfm: 0.03853970163748627 sim_pfm: -999.9472865401827
episode: 596 training return: -999.9595568196513
episode: 597 training return: -999.9685213970918
episode: 598 training return: -999.9766132008402
episode: 599 training return: -999.9768722329444
epoch: 150 test_true_pfm: -0.3770617055582351 sim_pfm: -999.9479189803868
