['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.3159499217569828 test_loss: 0.21607596874237062
epoch: 1 training_loss 0.18324967332184314 test_loss: 0.1665424108505249
epoch: 2 training_loss 0.14580820925533772 test_loss: 0.1548901915550232
epoch: 3 training_loss 0.14381126321852208 test_loss: 0.12638587951660157
epoch: 4 training_loss 0.12030515648424625 test_loss: 0.15259119272232055
epoch: 5 training_loss 0.12549359656870365 test_loss: 0.13692700862884521
epoch: 6 training_loss 0.11918957512825727 test_loss: 0.13637808561325074
epoch: 7 training_loss 0.10797442901879549 test_loss: 0.11565208435058594
epoch: 8 training_loss 0.11829220127314329 test_loss: 0.13903720378875734
epoch: 9 training_loss 0.11661397825926542 test_loss: 0.11139523983001709
epoch: 10 training_loss 0.11738400441594422 test_loss: 0.13860292434692384
epoch: 11 training_loss 0.10883229404687882 test_loss: 0.1256870746612549
epoch: 12 training_loss 0.10708486277610063 test_loss: 0.1337376356124878
epoch: 13 training_loss 0.1072040650807321 test_loss: 0.12035717964172363
epoch: 14 training_loss 0.10835490509867668 test_loss: 0.11720197200775147
epoch: 15 training_loss 0.10410010201856494 test_loss: 0.1235957384109497
epoch: 16 training_loss 0.10552937190979719 test_loss: 0.11609251499176025
epoch: 17 training_loss 0.1091339672729373 test_loss: 0.12664464712142945
epoch: 18 training_loss 0.10369936995208263 test_loss: 0.1134792685508728
epoch: 19 training_loss 0.09771923372521997 test_loss: 0.1142166018486023
epoch: 20 training_loss 0.10404467478394508 test_loss: 0.12645130157470702
epoch: 21 training_loss 0.10476617943495511 test_loss: 0.12711198329925538
epoch: 22 training_loss 0.10777741791680455 test_loss: 0.12555375099182128
epoch: 23 training_loss 0.10000443730503321 test_loss: 0.12727380990982057
epoch: 24 training_loss 0.10324045047163963 test_loss: 0.12986962795257567
epoch: 25 training_loss 0.09908532554283739 test_loss: 0.13369144201278688
epoch: 26 training_loss 0.1041826543211937 test_loss: 0.10533679723739624
epoch: 27 training_loss 0.10658338587731123 test_loss: 0.14645439386367798
epoch: 28 training_loss 0.10434875622391701 test_loss: 0.11502392292022705
epoch: 29 training_loss 0.10006671458482742 test_loss: 0.12076996564865113
epoch: 30 training_loss 0.09882202964276075 test_loss: 0.11639894247055053
epoch: 31 training_loss 0.10402310527861118 test_loss: 0.13323066234588624
epoch: 32 training_loss 0.09135925443843007 test_loss: 0.12010705471038818
epoch: 33 training_loss 0.1054504231363535 test_loss: 0.10218819379806518
epoch: 34 training_loss 0.10991983471438288 test_loss: 0.11256825923919678
epoch: 35 training_loss 0.10465696588158607 test_loss: 0.12076284885406494
epoch: 36 training_loss 0.09241275696083903 test_loss: 0.11708868741989135
epoch: 37 training_loss 0.09644976913928986 test_loss: 0.113792085647583
epoch: 38 training_loss 0.10484433948993682 test_loss: 0.10733704566955567
epoch: 39 training_loss 0.09393655836582183 test_loss: 0.12023077011108399
epoch: 40 training_loss 0.09543505325913429 test_loss: 0.10792772769927979
epoch: 41 training_loss 0.09641587791033089 test_loss: 0.11504292488098145
epoch: 42 training_loss 0.10182868460193277 test_loss: 0.10653917789459229
epoch: 43 training_loss 0.10097493261098861 test_loss: 0.1259352445602417
epoch: 44 training_loss 0.09792054779827594 test_loss: 0.10634475946426392
epoch: 45 training_loss 0.09455111589282751 test_loss: 0.12722772359848022
epoch: 46 training_loss 0.10247585076838732 test_loss: 0.13396058082580567
epoch: 47 training_loss 0.09630611492320895 test_loss: 0.12740191221237182
epoch: 48 training_loss 0.10070696391165257 test_loss: 0.11692731380462647
epoch: 49 training_loss 0.10387007910758257 test_loss: 0.12498271465301514
epoch: 50 training_loss 0.10265058495104312 test_loss: 0.12629334926605223
epoch: 51 training_loss 0.09493266019970178 test_loss: 0.11612917184829712
epoch: 52 training_loss 0.09419490788131953 test_loss: 0.1168413758277893
epoch: 53 training_loss 0.10157332170754671 test_loss: 0.11495069265365601
epoch: 54 training_loss 0.09985111229121685 test_loss: 0.11569122076034546
epoch: 55 training_loss 0.09991225361824035 test_loss: 0.11089704036712647
epoch: 56 training_loss 0.10410337660461665 test_loss: 0.1076817274093628
epoch: 57 training_loss 0.09893300453200936 test_loss: 0.11660170555114746
epoch: 58 training_loss 0.0956015414185822 test_loss: 0.12339696884155274
epoch: 59 training_loss 0.09486022900789975 test_loss: 0.11321040391921997
epoch: 60 training_loss 0.09571573682129383 test_loss: 0.14490184783935547
epoch: 61 training_loss 0.10077899418771268 test_loss: 0.1149674415588379
epoch: 62 training_loss 0.09465260114520788 test_loss: 0.11997865438461304
epoch: 63 training_loss 0.10346222402527928 test_loss: 0.10333746671676636
epoch: 64 training_loss 0.09248449724167586 test_loss: 0.111506187915802
epoch: 65 training_loss 0.0980730251595378 test_loss: 0.09354632496833801
epoch: 66 training_loss 0.09605529107153415 test_loss: 0.11673659086227417
epoch: 67 training_loss 0.08816431742161512 test_loss: 0.10644211769104003
epoch: 68 training_loss 0.09808785043656826 test_loss: 0.12111629247665405
epoch: 69 training_loss 0.10030288018286228 test_loss: 0.12331663370132447
epoch: 70 training_loss 0.09386561887338757 test_loss: 0.13432403802871704
epoch: 71 training_loss 0.09959600765258074 test_loss: 0.12072081565856933
epoch: 72 training_loss 0.09508572859689593 test_loss: 0.12118468284606934
epoch: 73 training_loss 0.09777758104726672 test_loss: 0.13758193254470824
epoch: 74 training_loss 0.09436823401600122 test_loss: 0.12221047878265381
epoch: 75 training_loss 0.09731837537139654 test_loss: 0.12205051183700562
epoch: 76 training_loss 0.09937899028882384 test_loss: 0.09722930192947388
epoch: 77 training_loss 0.0970954610966146 test_loss: 0.12106727361679077
epoch: 78 training_loss 0.09725373797118664 test_loss: 0.11662909984588624
epoch: 79 training_loss 0.09724668171256781 test_loss: 0.11312823295593262
epoch: 80 training_loss 0.09395346689969301 test_loss: 0.12384335994720459
epoch: 81 training_loss 0.09616698944941163 test_loss: 0.1310784935951233
epoch: 82 training_loss 0.09324651163071394 test_loss: 0.14031118154525757
epoch: 83 training_loss 0.0883676684089005 test_loss: 0.0962364137172699
epoch: 84 training_loss 0.09569151535630226 test_loss: 0.11039868593215943
epoch: 85 training_loss 0.09345308734104037 test_loss: 0.11583912372589111
epoch: 86 training_loss 0.09575794789940119 test_loss: 0.12570061683654785
epoch: 87 training_loss 0.09313206734135747 test_loss: 0.09660120606422425
epoch: 88 training_loss 0.0915705581381917 test_loss: 0.12178652286529541
epoch: 89 training_loss 0.09154234742745757 test_loss: 0.11529682874679566
epoch: 90 training_loss 0.09475722102448345 test_loss: 0.09152557253837586
epoch: 91 training_loss 0.09667849261313677 test_loss: 0.1289735794067383
epoch: 92 training_loss 0.09306925185024738 test_loss: 0.11317317485809326
epoch: 93 training_loss 0.09029707125388085 test_loss: 0.11278537511825562
epoch: 94 training_loss 0.09020738577470183 test_loss: 0.10416672229766846
epoch: 95 training_loss 0.09888659376651049 test_loss: 0.11058239936828614
epoch: 96 training_loss 0.09969097835943103 test_loss: 0.12109165191650391
epoch: 97 training_loss 0.09494607448577881 test_loss: 0.10574668645858765
epoch: 98 training_loss 0.09309718547388911 test_loss: 0.11383305788040161
epoch: 99 training_loss 0.09753321411088109 test_loss: 0.11910115480422974
epoch: 100 training_loss 0.09401050278916954 test_loss: 0.1113202691078186
epoch: 101 training_loss 0.09098167233169079 test_loss: 0.12400426864624023
epoch: 102 training_loss 0.09350488245487214 test_loss: 0.11133790016174316
epoch: 103 training_loss 0.09407164845615626 test_loss: 0.1189076066017151
epoch: 104 training_loss 0.09512465555220842 test_loss: 0.12975683212280273
epoch: 105 training_loss 0.087544950786978 test_loss: 0.12644325494766234
epoch: 106 training_loss 0.08787761546671391 test_loss: 0.09524674415588379
epoch: 107 training_loss 0.09893903845921159 test_loss: 0.1323230504989624
epoch: 108 training_loss 0.09259930791333318 test_loss: 0.11366212368011475
epoch: 109 training_loss 0.09909588495269418 test_loss: 0.13740941286087036
epoch: 110 training_loss 0.09736569449305535 test_loss: 0.12220920324325561
epoch: 111 training_loss 0.0932446844317019 test_loss: 0.11523716449737549
epoch: 112 training_loss 0.09695437628775835 test_loss: 0.10339199304580689
epoch: 113 training_loss 0.08612118862569332 test_loss: 0.12011550664901734
epoch: 114 training_loss 0.08947056425735354 test_loss: 0.10137512683868408
epoch: 115 training_loss 0.09046143436804414 test_loss: 0.11424853801727294
epoch: 116 training_loss 0.09321177538484335 test_loss: 0.10445535182952881
epoch: 117 training_loss 0.09893678773194552 test_loss: 0.121541428565979
epoch: 118 training_loss 0.09316428974270821 test_loss: 0.10072296857833862
epoch: 119 training_loss 0.0866481781937182 test_loss: 0.12508184909820558
epoch: 120 training_loss 0.08918546034023166 test_loss: 0.11432472467422486
epoch: 121 training_loss 0.09920257313176989 test_loss: 0.11827611923217773
epoch: 122 training_loss 0.09088821772485972 test_loss: 0.11644755601882935
epoch: 123 training_loss 0.0873224032856524 test_loss: 0.11517088413238526
epoch: 124 training_loss 0.08567183615639806 test_loss: 0.10664829015731811
epoch: 125 training_loss 0.08797142513096333 test_loss: 0.11335166692733764
epoch: 126 training_loss 0.0965422042645514 test_loss: 0.11225662231445313
epoch: 127 training_loss 0.09234502598643303 test_loss: 0.12425867319107056
epoch: 128 training_loss 0.0905012365244329 test_loss: 0.1267510771751404
epoch: 129 training_loss 0.09243286784738303 test_loss: 0.1213568091392517
epoch: 130 training_loss 0.09436181366443634 test_loss: 0.11224913597106934
epoch: 131 training_loss 0.09484219295904041 test_loss: 0.10972230434417725
epoch: 132 training_loss 0.08691173883154989 test_loss: 0.11957905292510987
epoch: 133 training_loss 0.08788112994283438 test_loss: 0.1117120623588562
epoch: 134 training_loss 0.0996388465538621 test_loss: 0.12695183753967285
epoch: 135 training_loss 0.09713276134803892 test_loss: 0.09766252040863037
epoch: 136 training_loss 0.08937967110425234 test_loss: 0.11873825788497924
epoch: 137 training_loss 0.08813983259722591 test_loss: 0.11603693962097168
epoch: 138 training_loss 0.09121662590652704 test_loss: 0.1075588345527649
epoch: 139 training_loss 0.09569447208195925 test_loss: 0.1291339874267578
epoch: 140 training_loss 0.08814675601199269 test_loss: 0.10932823419570922
epoch: 141 training_loss 0.08639123238623142 test_loss: 0.1303227424621582
epoch: 142 training_loss 0.09289767175912857 test_loss: 0.11961647272109985
epoch: 143 training_loss 0.08966765118762851 test_loss: 0.1099958062171936
epoch: 144 training_loss 0.09035145331174135 test_loss: 0.1197648286819458
epoch: 145 training_loss 0.08582532465457916 test_loss: 0.10968266725540161
epoch: 146 training_loss 0.08803837671875954 test_loss: 0.1113547682762146
epoch: 147 training_loss 0.09170063450932503 test_loss: 0.09574834108352662
epoch: 148 training_loss 0.09184866264462471 test_loss: 0.117157244682312
epoch: 149 training_loss 0.0827825822122395 test_loss: 0.1250816822052002
epoch: 0 training_loss 0.2980856558680534 test_loss: 0.18560431003570557
epoch: 1 training_loss 0.17508980985730888 test_loss: 0.14236552715301515
epoch: 2 training_loss 0.14800700947642326 test_loss: 0.1537040114402771
epoch: 3 training_loss 0.13545405931770801 test_loss: 0.1412966012954712
epoch: 4 training_loss 0.13036578197032214 test_loss: 0.11866539716720581
epoch: 5 training_loss 0.11156199580058455 test_loss: 0.13018064498901366
epoch: 6 training_loss 0.1160483230650425 test_loss: 0.11856791973114014
epoch: 7 training_loss 0.12611513078212738 test_loss: 0.12045464515686036
epoch: 8 training_loss 0.11315068308264017 test_loss: 0.11090822219848633
epoch: 9 training_loss 0.10756081201136113 test_loss: 0.12201440334320068
epoch: 10 training_loss 0.10690836314111948 test_loss: 0.13869346380233766
epoch: 11 training_loss 0.10088228028267622 test_loss: 0.13614557981491088
epoch: 12 training_loss 0.10082886595278978 test_loss: 0.12819478511810303
epoch: 13 training_loss 0.10991509269922972 test_loss: 0.11367439031600952
epoch: 14 training_loss 0.11545763470232487 test_loss: 0.11356900930404663
epoch: 15 training_loss 0.1170909414626658 test_loss: 0.12743518352508545
epoch: 16 training_loss 0.1126672512292862 test_loss: 0.11811628341674804
epoch: 17 training_loss 0.11239275004714727 test_loss: 0.1500941753387451
epoch: 18 training_loss 0.10751023704186081 test_loss: 0.10023822784423828
epoch: 19 training_loss 0.11105045331642031 test_loss: 0.10883920192718506
epoch: 20 training_loss 0.10854385375976562 test_loss: 0.11531926393508911
epoch: 21 training_loss 0.10391097873449326 test_loss: 0.12597455978393554
epoch: 22 training_loss 0.10408665781840681 test_loss: 0.1141508936882019
epoch: 23 training_loss 0.09984929960221052 test_loss: 0.13286572694778442
epoch: 24 training_loss 0.11118993425741791 test_loss: 0.11166238784790039
epoch: 25 training_loss 0.10603499146178365 test_loss: 0.12452789545059204
epoch: 26 training_loss 0.0973244384303689 test_loss: 0.11576262712478638
epoch: 27 training_loss 0.10640175368636846 test_loss: 0.1164241075515747
epoch: 28 training_loss 0.10610270176082849 test_loss: 0.11156187057495118
epoch: 29 training_loss 0.10073199082165957 test_loss: 0.10810407400131225
epoch: 30 training_loss 0.10792528981342912 test_loss: 0.12568631172180175
epoch: 31 training_loss 0.0983951598033309 test_loss: 0.11368765830993652
epoch: 32 training_loss 0.10579350415617228 test_loss: 0.09445536136627197
epoch: 33 training_loss 0.10682599607855081 test_loss: 0.10462580919265747
epoch: 34 training_loss 0.10150353088974953 test_loss: 0.12076526880264282
epoch: 35 training_loss 0.0990092197060585 test_loss: 0.1085563063621521
epoch: 36 training_loss 0.11013361372053623 test_loss: 0.11445573568344117
epoch: 37 training_loss 0.10154973868280649 test_loss: 0.1159429669380188
epoch: 38 training_loss 0.09975840084254742 test_loss: 0.10845516920089722
epoch: 39 training_loss 0.10111512064933777 test_loss: 0.109318208694458
epoch: 40 training_loss 0.10615312116220593 test_loss: 0.11107999086380005
epoch: 41 training_loss 0.10694308374077081 test_loss: 0.10984259843826294
epoch: 42 training_loss 0.10163163732737303 test_loss: 0.1200412631034851
epoch: 43 training_loss 0.10612598760053515 test_loss: 0.12247331142425537
epoch: 44 training_loss 0.09732774686068296 test_loss: 0.12049411535263062
epoch: 45 training_loss 0.10080488231033087 test_loss: 0.11411646604537964
epoch: 46 training_loss 0.1027591985836625 test_loss: 0.11014978885650635
epoch: 47 training_loss 0.09810508063063025 test_loss: 0.1139372706413269
epoch: 48 training_loss 0.10041974324733019 test_loss: 0.10857895612716675
epoch: 49 training_loss 0.11009924717247487 test_loss: 0.1151742696762085
epoch: 50 training_loss 0.10003790805116296 test_loss: 0.1091988205909729
epoch: 51 training_loss 0.11182206451892852 test_loss: 0.12195672988891601
epoch: 52 training_loss 0.10208774697035551 test_loss: 0.11381547451019287
epoch: 53 training_loss 0.10575118716806173 test_loss: 0.12031311988830566
epoch: 54 training_loss 0.10816726546734572 test_loss: 0.09802954792976379
epoch: 55 training_loss 0.10057824591174722 test_loss: 0.11997584104537964
epoch: 56 training_loss 0.10641251284629107 test_loss: 0.10416182279586791
epoch: 57 training_loss 0.09461367534473539 test_loss: 0.11740241050720215
epoch: 58 training_loss 0.09554664891213178 test_loss: 0.11787368059158325
epoch: 59 training_loss 0.1017368196323514 test_loss: 0.11540546417236328
epoch: 60 training_loss 0.09649037377908826 test_loss: 0.11548388004302979
epoch: 61 training_loss 0.1022324220277369 test_loss: 0.12726948261260987
epoch: 62 training_loss 0.09876123823225498 test_loss: 0.12411949634552003
epoch: 63 training_loss 0.1010275822877884 test_loss: 0.11557865142822266
epoch: 64 training_loss 0.08876420309767127 test_loss: 0.13026413917541504
epoch: 65 training_loss 0.10326325032860041 test_loss: 0.11846369504928589
epoch: 66 training_loss 0.09574058599770069 test_loss: 0.12159587144851684
epoch: 67 training_loss 0.10240161696448923 test_loss: 0.1189072608947754
epoch: 68 training_loss 0.10553734462708235 test_loss: 0.12306932210922242
epoch: 69 training_loss 0.10444517940282821 test_loss: 0.10653932094573974
epoch: 70 training_loss 0.10072636792436242 test_loss: 0.1211362600326538
epoch: 71 training_loss 0.09853802666068077 test_loss: 0.133076810836792
epoch: 72 training_loss 0.09500207705423236 test_loss: 0.1047251582145691
epoch: 73 training_loss 0.1008513028267771 test_loss: 0.10219035148620606
epoch: 74 training_loss 0.09641938742250204 test_loss: 0.11502238512039184
epoch: 75 training_loss 0.10206195559352636 test_loss: 0.11563323736190796
epoch: 76 training_loss 0.1017559428140521 test_loss: 0.11889847517013549
epoch: 77 training_loss 0.10115302350372075 test_loss: 0.11483372449874878
epoch: 78 training_loss 0.09775120347738266 test_loss: 0.12880346775054932
epoch: 79 training_loss 0.09137308742851019 test_loss: 0.10553133487701416
epoch: 80 training_loss 0.09495915208011865 test_loss: 0.11032971143722534
epoch: 81 training_loss 0.10486147722229361 test_loss: 0.1272057294845581
epoch: 82 training_loss 0.10222357500344514 test_loss: 0.1096153736114502
epoch: 83 training_loss 0.10690488873049617 test_loss: 0.1142655611038208
epoch: 84 training_loss 0.09600465321913361 test_loss: 0.11758657693862914
epoch: 85 training_loss 0.09928377402946353 test_loss: 0.10606417655944825
epoch: 86 training_loss 0.09749728126451374 test_loss: 0.10064129829406739
epoch: 87 training_loss 0.09668559722602367 test_loss: 0.11329368352890015
epoch: 88 training_loss 0.09977345766499639 test_loss: 0.1047257661819458
epoch: 89 training_loss 0.09418641909956932 test_loss: 0.11351643800735474
epoch: 90 training_loss 0.09356078449636698 test_loss: 0.10884027481079102
epoch: 91 training_loss 0.0928626967407763 test_loss: 0.12171459197998047
epoch: 92 training_loss 0.09521256607025862 test_loss: 0.12322545051574707
epoch: 93 training_loss 0.09698403801769018 test_loss: 0.11291921138763428
epoch: 94 training_loss 0.09453399375081062 test_loss: 0.11355448961257934
epoch: 95 training_loss 0.09876959981396795 test_loss: 0.12013447284698486
epoch: 96 training_loss 0.09913482997566461 test_loss: 0.1235893726348877
epoch: 97 training_loss 0.08835932891815901 test_loss: 0.11883903741836548
epoch: 98 training_loss 0.10318163231015205 test_loss: 0.1025192379951477
epoch: 99 training_loss 0.09827436057850719 test_loss: 0.10916758775711059
epoch: 100 training_loss 0.09997662534937263 test_loss: 0.12378363609313965
epoch: 101 training_loss 0.08966314177960158 test_loss: 0.12548739910125734
epoch: 102 training_loss 0.1044682721607387 test_loss: 0.11913750171661378
epoch: 103 training_loss 0.09896032648161053 test_loss: 0.11460944414138793
epoch: 104 training_loss 0.09259315920993685 test_loss: 0.11505541801452637
epoch: 105 training_loss 0.09766324626281858 test_loss: 0.10580534934997558
epoch: 106 training_loss 0.0965176310762763 test_loss: 0.1135679841041565
epoch: 107 training_loss 0.10405287500470876 test_loss: 0.12784234285354615
epoch: 108 training_loss 0.09070745207369328 test_loss: 0.12052923440933228
epoch: 109 training_loss 0.09710499135777355 test_loss: 0.13111579418182373
epoch: 110 training_loss 0.09634244561195374 test_loss: 0.1394442915916443
epoch: 111 training_loss 0.09039657259359955 test_loss: 0.13774919509887695
epoch: 112 training_loss 0.09379015313461422 test_loss: 0.11555060148239135
epoch: 113 training_loss 0.1009448628127575 test_loss: 0.11120742559432983
epoch: 114 training_loss 0.09022959175519646 test_loss: 0.12786401510238649
epoch: 115 training_loss 0.09545951321721077 test_loss: 0.11500239372253418
epoch: 116 training_loss 0.08898122645914555 test_loss: 0.10423369407653808
epoch: 117 training_loss 0.09627056421712041 test_loss: 0.10275952816009522
epoch: 118 training_loss 0.09528995899483561 test_loss: 0.1077447772026062
epoch: 119 training_loss 0.09678359072655439 test_loss: 0.10878647565841675
epoch: 120 training_loss 0.10586505703628063 test_loss: 0.1205560326576233
epoch: 121 training_loss 0.0895255184918642 test_loss: 0.11428344249725342
epoch: 122 training_loss 0.09272513017058373 test_loss: 0.10871940851211548
epoch: 123 training_loss 0.09503262002021075 test_loss: 0.1153291940689087
epoch: 124 training_loss 0.09445549009367823 test_loss: 0.12058618068695068
epoch: 125 training_loss 0.09420727539807558 test_loss: 0.12614518404006958
epoch: 126 training_loss 0.09500931069254875 test_loss: 0.10874730348587036
epoch: 127 training_loss 0.10189346984028816 test_loss: 0.11623202562332154
epoch: 128 training_loss 0.10023173483088613 test_loss: 0.11534700393676758
epoch: 129 training_loss 0.09150630204007029 test_loss: 0.1282932996749878
epoch: 130 training_loss 0.0973413066379726 test_loss: 0.11111675500869751
epoch: 131 training_loss 0.08726840464398265 test_loss: 0.13835620880126953
epoch: 132 training_loss 0.09470453433692455 test_loss: 0.10281829833984375
epoch: 133 training_loss 0.09076195137575269 test_loss: 0.11480847597122193
epoch: 134 training_loss 0.09457591058686375 test_loss: 0.1151594877243042
epoch: 135 training_loss 0.09365740459412336 test_loss: 0.10874531269073487
epoch: 136 training_loss 0.09114931102842093 test_loss: 0.12112106084823608
epoch: 137 training_loss 0.09037973963655531 test_loss: 0.13356504440307618
epoch: 138 training_loss 0.09420038091018795 test_loss: 0.11863996982574462
epoch: 139 training_loss 0.09543362431228161 test_loss: 0.11167361736297607
epoch: 140 training_loss 0.08900319306179881 test_loss: 0.11575171947479249
epoch: 141 training_loss 0.08974687040783465 test_loss: 0.1270546793937683
epoch: 142 training_loss 0.09810653429478407 test_loss: 0.11139427423477173
epoch: 143 training_loss 0.0922048464603722 test_loss: 0.14640910625457765
epoch: 144 training_loss 0.09528530610725283 test_loss: 0.12013074159622192
epoch: 145 training_loss 0.08906039260327817 test_loss: 0.12710491418838502
epoch: 146 training_loss 0.0890118089132011 test_loss: 0.12422683238983154
epoch: 147 training_loss 0.09217969089746475 test_loss: 0.10892815589904785
epoch: 148 training_loss 0.08865293394774199 test_loss: 0.12263333797454834
epoch: 149 training_loss 0.09330211024731398 test_loss: 0.12640198469161987
epoch: 0 training_loss 0.29527268767356873 test_loss: 0.20206131935119628
epoch: 1 training_loss 0.16605109237134458 test_loss: 0.1465782880783081
epoch: 2 training_loss 0.15050602104514837 test_loss: 0.13357385396957397
epoch: 3 training_loss 0.13440910033881664 test_loss: 0.13740390539169312
epoch: 4 training_loss 0.12192690324038268 test_loss: 0.13612171411514282
epoch: 5 training_loss 0.12729744609445334 test_loss: 0.13526477813720703
epoch: 6 training_loss 0.11657896880060434 test_loss: 0.16333023309707642
epoch: 7 training_loss 0.11832150790840387 test_loss: 0.12955405712127685
epoch: 8 training_loss 0.11418791443109512 test_loss: 0.1083678126335144
epoch: 9 training_loss 0.11850128211081028 test_loss: 0.139348566532135
epoch: 10 training_loss 0.11131616231054067 test_loss: 0.12355663776397705
epoch: 11 training_loss 0.1095462753996253 test_loss: 0.12744396924972534
epoch: 12 training_loss 0.10675017546862364 test_loss: 0.13150291442871093
epoch: 13 training_loss 0.1116225478053093 test_loss: 0.12583451271057128
epoch: 14 training_loss 0.11375635206699371 test_loss: 0.12427557706832885
epoch: 15 training_loss 0.11135001834481954 test_loss: 0.12073142528533935
epoch: 16 training_loss 0.10971743240952492 test_loss: 0.13253384828567505
epoch: 17 training_loss 0.11139940414577723 test_loss: 0.13376713991165162
epoch: 18 training_loss 0.10488520564511418 test_loss: 0.1251988649368286
epoch: 19 training_loss 0.0953113293275237 test_loss: 0.1105342149734497
epoch: 20 training_loss 0.10302061863243579 test_loss: 0.11234245300292969
epoch: 21 training_loss 0.11450204759836197 test_loss: 0.12891263961791993
epoch: 22 training_loss 0.10714544784277677 test_loss: 0.12325283288955688
epoch: 23 training_loss 0.10733702555298805 test_loss: 0.12407994270324707
epoch: 24 training_loss 0.1091790172085166 test_loss: 0.10741667747497559
epoch: 25 training_loss 0.10594314776360989 test_loss: 0.11948515176773071
epoch: 26 training_loss 0.10328191637992859 test_loss: 0.10664985179901124
epoch: 27 training_loss 0.1048963675275445 test_loss: 0.1125078797340393
epoch: 28 training_loss 0.10114973820745946 test_loss: 0.11027592420578003
epoch: 29 training_loss 0.10739896278828383 test_loss: 0.11416797637939453
epoch: 30 training_loss 0.09898794017732143 test_loss: 0.1189850926399231
epoch: 31 training_loss 0.10676929250359535 test_loss: 0.1283712863922119
epoch: 32 training_loss 0.10650721877813339 test_loss: 0.10923070907592773
epoch: 33 training_loss 0.1024902730062604 test_loss: 0.11321195363998413
epoch: 34 training_loss 0.10834192179143429 test_loss: 0.11317967176437378
epoch: 35 training_loss 0.09684754364192485 test_loss: 0.11808828115463257
epoch: 36 training_loss 0.10544546391814948 test_loss: 0.10768356323242187
epoch: 37 training_loss 0.10249578051269054 test_loss: 0.11863169670104981
epoch: 38 training_loss 0.09986855320632458 test_loss: 0.10499368906021118
epoch: 39 training_loss 0.10341430636122823 test_loss: 0.12153023481369019
epoch: 40 training_loss 0.10266827275976538 test_loss: 0.11662589311599732
epoch: 41 training_loss 0.08941611524671317 test_loss: 0.09705086350440979
epoch: 42 training_loss 0.10136576851829887 test_loss: 0.12065349817276001
epoch: 43 training_loss 0.10118768338114023 test_loss: 0.1389461040496826
epoch: 44 training_loss 0.09847935916855931 test_loss: 0.11569596529006958
epoch: 45 training_loss 0.09446182187646628 test_loss: 0.10658258199691772
epoch: 46 training_loss 0.10199697962030768 test_loss: 0.113956880569458
epoch: 47 training_loss 0.11067958250641823 test_loss: 0.13989243507385254
epoch: 48 training_loss 0.09816032801754773 test_loss: 0.13032088279724122
epoch: 49 training_loss 0.10489525455981492 test_loss: 0.12735708951950073
epoch: 50 training_loss 0.1025952448323369 test_loss: 0.1196091890335083
epoch: 51 training_loss 0.10040582414716483 test_loss: 0.10918498039245605
epoch: 52 training_loss 0.09696095064282417 test_loss: 0.12481707334518433
epoch: 53 training_loss 0.10476505566388368 test_loss: 0.10477107763290405
epoch: 54 training_loss 0.10395349327474833 test_loss: 0.11026411056518555
epoch: 55 training_loss 0.09866801980882883 test_loss: 0.13781445026397704
epoch: 56 training_loss 0.10091825567185879 test_loss: 0.12402472496032715
epoch: 57 training_loss 0.10588985810056328 test_loss: 0.1292742371559143
epoch: 58 training_loss 0.09934794150292874 test_loss: 0.10594788789749146
epoch: 59 training_loss 0.09999023549258709 test_loss: 0.10104750394821167
epoch: 60 training_loss 0.09650380635634065 test_loss: 0.11864882707595825
epoch: 61 training_loss 0.10357918666675686 test_loss: 0.11774790287017822
epoch: 62 training_loss 0.09893726764246821 test_loss: 0.1199525237083435
epoch: 63 training_loss 0.09644921522587538 test_loss: 0.11225366592407227
epoch: 64 training_loss 0.09641513353213668 test_loss: 0.1217086911201477
epoch: 65 training_loss 0.09787157762795687 test_loss: 0.12101805210113525
epoch: 66 training_loss 0.09302055468782783 test_loss: 0.11345458030700684
epoch: 67 training_loss 0.10243806041777134 test_loss: 0.12147964239120483
epoch: 68 training_loss 0.09776934741064906 test_loss: 0.10947293043136597
epoch: 69 training_loss 0.09027246553450823 test_loss: 0.13049217462539672
epoch: 70 training_loss 0.10130768476054072 test_loss: 0.10923874378204346
epoch: 71 training_loss 0.09969971064478159 test_loss: 0.13639249801635742
epoch: 72 training_loss 0.09766393661499023 test_loss: 0.10646358728408814
epoch: 73 training_loss 0.094285549800843 test_loss: 0.12393355369567871
epoch: 74 training_loss 0.09421171830967069 test_loss: 0.11728979349136352
epoch: 75 training_loss 0.10055684737861156 test_loss: 0.10942748785018921
epoch: 76 training_loss 0.09428299417719245 test_loss: 0.10833323001861572
epoch: 77 training_loss 0.09390017352998256 test_loss: 0.10488394498825074
epoch: 78 training_loss 0.09700413269922137 test_loss: 0.11776045560836793
epoch: 79 training_loss 0.09254512092098593 test_loss: 0.11982128620147706
epoch: 80 training_loss 0.09957053670659662 test_loss: 0.10703431367874146
epoch: 81 training_loss 0.09167286995798349 test_loss: 0.11742217540740967
epoch: 82 training_loss 0.09226983908563852 test_loss: 0.0996279239654541
epoch: 83 training_loss 0.0948872934281826 test_loss: 0.1110830545425415
epoch: 84 training_loss 0.09730157807469368 test_loss: 0.10672669410705567
epoch: 85 training_loss 0.09612136628478765 test_loss: 0.12010694742202759
epoch: 86 training_loss 0.09199496608227492 test_loss: 0.1349156379699707
epoch: 87 training_loss 0.0913703328743577 test_loss: 0.11504735946655273
epoch: 88 training_loss 0.09833307068794966 test_loss: 0.11122527122497558
epoch: 89 training_loss 0.09904783992096781 test_loss: 0.12678959369659423
epoch: 90 training_loss 0.09922753173857927 test_loss: 0.12441712617874146
epoch: 91 training_loss 0.10324053060263395 test_loss: 0.10974129438400268
epoch: 92 training_loss 0.09095063883811236 test_loss: 0.12903032302856446
epoch: 93 training_loss 0.09535250630229712 test_loss: 0.1237830638885498
epoch: 94 training_loss 0.0947570538520813 test_loss: 0.08681568503379822
epoch: 95 training_loss 0.09445328881964088 test_loss: 0.1105299711227417
epoch: 96 training_loss 0.09318074399605393 test_loss: 0.13308119773864746
epoch: 97 training_loss 0.09751958211883903 test_loss: 0.11191239356994628
epoch: 98 training_loss 0.09139100998640061 test_loss: 0.12278161048889161
epoch: 99 training_loss 0.0970549276471138 test_loss: 0.11974221467971802
epoch: 100 training_loss 0.09273078149184584 test_loss: 0.10988489389419556
epoch: 101 training_loss 0.09277335723862051 test_loss: 0.10794432163238525
epoch: 102 training_loss 0.09215874852612614 test_loss: 0.1098023772239685
epoch: 103 training_loss 0.0951756203174591 test_loss: 0.11433391571044922
epoch: 104 training_loss 0.09556812427937984 test_loss: 0.12419378757476807
epoch: 105 training_loss 0.0961503247357905 test_loss: 0.12429615259170532
epoch: 106 training_loss 0.09405086155980825 test_loss: 0.1282542586326599
epoch: 107 training_loss 0.09566680859774351 test_loss: 0.11205902099609374
epoch: 108 training_loss 0.09299163116142153 test_loss: 0.12129946947097778
epoch: 109 training_loss 0.09287017177790403 test_loss: 0.1293712854385376
epoch: 110 training_loss 0.09072651887312531 test_loss: 0.11160128116607666
epoch: 111 training_loss 0.09437829084694385 test_loss: 0.10226716995239257
epoch: 112 training_loss 0.08981339925900102 test_loss: 0.10761131048202514
epoch: 113 training_loss 0.0993812327645719 test_loss: 0.11546674966812134
epoch: 114 training_loss 0.09639872372150421 test_loss: 0.12397987842559814
epoch: 115 training_loss 0.09029528418555856 test_loss: 0.10880177021026612
epoch: 116 training_loss 0.08997866284102202 test_loss: 0.11116584539413452
epoch: 117 training_loss 0.09027788648381829 test_loss: 0.11250417232513428
epoch: 118 training_loss 0.09271744141355157 test_loss: 0.1061370849609375
epoch: 119 training_loss 0.0906933176331222 test_loss: 0.11288481950759888
epoch: 120 training_loss 0.09509268578141927 test_loss: 0.12171560525894165
epoch: 121 training_loss 0.0975399524718523 test_loss: 0.11317787170410157
epoch: 122 training_loss 0.09568580713123083 test_loss: 0.11754364967346191
epoch: 123 training_loss 0.09132542338222266 test_loss: 0.11483346223831177
epoch: 124 training_loss 0.10080825258046389 test_loss: 0.11471130847930908
epoch: 125 training_loss 0.08570655953139067 test_loss: 0.12028831243515015
epoch: 126 training_loss 0.09473594607785345 test_loss: 0.1298448324203491
epoch: 127 training_loss 0.09146196067333222 test_loss: 0.1109563946723938
epoch: 128 training_loss 0.09236742712557316 test_loss: 0.125246798992157
epoch: 129 training_loss 0.09253439176827669 test_loss: 0.12072060108184815
epoch: 130 training_loss 0.08624983230605722 test_loss: 0.11427137851715088
epoch: 131 training_loss 0.08708658928051591 test_loss: 0.13391947746276855
epoch: 132 training_loss 0.08570590795949101 test_loss: 0.12810471057891845
epoch: 133 training_loss 0.09328305203467607 test_loss: 0.11788018941879272
epoch: 134 training_loss 0.08657108942046761 test_loss: 0.09681278467178345
epoch: 135 training_loss 0.08318230386823416 test_loss: 0.12760086059570314
epoch: 136 training_loss 0.09372191399335861 test_loss: 0.12242013216018677
epoch: 137 training_loss 0.09096713826060294 test_loss: 0.10216184854507446
epoch: 138 training_loss 0.0852851807884872 test_loss: 0.11510123014450073
epoch: 139 training_loss 0.08647564183920622 test_loss: 0.10651144981384278
epoch: 140 training_loss 0.08125445136800408 test_loss: 0.13067874908447266
epoch: 141 training_loss 0.08643008123151957 test_loss: 0.12269642353057861
epoch: 142 training_loss 0.09207217160612345 test_loss: 0.1149407982826233
epoch: 143 training_loss 0.08394330279901624 test_loss: 0.12179123163223267
epoch: 144 training_loss 0.08618963301181794 test_loss: 0.12797923088073732
epoch: 145 training_loss 0.08352156848646701 test_loss: 0.11305991411209107
epoch: 146 training_loss 0.09009953588247299 test_loss: 0.12896205186843873
epoch: 147 training_loss 0.08665078457444907 test_loss: 0.11522123813629151
epoch: 148 training_loss 0.08643378837034106 test_loss: 0.11753041744232177
epoch: 149 training_loss 0.0888427153043449 test_loss: 0.12142934799194335
epoch: 0 training_loss 0.3257118009030819 test_loss: 0.22446129322052003
epoch: 1 training_loss 0.18464862152934075 test_loss: 0.15757744312286376
epoch: 2 training_loss 0.1525245513767004 test_loss: 0.1503680467605591
epoch: 3 training_loss 0.14720107447355985 test_loss: 0.1674836754798889
epoch: 4 training_loss 0.13518663831055164 test_loss: 0.14120616912841796
epoch: 5 training_loss 0.13321625553071498 test_loss: 0.14369112253189087
epoch: 6 training_loss 0.1267935438081622 test_loss: 0.11498271226882935
epoch: 7 training_loss 0.12442031115293503 test_loss: 0.11836243867874145
epoch: 8 training_loss 0.11941339764744044 test_loss: 0.1233360767364502
epoch: 9 training_loss 0.1175479301624 test_loss: 0.1120952010154724
epoch: 10 training_loss 0.1108241719380021 test_loss: 0.09896119236946106
epoch: 11 training_loss 0.11592358693480492 test_loss: 0.12469298839569092
epoch: 12 training_loss 0.11875629879534244 test_loss: 0.10389394760131836
epoch: 13 training_loss 0.12181496623903514 test_loss: 0.10311094522476197
epoch: 14 training_loss 0.11000476736575365 test_loss: 0.10916762351989746
epoch: 15 training_loss 0.11575104262679815 test_loss: 0.10758422613143921
epoch: 16 training_loss 0.11299944330006838 test_loss: 0.11109603643417358
epoch: 17 training_loss 0.10874960839748382 test_loss: 0.11840598583221436
epoch: 18 training_loss 0.11306834150105714 test_loss: 0.10260875225067138
epoch: 19 training_loss 0.10410560434684157 test_loss: 0.1108051896095276
epoch: 20 training_loss 0.11381490875035524 test_loss: 0.09996269941329956
epoch: 21 training_loss 0.10944805338978768 test_loss: 0.10095354318618774
epoch: 22 training_loss 0.10374282263219356 test_loss: 0.11205949783325195
epoch: 23 training_loss 0.10775254808366298 test_loss: 0.10359817743301392
epoch: 24 training_loss 0.1084039643034339 test_loss: 0.10999536514282227
epoch: 25 training_loss 0.10994691587984562 test_loss: 0.10527157783508301
epoch: 26 training_loss 0.10614150863140821 test_loss: 0.10125290155410767
epoch: 27 training_loss 0.11098976917564869 test_loss: 0.10157574415206909
epoch: 28 training_loss 0.09833443481475115 test_loss: 0.10911465883255005
epoch: 29 training_loss 0.10875663444399834 test_loss: 0.1006558895111084
epoch: 30 training_loss 0.10608662992715835 test_loss: 0.1137770652770996
epoch: 31 training_loss 0.11430375881493092 test_loss: 0.11410082578659057
epoch: 32 training_loss 0.10014321260154248 test_loss: 0.10780194997787476
epoch: 33 training_loss 0.09866264697164297 test_loss: 0.08995846509933472
epoch: 34 training_loss 0.09782292930409313 test_loss: 0.11333141326904297
epoch: 35 training_loss 0.10493018312379718 test_loss: 0.09405028820037842
epoch: 36 training_loss 0.09864488055929542 test_loss: 0.0991885483264923
epoch: 37 training_loss 0.09959102623164653 test_loss: 0.10565989017486573
epoch: 38 training_loss 0.10688308401033282 test_loss: 0.12142900228500367
epoch: 39 training_loss 0.1008635857142508 test_loss: 0.10148659944534302
epoch: 40 training_loss 0.1045618192665279 test_loss: 0.11828631162643433
epoch: 41 training_loss 0.10935231890529394 test_loss: 0.1211084246635437
epoch: 42 training_loss 0.10397326175123453 test_loss: 0.10912320613861085
epoch: 43 training_loss 0.09676875934004783 test_loss: 0.09123666882514954
epoch: 44 training_loss 0.0985274481587112 test_loss: 0.1027055025100708
epoch: 45 training_loss 0.10274036275222898 test_loss: 0.12202146053314208
epoch: 46 training_loss 0.10438176698982715 test_loss: 0.10336257219314575
epoch: 47 training_loss 0.1126765988022089 test_loss: 0.10265135765075684
epoch: 48 training_loss 0.10799443315714598 test_loss: 0.09473255276679993
epoch: 49 training_loss 0.09902106983587146 test_loss: 0.10925137996673584
epoch: 50 training_loss 0.09866013139486313 test_loss: 0.11765300035476685
epoch: 51 training_loss 0.1102379172667861 test_loss: 0.11180956363677978
epoch: 52 training_loss 0.10910496212542058 test_loss: 0.11679158210754395
epoch: 53 training_loss 0.09563156396150589 test_loss: 0.11174618005752564
epoch: 54 training_loss 0.10040831806138158 test_loss: 0.1226275086402893
epoch: 55 training_loss 0.10042090132832528 test_loss: 0.11771354675292969
epoch: 56 training_loss 0.10432827338576317 test_loss: 0.11419955492019654
epoch: 57 training_loss 0.10244117701426149 test_loss: 0.11769531965255738
epoch: 58 training_loss 0.10465541258454322 test_loss: 0.116007399559021
epoch: 59 training_loss 0.09048304175958037 test_loss: 0.13014285564422606
epoch: 60 training_loss 0.11023126589134336 test_loss: 0.11861512660980225
epoch: 61 training_loss 0.10138002116233111 test_loss: 0.10895264148712158
epoch: 62 training_loss 0.09966753644868731 test_loss: 0.09631145000457764
epoch: 63 training_loss 0.0984657296538353 test_loss: 0.1106534481048584
epoch: 64 training_loss 0.10207250406965614 test_loss: 0.11968386173248291
epoch: 65 training_loss 0.1074888602644205 test_loss: 0.09261932373046874
epoch: 66 training_loss 0.0969803760945797 test_loss: 0.10839314460754394
epoch: 67 training_loss 0.0969255743175745 test_loss: 0.11013579368591309
epoch: 68 training_loss 0.10507246438413859 test_loss: 0.11279230117797852
epoch: 69 training_loss 0.10312055070884525 test_loss: 0.12218257188796997
epoch: 70 training_loss 0.10255707941949367 test_loss: 0.09167581796646118
epoch: 71 training_loss 0.09938246667385102 test_loss: 0.1050447702407837
epoch: 72 training_loss 0.10749602876603603 test_loss: 0.12269970178604125
epoch: 73 training_loss 0.10407647058367729 test_loss: 0.11519442796707154
epoch: 74 training_loss 0.09486094892024993 test_loss: 0.08974704146385193
epoch: 75 training_loss 0.0953078336827457 test_loss: 0.09802879691123963
epoch: 76 training_loss 0.09523794339969754 test_loss: 0.09481037259101868
epoch: 77 training_loss 0.10016498565673829 test_loss: 0.0908435583114624
epoch: 78 training_loss 0.10665812596678734 test_loss: 0.09445010423660279
epoch: 79 training_loss 0.09021772608160973 test_loss: 0.1207197666168213
epoch: 80 training_loss 0.09878911484032869 test_loss: 0.10047569274902343
epoch: 81 training_loss 0.10634940583258867 test_loss: 0.11891425848007202
epoch: 82 training_loss 0.10489250309765338 test_loss: 0.11566424369812012
epoch: 83 training_loss 0.10391690138727426 test_loss: 0.10036700963973999
epoch: 84 training_loss 0.10213681584224105 test_loss: 0.120769464969635
epoch: 85 training_loss 0.10084193419665098 test_loss: 0.10841568708419799
epoch: 86 training_loss 0.09139993108808994 test_loss: 0.09939777255058288
epoch: 87 training_loss 0.09406263377517461 test_loss: 0.10521562099456787
epoch: 88 training_loss 0.0965091659873724 test_loss: 0.1363231658935547
epoch: 89 training_loss 0.09768258541822433 test_loss: 0.12058045864105224
epoch: 90 training_loss 0.09506894459947944 test_loss: 0.12280545234680176
epoch: 91 training_loss 0.09539071798324585 test_loss: 0.10395811796188355
epoch: 92 training_loss 0.09412353001534939 test_loss: 0.10597211122512817
epoch: 93 training_loss 0.09870263174176216 test_loss: 0.10798100233078003
epoch: 94 training_loss 0.09773286513984203 test_loss: 0.11229875087738037
epoch: 95 training_loss 0.09151080515235663 test_loss: 0.10479292869567872
epoch: 96 training_loss 0.09582814261317253 test_loss: 0.10777114629745484
epoch: 97 training_loss 0.09725927997380496 test_loss: 0.11689229011535644
epoch: 98 training_loss 0.10244976010173559 test_loss: 0.1120413899421692
epoch: 99 training_loss 0.0939785273373127 test_loss: 0.109999680519104
epoch: 100 training_loss 0.09926325291395187 test_loss: 0.10417088270187377
epoch: 101 training_loss 0.0939445616491139 test_loss: 0.10514878034591675
epoch: 102 training_loss 0.10302193343639374 test_loss: 0.10471017360687256
epoch: 103 training_loss 0.09689064214006066 test_loss: 0.09854177236557007
epoch: 104 training_loss 0.08671070985496045 test_loss: 0.13010337352752685
epoch: 105 training_loss 0.08903740139678121 test_loss: 0.11636979579925537
epoch: 106 training_loss 0.09512404665350914 test_loss: 0.10313150882720948
epoch: 107 training_loss 0.09127798859030008 test_loss: 0.11570062637329101
epoch: 108 training_loss 0.09444872319698333 test_loss: 0.10301185846328735
epoch: 109 training_loss 0.10162283599376679 test_loss: 0.11181707382202148
epoch: 110 training_loss 0.10570996608585119 test_loss: 0.09114107489585876
epoch: 111 training_loss 0.09038554145023227 test_loss: 0.11910080909729004
epoch: 112 training_loss 0.09522005503997207 test_loss: 0.11007541418075562
epoch: 113 training_loss 0.09201382506638765 test_loss: 0.09222511649131775
epoch: 114 training_loss 0.09430213006213307 test_loss: 0.11282440423965454
epoch: 115 training_loss 0.10041284304112195 test_loss: 0.1112859845161438
epoch: 116 training_loss 0.0912864002212882 test_loss: 0.1202347993850708
epoch: 117 training_loss 0.09454788083210587 test_loss: 0.11595183610916138
epoch: 118 training_loss 0.09791492387652397 test_loss: 0.10967005491256714
epoch: 119 training_loss 0.08931972216814757 test_loss: 0.11350426673889161
epoch: 120 training_loss 0.09309652702882885 test_loss: 0.10161176919937134
epoch: 121 training_loss 0.0979074402153492 test_loss: 0.10816382169723511
epoch: 122 training_loss 0.09272037290036678 test_loss: 0.1137235164642334
epoch: 123 training_loss 0.09147262060083448 test_loss: 0.09266780614852906
epoch: 124 training_loss 0.09548022596165538 test_loss: 0.1358904242515564
epoch: 125 training_loss 0.09183265004307031 test_loss: 0.11284528970718384
epoch: 126 training_loss 0.09031412284821272 test_loss: 0.1150735855102539
epoch: 127 training_loss 0.09664335325360299 test_loss: 0.1127431869506836
epoch: 128 training_loss 0.0915224683471024 test_loss: 0.09944799542427063
epoch: 129 training_loss 0.09621810248121619 test_loss: 0.10436725616455078
epoch: 130 training_loss 0.09038129324093461 test_loss: 0.12188036441802978
epoch: 131 training_loss 0.09108900636434555 test_loss: 0.10582926273345947
epoch: 132 training_loss 0.09623882396146655 test_loss: 0.10641398429870605
epoch: 133 training_loss 0.09041620379313826 test_loss: 0.10693378448486328
epoch: 134 training_loss 0.09448666680604219 test_loss: 0.08879385590553283
epoch: 135 training_loss 0.09137123586609959 test_loss: 0.10331940650939941
epoch: 136 training_loss 0.09668936743400991 test_loss: 0.09845191836357117
epoch: 137 training_loss 0.089952561147511 test_loss: 0.10404118299484252
epoch: 138 training_loss 0.08725531982257963 test_loss: 0.1095581293106079
epoch: 139 training_loss 0.09002038322389126 test_loss: 0.10615146160125732
epoch: 140 training_loss 0.09257407702505588 test_loss: 0.10431979894638062
epoch: 141 training_loss 0.09162661803886295 test_loss: 0.09684568643569946
epoch: 142 training_loss 0.0924502663873136 test_loss: 0.12457493543624878
epoch: 143 training_loss 0.09305930115282536 test_loss: 0.12496682405471801
epoch: 144 training_loss 0.08800101276487111 test_loss: 0.10178353786468505
epoch: 145 training_loss 0.09166928606107831 test_loss: 0.11848970651626586
epoch: 146 training_loss 0.09911522699519992 test_loss: 0.09846093058586121
epoch: 147 training_loss 0.09461023267358541 test_loss: 0.1115774393081665
epoch: 148 training_loss 0.08810017859563231 test_loss: 0.11971443891525269
epoch: 149 training_loss 0.0943331521935761 test_loss: 0.09195653200149537
episode: 0 training return: -999.9217259295162
episode: 1 training return: -999.8990648088949
episode: 2 training return: -999.9265804897418
episode: 3 training return: -999.9223018311833
epoch: 1 test_true_pfm: -0.2758762644279901 sim_pfm: -999.6888178962632
episode: 4 training return: -999.8988745113021
episode: 5 training return: -999.906644392705
episode: 6 training return: -999.9077039937722
episode: 7 training return: -999.9274141572428
epoch: 2 test_true_pfm: -0.42487973055795175 sim_pfm: -999.690084160899
episode: 8 training return: -999.9098475791978
episode: 9 training return: -999.914767099123
episode: 10 training return: -999.8994104610531
episode: 11 training return: -999.9236685546197
epoch: 3 test_true_pfm: -0.13303094592727402 sim_pfm: -999.6899180664605
episode: 12 training return: -999.9031986754652
episode: 13 training return: -999.9014415711333
episode: 14 training return: -999.9054598223742
episode: 15 training return: -999.9241055163059
epoch: 4 test_true_pfm: -0.6924007141407525 sim_pfm: -999.6927005318676
episode: 16 training return: -999.9148360524317
episode: 17 training return: -999.9207060793743
episode: 18 training return: -999.916867966457
episode: 19 training return: -999.9271716749055
epoch: 5 test_true_pfm: -0.531579335379429 sim_pfm: -999.6894247944471
episode: 20 training return: -999.8620549721787
episode: 21 training return: -999.924210985212
episode: 22 training return: -999.9219183700385
episode: 23 training return: -999.9200813223299
epoch: 6 test_true_pfm: 0.08659220842567977 sim_pfm: -999.6936089842615
episode: 24 training return: -999.9258509418445
episode: 25 training return: -999.9086503586509
episode: 26 training return: -999.8745776737688
episode: 27 training return: -999.9082046125844
epoch: 7 test_true_pfm: -0.08220423919820062 sim_pfm: -999.6899765200665
episode: 28 training return: -999.8739191441189
episode: 29 training return: -999.9157502986483
episode: 30 training return: -999.8958379271548
episode: 31 training return: -999.9314051357482
epoch: 8 test_true_pfm: -0.6847065456350084 sim_pfm: -999.6869254122006
episode: 32 training return: -999.923022285234
episode: 33 training return: -999.9053477443078
episode: 34 training return: -999.9168473381217
episode: 35 training return: -999.900234128065
epoch: 9 test_true_pfm: -0.8477597312063577 sim_pfm: -999.6973213008197
episode: 36 training return: -999.9024817372765
episode: 37 training return: -999.8907204180545
episode: 38 training return: -999.9169746351762
episode: 39 training return: -999.8889477276264
epoch: 10 test_true_pfm: -0.44019576800360233 sim_pfm: -999.6925578617323
episode: 40 training return: -999.9037521358263
episode: 41 training return: -999.859779921525
episode: 42 training return: -999.9072286583602
episode: 43 training return: -999.867500303501
epoch: 11 test_true_pfm: -0.7461775911349594 sim_pfm: -999.6964912021882
episode: 44 training return: -999.9045997066218
episode: 45 training return: -999.9025540112135
episode: 46 training return: -999.91321264175
episode: 47 training return: -999.9225649742667
epoch: 12 test_true_pfm: -0.5924103998000975 sim_pfm: -999.6860321715907
episode: 48 training return: -999.8994114424033
episode: 49 training return: -999.919929114975
episode: 50 training return: -999.8937061110374
episode: 51 training return: -999.9009225349662
epoch: 13 test_true_pfm: -0.12118198242430378 sim_pfm: -999.6932216585277
episode: 52 training return: -999.9212595803859
episode: 53 training return: -999.8791432898757
episode: 54 training return: -999.9245076941315
episode: 55 training return: -999.9040826923216
epoch: 14 test_true_pfm: -0.06671708174138676 sim_pfm: -999.690634425522
episode: 56 training return: -999.9222652974469
episode: 57 training return: -999.9408641207159
episode: 58 training return: -999.9238713991929
episode: 59 training return: -999.8976988752195
epoch: 15 test_true_pfm: -0.02242142367279597 sim_pfm: -999.6869026549757
episode: 60 training return: -999.7650788728453
episode: 61 training return: -999.9259465542134
episode: 62 training return: -999.922087672018
episode: 63 training return: -999.9064218037031
epoch: 16 test_true_pfm: -0.37310705555253887 sim_pfm: -999.6908737288785
episode: 64 training return: -999.916269307365
episode: 65 training return: -999.924296594496
episode: 66 training return: -999.9248186329237
episode: 67 training return: -999.9272336009693
epoch: 17 test_true_pfm: -0.05230292903615047 sim_pfm: -999.6927634541989
episode: 68 training return: -999.9215656995787
episode: 69 training return: -999.9208005223544
episode: 70 training return: -999.9174234228059
episode: 71 training return: -999.9087888161522
epoch: 18 test_true_pfm: -0.5097787456188517 sim_pfm: -999.6970999933748
episode: 72 training return: -999.8915122734332
episode: 73 training return: -999.9152868813536
episode: 74 training return: -999.886162075733
episode: 75 training return: -999.9273972193274
epoch: 19 test_true_pfm: -0.2633396321608416 sim_pfm: -999.6882114444702
episode: 76 training return: -999.8988707532071
episode: 77 training return: -999.9056536020585
episode: 78 training return: -999.9072782135554
episode: 79 training return: -999.910175370232
epoch: 20 test_true_pfm: -0.5527256375761214 sim_pfm: -999.6922856828223
episode: 80 training return: -999.8662592221748
episode: 81 training return: -999.8577326984235
episode: 82 training return: -999.9362894398781
episode: 83 training return: -999.9013475227874
epoch: 21 test_true_pfm: -0.3026069359040418 sim_pfm: -999.6900345940329
episode: 84 training return: -999.92127366954
episode: 85 training return: -999.922589023843
episode: 86 training return: -999.9088604185398
episode: 87 training return: -999.9058533131229
epoch: 22 test_true_pfm: -0.8728956332746733 sim_pfm: -999.6861362266145
episode: 88 training return: -999.9099266395393
episode: 89 training return: -999.9376510887561
episode: 90 training return: -999.9274958772224
episode: 91 training return: -999.9194747560491
epoch: 23 test_true_pfm: 0.32444874460131645 sim_pfm: -999.6896977633129
episode: 92 training return: -999.9168450204523
episode: 93 training return: -999.8952328603451
episode: 94 training return: -999.9261426474895
episode: 95 training return: -999.9280482189291
epoch: 24 test_true_pfm: 0.16583764482478505 sim_pfm: -999.6938221814218
episode: 96 training return: -999.8913830434598
episode: 97 training return: -999.9079223961085
episode: 98 training return: -999.9181940353775
episode: 99 training return: -999.8981633832715
epoch: 25 test_true_pfm: -0.8203369247894639 sim_pfm: -999.6894313740936
episode: 100 training return: -999.9287335841076
episode: 101 training return: -999.9072305588311
episode: 102 training return: -999.9194673452715
episode: 103 training return: -999.8782370267761
epoch: 26 test_true_pfm: -0.6802090604218846 sim_pfm: -999.692406062717
episode: 104 training return: -999.9069150290263
episode: 105 training return: -999.8294150763471
episode: 106 training return: -999.923540782242
episode: 107 training return: -999.9124355391014
epoch: 27 test_true_pfm: -0.07075186742489813 sim_pfm: -999.6862248863763
episode: 108 training return: -999.9130215321354
episode: 109 training return: -999.8870036754095
episode: 110 training return: -999.9300282743077
episode: 111 training return: -999.8984422826995
epoch: 28 test_true_pfm: 0.07855550435071305 sim_pfm: -999.6930087445575
episode: 112 training return: -999.9204490850261
episode: 113 training return: -999.8950712162724
episode: 114 training return: -999.9144366929357
episode: 115 training return: -999.9215690530366
epoch: 29 test_true_pfm: -0.7571912394098684 sim_pfm: -999.6872606965021
episode: 116 training return: -999.9254568009965
episode: 117 training return: -999.9377929728204
episode: 118 training return: -999.9095841746271
episode: 119 training return: -999.883647891888
epoch: 30 test_true_pfm: 0.36037963864201145 sim_pfm: -999.699265514931
episode: 120 training return: -999.9290381045973
episode: 121 training return: -999.9180596106497
episode: 122 training return: -999.926258672051
episode: 123 training return: -999.9059270185679
epoch: 31 test_true_pfm: 0.07289944249271962 sim_pfm: -999.6869746282546
episode: 124 training return: -999.8943664739555
episode: 125 training return: -999.9229705049662
episode: 126 training return: -999.9120534407912
episode: 127 training return: -999.9212151720371
epoch: 32 test_true_pfm: -0.18653577543953107 sim_pfm: -999.6898686113615
episode: 128 training return: -999.8977956619486
episode: 129 training return: -999.9328139367957
episode: 130 training return: -999.9088123419098
episode: 131 training return: -999.9181886347992
epoch: 33 test_true_pfm: 0.15181035716789013 sim_pfm: -999.6872986525854
episode: 132 training return: -999.9197474451873
episode: 133 training return: -999.844122346273
episode: 134 training return: -999.9183174830316
episode: 135 training return: -999.895580813906
epoch: 34 test_true_pfm: 0.12328655956820356 sim_pfm: -999.6869153105393
episode: 136 training return: -999.933976680558
episode: 137 training return: -999.9047605104977
episode: 138 training return: -999.924691516888
episode: 139 training return: -999.9154348636946
epoch: 35 test_true_pfm: -0.5141026232411046 sim_pfm: -999.6919899455337
episode: 140 training return: -999.878010265857
episode: 141 training return: -999.9236406144795
episode: 142 training return: -999.9169359741397
episode: 143 training return: -999.888063801156
epoch: 36 test_true_pfm: -0.3354002392463158 sim_pfm: -999.6884588914776
episode: 144 training return: -999.9095377953126
episode: 145 training return: -999.9091628624885
episode: 146 training return: -999.915080303469
episode: 147 training return: -999.9264013552631
epoch: 37 test_true_pfm: -1.0296744155463993 sim_pfm: -999.6906614369292
episode: 148 training return: -999.912147651109
episode: 149 training return: -999.9064762809941
episode: 150 training return: -999.9051594329959
episode: 151 training return: -999.9272374626565
epoch: 38 test_true_pfm: -0.8062464228154855 sim_pfm: -999.6887854403407
episode: 152 training return: -999.9226495279017
episode: 153 training return: -999.9329895740015
episode: 154 training return: -999.8929798674403
episode: 155 training return: -999.9244611269901
epoch: 39 test_true_pfm: -0.5445655465009801 sim_pfm: -999.6938212448343
episode: 156 training return: -999.8979270596376
episode: 157 training return: -999.922678300527
episode: 158 training return: -999.9052207612846
episode: 159 training return: -999.8704271452344
epoch: 40 test_true_pfm: -0.007012503766917412 sim_pfm: -999.6933476854224
episode: 160 training return: -999.9305957973133
episode: 161 training return: -999.9043622148143
episode: 162 training return: -999.900175134724
episode: 163 training return: -999.8873942766719
epoch: 41 test_true_pfm: -0.2420578963221054 sim_pfm: -999.69305960849
episode: 164 training return: -999.9312779242078
episode: 165 training return: -999.9249195232525
episode: 166 training return: -999.9374724143744
episode: 167 training return: -999.9238218943563
epoch: 42 test_true_pfm: -0.22878563724612022 sim_pfm: -999.6934908461494
episode: 168 training return: -999.9041541322613
episode: 169 training return: -999.9113875620018
episode: 170 training return: -999.8997498279265
episode: 171 training return: -999.9123953050912
epoch: 43 test_true_pfm: 0.2270210087216149 sim_pfm: -999.692006507616
episode: 172 training return: -999.9202088843684
episode: 173 training return: -999.9124547291127
episode: 174 training return: -999.9370928001533
episode: 175 training return: -999.8824416555593
epoch: 44 test_true_pfm: -0.831831652453216 sim_pfm: -999.6942697324188
episode: 176 training return: -999.9074046671687
episode: 177 training return: -999.9079252761292
episode: 178 training return: -999.9307020488425
episode: 179 training return: -999.9192046924834
epoch: 45 test_true_pfm: -0.06433988557446914 sim_pfm: -999.6928282554468
episode: 180 training return: -999.931690378494
episode: 181 training return: -999.9331656211416
episode: 182 training return: -999.9259834791608
episode: 183 training return: -999.9035747846757
epoch: 46 test_true_pfm: -0.7052171821043309 sim_pfm: -999.6869664980891
episode: 184 training return: -999.9386009504276
episode: 185 training return: -999.9194581241001
episode: 186 training return: -999.9051917937385
episode: 187 training return: -999.9165482729591
epoch: 47 test_true_pfm: -1.082652977560315 sim_pfm: -999.6890775082729
episode: 188 training return: -999.9265752690469
episode: 189 training return: -999.9270219515645
episode: 190 training return: -999.9062618048481
episode: 191 training return: -999.900030814037
epoch: 48 test_true_pfm: -1.0966708555266693 sim_pfm: -999.6919876886126
episode: 192 training return: -999.904681032736
episode: 193 training return: -999.9151983094603
episode: 194 training return: -999.921836821679
episode: 195 training return: -999.9197333769938
epoch: 49 test_true_pfm: -0.3031823405384902 sim_pfm: -999.691914615241
episode: 196 training return: -999.9179311977385
episode: 197 training return: -999.9177663770886
episode: 198 training return: -999.8748934621781
episode: 199 training return: -999.9210677171951
epoch: 50 test_true_pfm: 0.17975499121883795 sim_pfm: -999.6888164088018
episode: 200 training return: -999.9258839991534
episode: 201 training return: -999.9109262800935
episode: 202 training return: -999.9311521900994
episode: 203 training return: -999.9279264237214
epoch: 51 test_true_pfm: -0.07485233431608036 sim_pfm: -999.694496634619
episode: 204 training return: -999.9144582623069
episode: 205 training return: -999.9121958553574
episode: 206 training return: -999.8584834953901
episode: 207 training return: -999.9280688930025
epoch: 52 test_true_pfm: -0.7181888532494938 sim_pfm: -999.6854357618041
episode: 208 training return: -999.925297606644
episode: 209 training return: -999.9027915779592
episode: 210 training return: -999.9032873041083
episode: 211 training return: -999.9141411943667
epoch: 53 test_true_pfm: -0.047751223282014414 sim_pfm: -999.6930389914382
episode: 212 training return: -999.9117385673294
episode: 213 training return: -999.9094317703141
episode: 214 training return: -999.9150372937208
episode: 215 training return: -999.9282659446598
epoch: 54 test_true_pfm: -1.3266074759601791 sim_pfm: -999.6879814541062
episode: 216 training return: -999.9130501704117
episode: 217 training return: -999.9163459397106
episode: 218 training return: -999.8890732136587
episode: 219 training return: -999.9249150095463
epoch: 55 test_true_pfm: 0.019327328791429288 sim_pfm: -999.6904119915299
episode: 220 training return: -999.9130514466103
episode: 221 training return: -999.9188393770038
episode: 222 training return: -999.9155121047585
episode: 223 training return: -999.9230856100289
epoch: 56 test_true_pfm: -0.5530499701687944 sim_pfm: -999.6870121699648
episode: 224 training return: -999.9292443219714
episode: 225 training return: -999.9253819098708
episode: 226 training return: -999.7999146965841
episode: 227 training return: -999.9125367389934
epoch: 57 test_true_pfm: 0.2761174573906227 sim_pfm: -999.6919061353765
episode: 228 training return: -999.9081641379108
episode: 229 training return: -999.9227020975596
episode: 230 training return: -999.9082876057859
episode: 231 training return: -999.8958272965679
epoch: 58 test_true_pfm: -0.27545911449539145 sim_pfm: -999.6869457233205
episode: 232 training return: -999.9024300287663
episode: 233 training return: -999.9272027930111
episode: 234 training return: -999.9245269661076
episode: 235 training return: -999.940549875754
epoch: 59 test_true_pfm: 0.025385408205195043 sim_pfm: -999.692475323851
episode: 236 training return: -999.9272737581523
episode: 237 training return: -999.8816661350285
episode: 238 training return: -999.8865283397706
episode: 239 training return: -999.921502894078
epoch: 60 test_true_pfm: -0.8607200096410299 sim_pfm: -999.6908113385139
episode: 240 training return: -999.8269608891269
episode: 241 training return: -999.9118470877154
episode: 242 training return: -999.8711489623953
episode: 243 training return: -999.9164070352522
epoch: 61 test_true_pfm: -0.7701282186773483 sim_pfm: -999.6905226873664
episode: 244 training return: -999.9185837544618
episode: 245 training return: -999.9172468380875
episode: 246 training return: -999.9266027728949
episode: 247 training return: -999.9033694017145
epoch: 62 test_true_pfm: -0.6566588641143459 sim_pfm: -999.6923132082128
episode: 248 training return: -999.9043972513481
episode: 249 training return: -999.9083079275138
episode: 250 training return: -999.9166557369712
episode: 251 training return: -999.8646752523066
epoch: 63 test_true_pfm: -0.20167107991393599 sim_pfm: -999.6879266145525
episode: 252 training return: -999.8970690670411
episode: 253 training return: -999.9103262696041
episode: 254 training return: -999.9080914885033
episode: 255 training return: -999.9192981820073
epoch: 64 test_true_pfm: 0.3463556827350495 sim_pfm: -999.6886407083671
episode: 256 training return: -999.9006334061121
episode: 257 training return: -999.8691934612852
episode: 258 training return: -999.9032940009612
episode: 259 training return: -999.9196454831202
epoch: 65 test_true_pfm: -0.4708030130956668 sim_pfm: -999.6904652512881
episode: 260 training return: -999.8830783990312
episode: 261 training return: -999.8928687829471
episode: 262 training return: -999.9180563683235
episode: 263 training return: -999.913204304644
epoch: 66 test_true_pfm: -0.2408482065675794 sim_pfm: -999.6977997659178
episode: 264 training return: -999.9323429740922
episode: 265 training return: -999.9139300221213
episode: 266 training return: -999.8749611267945
episode: 267 training return: -999.9249728003479
epoch: 67 test_true_pfm: -0.3273255877759056 sim_pfm: -999.691155882444
episode: 268 training return: -999.9041997820511
episode: 269 training return: -999.9025249013251
episode: 270 training return: -999.9018894153467
episode: 271 training return: -999.9044339056233
epoch: 68 test_true_pfm: -0.7038044740160255 sim_pfm: -999.6882950190942
episode: 272 training return: -999.900169104445
episode: 273 training return: -999.927007602181
episode: 274 training return: -999.9310138435042
episode: 275 training return: -999.9054765826845
epoch: 69 test_true_pfm: -0.7410660205881813 sim_pfm: -999.6966339754059
episode: 276 training return: -999.9100850221807
episode: 277 training return: -999.9027237499346
episode: 278 training return: -999.8897964529826
episode: 279 training return: -999.9254850937127
epoch: 70 test_true_pfm: -1.230609165532485 sim_pfm: -999.6862881949747
episode: 280 training return: -999.9107135772176
episode: 281 training return: -999.9209023416627
episode: 282 training return: -999.9350767270107
episode: 283 training return: -999.905398905131
epoch: 71 test_true_pfm: -0.5124502537305414 sim_pfm: -999.6948265846873
episode: 284 training return: -999.9170103861504
episode: 285 training return: -999.9130334661214
episode: 286 training return: -999.8232338146097
episode: 287 training return: -999.9079925124299
epoch: 72 test_true_pfm: -0.9051722717765628 sim_pfm: -999.692843912478
episode: 288 training return: -999.9086312884629
episode: 289 training return: -999.8802982917399
episode: 290 training return: -999.8704364311922
episode: 291 training return: -999.9050788888134
epoch: 73 test_true_pfm: -0.19535915882101915 sim_pfm: -999.6967960525411
episode: 292 training return: -999.9126051204278
episode: 293 training return: -999.9251169694963
episode: 294 training return: -999.9080920572185
episode: 295 training return: -999.9242915462361
epoch: 74 test_true_pfm: -0.9770951835769317 sim_pfm: -999.690107451141
episode: 296 training return: -999.8921596580892
episode: 297 training return: -999.9073967205424
episode: 298 training return: -999.9352173230537
episode: 299 training return: -999.9253752223575
epoch: 75 test_true_pfm: 0.21582326846241884 sim_pfm: -999.6902872316227
episode: 300 training return: -999.8804526438147
episode: 301 training return: -999.8791741437625
episode: 302 training return: -999.9168448005128
episode: 303 training return: -999.8774644129284
epoch: 76 test_true_pfm: -0.5543155786712383 sim_pfm: -999.6907326146538
episode: 304 training return: -999.9169522130569
episode: 305 training return: -999.920541121309
episode: 306 training return: -999.9221421788758
episode: 307 training return: -999.9116207631688
epoch: 77 test_true_pfm: -0.1965677926559065 sim_pfm: -999.6909837731292
episode: 308 training return: -999.923305525519
episode: 309 training return: -999.9061190252429
episode: 310 training return: -999.9107211049502
episode: 311 training return: -999.8979031147977
epoch: 78 test_true_pfm: -0.733286556345627 sim_pfm: -999.692006359726
episode: 312 training return: -999.9226211317057
episode: 313 training return: -999.8661966421482
episode: 314 training return: -999.9253338628528
episode: 315 training return: -999.8966091785236
epoch: 79 test_true_pfm: -0.2655822574279833 sim_pfm: -999.6900846160305
episode: 316 training return: -999.9272038505611
episode: 317 training return: -999.9193696929345
episode: 318 training return: -999.9133970518983
episode: 319 training return: -999.935466442019
epoch: 80 test_true_pfm: -0.5788353051012365 sim_pfm: -999.6962438325851
episode: 320 training return: -999.9052174468608
episode: 321 training return: -999.9151431833244
episode: 322 training return: -999.9366862753333
episode: 323 training return: -999.8656813453831
epoch: 81 test_true_pfm: -0.16494041647215837 sim_pfm: -999.6930564743961
episode: 324 training return: -999.9087555007651
episode: 325 training return: -999.9304386191117
episode: 326 training return: -999.8561835171894
episode: 327 training return: -999.9318492718895
epoch: 82 test_true_pfm: -0.07437292599203071 sim_pfm: -999.6875023695289
episode: 328 training return: -999.9010720237517
episode: 329 training return: -999.9022686733193
episode: 330 training return: -999.9225924337106
episode: 331 training return: -999.898533013742
epoch: 83 test_true_pfm: -0.06691843575565502 sim_pfm: -999.6931921610347
episode: 332 training return: -999.9278629041486
episode: 333 training return: -999.9193281898043
episode: 334 training return: -999.9247237891854
episode: 335 training return: -999.9229122505553
epoch: 84 test_true_pfm: -0.9022981962550204 sim_pfm: -999.6926896707419
episode: 336 training return: -999.9214973452274
episode: 337 training return: -999.8855343233495
episode: 338 training return: -999.9108551596818
episode: 339 training return: -999.9195071418819
epoch: 85 test_true_pfm: -0.3454358689252946 sim_pfm: -999.6907402443018
episode: 340 training return: -999.9164376875182
episode: 341 training return: -999.9289006286912
episode: 342 training return: -999.922850855531
episode: 343 training return: -999.9214873674331
epoch: 86 test_true_pfm: -0.11217654541367537 sim_pfm: -999.6894175998004
episode: 344 training return: -999.9026455661882
episode: 345 training return: -999.8356307941117
episode: 346 training return: -999.865488190534
episode: 347 training return: -999.9099404638545
epoch: 87 test_true_pfm: -0.7416305499687911 sim_pfm: -999.6863513444117
episode: 348 training return: -999.9292900907276
episode: 349 training return: -999.9172775979656
episode: 350 training return: -999.9245408098335
episode: 351 training return: -999.918325931663
epoch: 88 test_true_pfm: -0.508227457969051 sim_pfm: -999.6877207069347
episode: 352 training return: -999.8983509278461
episode: 353 training return: -999.9105801257704
episode: 354 training return: -999.8859778518281
episode: 355 training return: -999.9196784231102
epoch: 89 test_true_pfm: -0.32751453130842845 sim_pfm: -999.691535912647
episode: 356 training return: -999.9073478736115
episode: 357 training return: -999.9198399919596
episode: 358 training return: -999.9249030533097
episode: 359 training return: -999.9237746396723
epoch: 90 test_true_pfm: -0.6876919866709225 sim_pfm: -999.6988916959821
episode: 360 training return: -999.9183399430656
episode: 361 training return: -999.9228379825822
episode: 362 training return: -999.9115919012319
episode: 363 training return: -999.9306213922378
epoch: 91 test_true_pfm: 0.01348228707754299 sim_pfm: -999.6889101019489
episode: 364 training return: -999.9252188419619
episode: 365 training return: -999.9350088689997
episode: 366 training return: -999.921311596035
episode: 367 training return: -999.9199962621452
epoch: 92 test_true_pfm: -0.058666569842147076 sim_pfm: -999.6891915244765
episode: 368 training return: -999.877762999254
episode: 369 training return: -999.9049811784116
episode: 370 training return: -999.9316918424672
episode: 371 training return: -999.9214068224591
epoch: 93 test_true_pfm: 0.30493396493614255 sim_pfm: -999.685180982292
episode: 372 training return: -999.8658330890103
episode: 373 training return: -999.8713127786514
episode: 374 training return: -999.9269637368456
episode: 375 training return: -999.9061458393111
epoch: 94 test_true_pfm: 0.695711763857792 sim_pfm: -999.6913198128982
episode: 376 training return: -999.8628459022497
episode: 377 training return: -999.8402245358785
episode: 378 training return: -999.9029526712876
episode: 379 training return: -999.9176477006425
epoch: 95 test_true_pfm: -0.1145712294532088 sim_pfm: -999.6919132418576
episode: 380 training return: -999.9092739883856
episode: 381 training return: -999.9259268120953
episode: 382 training return: -999.9090295769679
episode: 383 training return: -999.921852165583
epoch: 96 test_true_pfm: -0.6182708428532903 sim_pfm: -999.6905521037887
episode: 384 training return: -999.9267887505227
episode: 385 training return: -999.9230903385384
episode: 386 training return: -999.9198062398783
episode: 387 training return: -999.9119807179017
epoch: 97 test_true_pfm: 0.252407745819248 sim_pfm: -999.6932891138091
episode: 388 training return: -999.9302902290964
episode: 389 training return: -999.9236186149728
episode: 390 training return: -999.9063368984127
episode: 391 training return: -999.926893929193
epoch: 98 test_true_pfm: -0.17360063736457298 sim_pfm: -999.6930347870137
episode: 392 training return: -999.9133224848235
episode: 393 training return: -999.934896824385
episode: 394 training return: -999.9243028027694
episode: 395 training return: -999.9193852627084
epoch: 99 test_true_pfm: -0.030421015749023384 sim_pfm: -999.6930634287447
episode: 396 training return: -999.916359333387
episode: 397 training return: -999.918654468418
episode: 398 training return: -999.9078048795415
episode: 399 training return: -999.9114215967397
epoch: 100 test_true_pfm: 0.11343509128959199 sim_pfm: -999.6973062682746
episode: 400 training return: -999.9107153119899
episode: 401 training return: -999.9196633603557
episode: 402 training return: -999.9233528423119
episode: 403 training return: -999.9070673280429
epoch: 101 test_true_pfm: -0.38414164172505966 sim_pfm: -999.6944578604149
episode: 404 training return: -999.9402235645493
episode: 405 training return: -999.9193648403888
episode: 406 training return: -999.9176775251385
episode: 407 training return: -999.9283762098263
epoch: 102 test_true_pfm: -0.2111023965868599 sim_pfm: -999.6917021589853
episode: 408 training return: -999.9134514691979
episode: 409 training return: -999.8797778949943
episode: 410 training return: -999.9215771359377
episode: 411 training return: -999.8877219508961
epoch: 103 test_true_pfm: -0.6796625894063212 sim_pfm: -999.6905904837548
episode: 412 training return: -999.9227644175096
episode: 413 training return: -999.9326128645362
episode: 414 training return: -999.9314189653101
episode: 415 training return: -999.9243993529801
epoch: 104 test_true_pfm: -0.17655363383956138 sim_pfm: -999.6910477613889
episode: 416 training return: -999.9180302039666
episode: 417 training return: -999.9292938228606
episode: 418 training return: -999.9197546334266
episode: 419 training return: -999.9117449219677
epoch: 105 test_true_pfm: -0.05585942776457745 sim_pfm: -999.6918721572187
episode: 420 training return: -999.9090321951478
episode: 421 training return: -999.7308540475509
episode: 422 training return: -999.9176508510079
episode: 423 training return: -999.8828025862001
epoch: 106 test_true_pfm: -0.7310915792806876 sim_pfm: -999.6949344492097
episode: 424 training return: -999.9186533523938
episode: 425 training return: -999.9352269258945
episode: 426 training return: -999.9300066178289
episode: 427 training return: -999.9267068699257
epoch: 107 test_true_pfm: -0.1938124337388342 sim_pfm: -999.6945942066844
episode: 428 training return: -999.9267687141097
episode: 429 training return: -999.9106020562937
episode: 430 training return: -999.9109267618318
episode: 431 training return: -999.908240628019
epoch: 108 test_true_pfm: 0.3682791707202318 sim_pfm: -999.6890377712089
episode: 432 training return: -999.9281143636697
episode: 433 training return: -999.9107158354094
episode: 434 training return: -999.8558840592519
episode: 435 training return: -999.9067441187052
epoch: 109 test_true_pfm: 0.322979536293921 sim_pfm: -999.6875252041795
episode: 436 training return: -999.8895753639847
episode: 437 training return: -999.8837135379665
episode: 438 training return: -999.9041138287956
episode: 439 training return: -999.8729908804386
epoch: 110 test_true_pfm: -0.21996528016784392 sim_pfm: -999.6861645708478
episode: 440 training return: -999.9360062216002
episode: 441 training return: -999.9234452116073
episode: 442 training return: -999.9077080813652
episode: 443 training return: -999.9012667478783
epoch: 111 test_true_pfm: -0.5858113824164628 sim_pfm: -999.6953488516268
episode: 444 training return: -999.9390586435234
episode: 445 training return: -999.917937919407
episode: 446 training return: -999.901878703189
episode: 447 training return: -999.9166553535113
epoch: 112 test_true_pfm: 0.14085242092230016 sim_pfm: -999.6931943638764
episode: 448 training return: -999.924464005752
episode: 449 training return: -999.8876914996974
episode: 450 training return: -999.9310002442708
episode: 451 training return: -999.9280131192265
epoch: 113 test_true_pfm: -0.38760829696885796 sim_pfm: -999.6906533236105
episode: 452 training return: -999.8828327835081
episode: 453 training return: -999.9208149726426
episode: 454 training return: -999.9086930136534
episode: 455 training return: -999.9196687777436
epoch: 114 test_true_pfm: -0.6897075950334166 sim_pfm: -999.6904126719797
episode: 456 training return: -999.9030027396691
episode: 457 training return: -999.9121822119141
episode: 458 training return: -999.9093016703011
episode: 459 training return: -999.8684446776682
epoch: 115 test_true_pfm: 0.1790159135972942 sim_pfm: -999.6850706784429
episode: 460 training return: -999.9242371688008
episode: 461 training return: -999.9269388762085
episode: 462 training return: -999.916835397483
episode: 463 training return: -999.8701343692685
epoch: 116 test_true_pfm: -0.6037890484336647 sim_pfm: -999.690099560618
episode: 464 training return: -999.9211250202638
episode: 465 training return: -999.9022808335012
episode: 466 training return: -999.8839949892343
episode: 467 training return: -999.9173643357778
epoch: 117 test_true_pfm: -1.008217603986601 sim_pfm: -999.6884586324387
episode: 468 training return: -999.9163771909556
episode: 469 training return: -999.9034804703563
episode: 470 training return: -999.9160253404372
episode: 471 training return: -999.9254473829532
epoch: 118 test_true_pfm: -0.3252878204890602 sim_pfm: -999.694602934349
episode: 472 training return: -999.9161222769641
episode: 473 training return: -999.921528821788
episode: 474 training return: -999.8671849045081
episode: 475 training return: -999.9290126465504
epoch: 119 test_true_pfm: -0.47972584275011415 sim_pfm: -999.6910482294857
episode: 476 training return: -999.8961536022947
episode: 477 training return: -999.9124844812465
episode: 478 training return: -999.9210029318493
episode: 479 training return: -999.914732512927
epoch: 120 test_true_pfm: -0.6234918444558164 sim_pfm: -999.6931181596597
episode: 480 training return: -999.923320052012
episode: 481 training return: -999.9224490146213
episode: 482 training return: -999.9131679367451
episode: 483 training return: -999.9131221851121
epoch: 121 test_true_pfm: 0.14227758707787333 sim_pfm: -999.6882186403112
episode: 484 training return: -999.8850592444448
episode: 485 training return: -999.9186174664889
episode: 486 training return: -999.8965698269842
episode: 487 training return: -999.9175200590032
epoch: 122 test_true_pfm: -0.09302812024364947 sim_pfm: -999.6909268239257
episode: 488 training return: -999.9138287304341
episode: 489 training return: -999.8404532285552
episode: 490 training return: -999.8870591269695
episode: 491 training return: -999.9019078117103
epoch: 123 test_true_pfm: -0.17906706092102057 sim_pfm: -999.6961856555195
episode: 492 training return: -999.9169269732458
episode: 493 training return: -999.93258408531
episode: 494 training return: -999.9253038178311
episode: 495 training return: -999.9093185737378
epoch: 124 test_true_pfm: -0.49853231290076927 sim_pfm: -999.6859798526953
episode: 496 training return: -999.9217651274547
episode: 497 training return: -999.9288209831234
episode: 498 training return: -999.9378439555416
episode: 499 training return: -999.8628643559683
epoch: 125 test_true_pfm: -0.031605626123928 sim_pfm: -999.6899436517102
episode: 500 training return: -999.8900640300848
episode: 501 training return: -999.901983059623
episode: 502 training return: -999.8936377038489
episode: 503 training return: -999.9279820348268
epoch: 126 test_true_pfm: -0.06965781840850788 sim_pfm: -999.6923336990006
episode: 504 training return: -999.8757321536186
episode: 505 training return: -999.9198502078863
episode: 506 training return: -999.9309713983896
episode: 507 training return: -999.9173245516871
epoch: 127 test_true_pfm: -0.4828615278297342 sim_pfm: -999.6864390653777
episode: 508 training return: -999.9224497549848
episode: 509 training return: -999.8405241033386
episode: 510 training return: -999.9318299162717
episode: 511 training return: -999.9256278579255
epoch: 128 test_true_pfm: -0.019221462607815414 sim_pfm: -999.6867054327357
episode: 512 training return: -999.9160171114423
episode: 513 training return: -999.9105172741548
episode: 514 training return: -999.8301076869168
episode: 515 training return: -999.9251736161842
epoch: 129 test_true_pfm: 0.2964117267313363 sim_pfm: -999.6883061775546
episode: 516 training return: -999.8815054812767
episode: 517 training return: -999.9041546434464
episode: 518 training return: -999.913187450457
episode: 519 training return: -999.91351133121
epoch: 130 test_true_pfm: -1.114773995974646 sim_pfm: -999.6892552803637
episode: 520 training return: -999.9456923055712
episode: 521 training return: -999.914728827376
episode: 522 training return: -999.932516101583
episode: 523 training return: -999.9261377822531
epoch: 131 test_true_pfm: -0.10691258420206269 sim_pfm: -999.686937159185
episode: 524 training return: -999.9299516648169
episode: 525 training return: -999.9156579935274
episode: 526 training return: -999.9137394392405
episode: 527 training return: -999.8756309768248
epoch: 132 test_true_pfm: -0.5394170488653246 sim_pfm: -999.6905353162883
episode: 528 training return: -999.9124473275766
episode: 529 training return: -999.9138982625798
episode: 530 training return: -999.8979219022548
episode: 531 training return: -999.9049825705272
epoch: 133 test_true_pfm: -0.3756400861529731 sim_pfm: -999.6934715525964
episode: 532 training return: -999.9300938064395
episode: 533 training return: -999.9059418935338
episode: 534 training return: -999.9042560786803
episode: 535 training return: -999.9068818591
epoch: 134 test_true_pfm: -0.3866010232649166 sim_pfm: -999.693895753035
episode: 536 training return: -999.9068517732817
episode: 537 training return: -999.9040733360104
episode: 538 training return: -999.920368686163
episode: 539 training return: -999.9197254367717
epoch: 135 test_true_pfm: -0.14372109956048623 sim_pfm: -999.689580965805
episode: 540 training return: -999.9216790777829
episode: 541 training return: -999.8814771004927
episode: 542 training return: -999.9121498669656
episode: 543 training return: -999.9140793973469
epoch: 136 test_true_pfm: -0.4471361711840076 sim_pfm: -999.6963791241027
episode: 544 training return: -999.9311372000827
episode: 545 training return: -999.9145652588636
episode: 546 training return: -999.9035106806729
episode: 547 training return: -999.922239454287
epoch: 137 test_true_pfm: -0.5683442847118818 sim_pfm: -999.6912720808724
episode: 548 training return: -999.9225064624467
episode: 549 training return: -999.8905564526802
episode: 550 training return: -999.9189127866596
episode: 551 training return: -999.8826290053366
epoch: 138 test_true_pfm: -0.6965084026597377 sim_pfm: -999.6934554507698
episode: 552 training return: -999.9233744966299
episode: 553 training return: -999.9036575293055
episode: 554 training return: -999.9175053239792
episode: 555 training return: -999.8976748324109
epoch: 139 test_true_pfm: -0.577572981095209 sim_pfm: -999.6938751135867
episode: 556 training return: -999.9166194263
episode: 557 training return: -999.9264228165641
episode: 558 training return: -999.9144196144308
episode: 559 training return: -999.9523995716561
epoch: 140 test_true_pfm: -0.31424594284288015 sim_pfm: -999.6911187408583
episode: 560 training return: -999.9173577898224
episode: 561 training return: -999.9337678618332
episode: 562 training return: -999.8973002883058
episode: 563 training return: -999.9174451268877
epoch: 141 test_true_pfm: -0.25295119607763555 sim_pfm: -999.6916579401969
episode: 564 training return: -999.926525698254
episode: 565 training return: -999.9195131872033
episode: 566 training return: -999.9383446054005
episode: 567 training return: -999.9115187122852
epoch: 142 test_true_pfm: -0.9497638467659582 sim_pfm: -999.6937023337797
episode: 568 training return: -999.9081040084559
episode: 569 training return: -999.8869154579711
episode: 570 training return: -999.9235266740567
episode: 571 training return: -999.8784250448215
epoch: 143 test_true_pfm: 0.43129527347396174 sim_pfm: -999.6957166285652
episode: 572 training return: -999.906147983166
episode: 573 training return: -999.9142022466308
episode: 574 training return: -999.8999013470369
episode: 575 training return: -999.9272832676727
epoch: 144 test_true_pfm: -0.14788153238444168 sim_pfm: -999.6909584844067
episode: 576 training return: -999.9028466092321
episode: 577 training return: -999.8817073939688
episode: 578 training return: -999.8689123343812
episode: 579 training return: -999.903386622184
epoch: 145 test_true_pfm: -0.8396899843554134 sim_pfm: -999.6858846580714
episode: 580 training return: -999.8523033151426
episode: 581 training return: -999.9207526334882
episode: 582 training return: -999.9280182717846
episode: 583 training return: -999.9273489500389
epoch: 146 test_true_pfm: -0.5818147142162249 sim_pfm: -999.6933664625641
episode: 584 training return: -999.9086319110277
episode: 585 training return: -999.9262262654311
episode: 586 training return: -999.8684008032737
episode: 587 training return: -999.9018123815237
epoch: 147 test_true_pfm: -0.1628660130120853 sim_pfm: -999.6934121749495
episode: 588 training return: -999.8781563269364
episode: 589 training return: -999.8984399955718
episode: 590 training return: -999.9234763340745
episode: 591 training return: -999.9178874272933
epoch: 148 test_true_pfm: -0.7966939297820529 sim_pfm: -999.6993801910636
episode: 592 training return: -999.9185796521451
episode: 593 training return: -999.9155643029623
episode: 594 training return: -999.9041370982127
episode: 595 training return: -999.9300587996582
epoch: 149 test_true_pfm: -0.15409939561216002 sim_pfm: -999.6897352008392
episode: 596 training return: -999.9221266859579
episode: 597 training return: -999.907564095055
episode: 598 training return: -999.9204491203692
episode: 599 training return: -999.8871893593773
epoch: 150 test_true_pfm: -0.45479819813449573 sim_pfm: -999.6975440829129
