['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '0', '--data', '100000']
epoch: 0 training_loss 0.2577283998578787 test_loss: 0.1696866989135742
epoch: 1 training_loss 0.15580000326037408 test_loss: 0.16139265298843383
epoch: 2 training_loss 0.140380084104836 test_loss: 0.11409913301467896
epoch: 3 training_loss 0.12379941072314977 test_loss: 0.12334514856338501
epoch: 4 training_loss 0.1152778074517846 test_loss: 0.12302607297897339
epoch: 5 training_loss 0.11557310175150633 test_loss: 0.14454840421676635
epoch: 6 training_loss 0.11226717039942741 test_loss: 0.11947587728500367
epoch: 7 training_loss 0.10974298842251301 test_loss: 0.12980060577392577
epoch: 8 training_loss 0.10687599511817097 test_loss: 0.11736578941345215
epoch: 9 training_loss 0.1124443980678916 test_loss: 0.10178704261779785
epoch: 10 training_loss 0.09972405236214399 test_loss: 0.09777647256851196
epoch: 11 training_loss 0.10506106566637755 test_loss: 0.10750024318695069
epoch: 12 training_loss 0.1019736847281456 test_loss: 0.10173779726028442
epoch: 13 training_loss 0.10321969855576754 test_loss: 0.11792906522750854
epoch: 14 training_loss 0.10532800864428282 test_loss: 0.10076761245727539
epoch: 15 training_loss 0.10365715706720949 test_loss: 0.11401290893554687
epoch: 16 training_loss 0.09963267216458917 test_loss: 0.1089814305305481
epoch: 17 training_loss 0.1001521783694625 test_loss: 0.12507106065750123
epoch: 18 training_loss 0.10547573491930962 test_loss: 0.10073302984237671
epoch: 19 training_loss 0.09799167973920703 test_loss: 0.10967694520950318
epoch: 20 training_loss 0.10061978165060281 test_loss: 0.10183007717132568
epoch: 21 training_loss 0.09605293817818165 test_loss: 0.10839790105819702
epoch: 22 training_loss 0.09562935177236795 test_loss: 0.09473442435264587
epoch: 23 training_loss 0.1015670064650476 test_loss: 0.11762312650680543
epoch: 24 training_loss 0.09059344161301851 test_loss: 0.0946068823337555
epoch: 25 training_loss 0.09192540735937654 test_loss: 0.10297912359237671
epoch: 26 training_loss 0.09707279054448009 test_loss: 0.10671963691711425
epoch: 27 training_loss 0.09453260019421578 test_loss: 0.09752559661865234
epoch: 28 training_loss 0.09400842314586043 test_loss: 0.10150463581085205
epoch: 29 training_loss 0.09183178832754493 test_loss: 0.10791199207305908
epoch: 30 training_loss 0.09681360287591816 test_loss: 0.11053353548049927
epoch: 31 training_loss 0.09509931255131959 test_loss: 0.09984782338142395
epoch: 32 training_loss 0.09519069846719504 test_loss: 0.10877914428710937
epoch: 33 training_loss 0.09543838558718562 test_loss: 0.09884530901908875
epoch: 34 training_loss 0.08889630844816565 test_loss: 0.09347392320632934
epoch: 35 training_loss 0.09722853254526853 test_loss: 0.0976460337638855
epoch: 36 training_loss 0.09324967949651182 test_loss: 0.09909137487411498
epoch: 37 training_loss 0.08626060044392943 test_loss: 0.10720534324645996
epoch: 38 training_loss 0.08930076034739613 test_loss: 0.09967116713523864
epoch: 39 training_loss 0.08580467121675611 test_loss: 0.09913597106933594
epoch: 40 training_loss 0.0843221422471106 test_loss: 0.09818677306175232
epoch: 41 training_loss 0.08455385010689496 test_loss: 0.09327677488327027
epoch: 42 training_loss 0.09085584241896867 test_loss: 0.1136210560798645
epoch: 43 training_loss 0.08765027990564704 test_loss: 0.10476992130279542
epoch: 44 training_loss 0.08417552446946502 test_loss: 0.09519392848014832
epoch: 45 training_loss 0.08855976743623614 test_loss: 0.09506624937057495
epoch: 46 training_loss 0.08918619571253657 test_loss: 0.10990655422210693
epoch: 47 training_loss 0.08990281458944083 test_loss: 0.10734376907348633
epoch: 48 training_loss 0.09034733705222607 test_loss: 0.10119066238403321
epoch: 49 training_loss 0.0964917385019362 test_loss: 0.09672486186027526
epoch: 50 training_loss 0.088728759419173 test_loss: 0.1070219874382019
epoch: 51 training_loss 0.09502756293863059 test_loss: 0.1069609522819519
epoch: 52 training_loss 0.08574921301566064 test_loss: 0.09673027992248535
epoch: 53 training_loss 0.08758414460346103 test_loss: 0.11312848329544067
epoch: 54 training_loss 0.0875689814798534 test_loss: 0.09648867845535278
epoch: 55 training_loss 0.09116200296208263 test_loss: 0.09667218327522278
epoch: 56 training_loss 0.08504208877682685 test_loss: 0.09574949741363525
epoch: 57 training_loss 0.08488648072816432 test_loss: 0.1045329213142395
epoch: 58 training_loss 0.08962445229291915 test_loss: 0.10747536420822143
epoch: 59 training_loss 0.09251251952722668 test_loss: 0.1054646134376526
epoch: 60 training_loss 0.09042079649865627 test_loss: 0.10001515150070191
epoch: 61 training_loss 0.09172673152759671 test_loss: 0.10095518827438354
epoch: 62 training_loss 0.08396216679364443 test_loss: 0.0973049283027649
epoch: 63 training_loss 0.08259718652814627 test_loss: 0.10069904327392579
epoch: 64 training_loss 0.08072727628052234 test_loss: 0.09845329523086548
epoch: 65 training_loss 0.09388614270836115 test_loss: 0.09389040470123292
epoch: 66 training_loss 0.08845484565943479 test_loss: 0.1104282021522522
epoch: 67 training_loss 0.08422589411959053 test_loss: 0.1173444151878357
epoch: 68 training_loss 0.0948498049005866 test_loss: 0.10832345485687256
epoch: 69 training_loss 0.08419608175754548 test_loss: 0.10095895528793335
epoch: 70 training_loss 0.08518419431522489 test_loss: 0.09242938756942749
epoch: 71 training_loss 0.08422485416755081 test_loss: 0.11112223863601685
epoch: 72 training_loss 0.08099218297749758 test_loss: 0.09664722084999085
epoch: 73 training_loss 0.0840432338230312 test_loss: 0.09916425943374634
epoch: 74 training_loss 0.08748406959697604 test_loss: 0.10485646724700928
epoch: 75 training_loss 0.08512764658778905 test_loss: 0.10926200151443481
epoch: 76 training_loss 0.08765766981989145 test_loss: 0.10751597881317139
epoch: 77 training_loss 0.09052316706627607 test_loss: 0.10034523010253907
epoch: 78 training_loss 0.0822355182096362 test_loss: 0.08706056475639343
epoch: 79 training_loss 0.09061068783514202 test_loss: 0.0939631462097168
epoch: 80 training_loss 0.08065607704222202 test_loss: 0.09550986886024475
epoch: 81 training_loss 0.09003549557179212 test_loss: 0.10512808561325074
epoch: 82 training_loss 0.08645589275285602 test_loss: 0.111527419090271
epoch: 83 training_loss 0.08824926486238838 test_loss: 0.10892804861068725
epoch: 84 training_loss 0.08506453204900026 test_loss: 0.10519418716430665
epoch: 85 training_loss 0.08910641575232148 test_loss: 0.09575786590576171
epoch: 86 training_loss 0.08613988313823938 test_loss: 0.1151466965675354
epoch: 87 training_loss 0.08380349955987185 test_loss: 0.10394484996795654
epoch: 88 training_loss 0.08213015547022223 test_loss: 0.11355671882629395
epoch: 89 training_loss 0.08481160145252943 test_loss: 0.11341567039489746
epoch: 90 training_loss 0.08288401070982218 test_loss: 0.10306723117828369
epoch: 91 training_loss 0.08550264909863473 test_loss: 0.09925328493118286
epoch: 92 training_loss 0.07588513007387519 test_loss: 0.1002040147781372
epoch: 93 training_loss 0.08507373929023743 test_loss: 0.1115182638168335
epoch: 94 training_loss 0.07827141817659139 test_loss: 0.10356388092041016
epoch: 95 training_loss 0.07464887659996748 test_loss: 0.09929206967353821
epoch: 96 training_loss 0.08037069652229548 test_loss: 0.0959450364112854
epoch: 97 training_loss 0.08084809875115752 test_loss: 0.11350699663162231
epoch: 98 training_loss 0.08306902900338173 test_loss: 0.11826812028884888
epoch: 99 training_loss 0.07896473878994584 test_loss: 0.10532660484313965
epoch: 100 training_loss 0.0814128942321986 test_loss: 0.10823025703430175
epoch: 101 training_loss 0.07923162371851504 test_loss: 0.10650355815887451
epoch: 102 training_loss 0.08337659956887364 test_loss: 0.10458390712738037
epoch: 103 training_loss 0.0803422059584409 test_loss: 0.09594782590866088
epoch: 104 training_loss 0.08915483605116606 test_loss: 0.11303671598434448
epoch: 105 training_loss 0.08806848898530006 test_loss: 0.08951672911643982
epoch: 106 training_loss 0.08301790948957205 test_loss: 0.10118356943130494
epoch: 107 training_loss 0.08866010442376136 test_loss: 0.10023622512817383
epoch: 108 training_loss 0.08039179070852696 test_loss: 0.09869701266288758
epoch: 109 training_loss 0.08364951532334089 test_loss: 0.08547674417495728
epoch: 110 training_loss 0.08325800085440278 test_loss: 0.12027236223220825
epoch: 111 training_loss 0.08144665597938001 test_loss: 0.0991291344165802
epoch: 112 training_loss 0.08659092929214239 test_loss: 0.1052051305770874
epoch: 113 training_loss 0.08076627211645246 test_loss: 0.11097203493118286
epoch: 114 training_loss 0.07991586400195956 test_loss: 0.09358509182929993
epoch: 115 training_loss 0.07891957960091531 test_loss: 0.09216534495353698
epoch: 116 training_loss 0.0828266299702227 test_loss: 0.09465274810791016
epoch: 117 training_loss 0.08346420634537935 test_loss: 0.09935600757598877
epoch: 118 training_loss 0.07684477286413312 test_loss: 0.09601232409477234
epoch: 119 training_loss 0.07985405467450618 test_loss: 0.09632652401924133
epoch: 120 training_loss 0.08143280614167452 test_loss: 0.11022429466247559
epoch: 121 training_loss 0.08282814536243677 test_loss: 0.12957628965377807
epoch: 122 training_loss 0.07400421142578124 test_loss: 0.11376937627792358
epoch: 123 training_loss 0.08590313654392957 test_loss: 0.10612736940383911
epoch: 124 training_loss 0.07769171403720974 test_loss: 0.09977102279663086
epoch: 125 training_loss 0.08017354820854962 test_loss: 0.10262271165847778
epoch: 126 training_loss 0.0833737606741488 test_loss: 0.10506526231765748
epoch: 127 training_loss 0.08141308180987834 test_loss: 0.09837778806686401
epoch: 128 training_loss 0.07888741059228778 test_loss: 0.09029560089111328
epoch: 129 training_loss 0.0788525759615004 test_loss: 0.10459228754043579
epoch: 130 training_loss 0.08270409666001796 test_loss: 0.09999818205833436
epoch: 131 training_loss 0.08119009222835302 test_loss: 0.10188418626785278
epoch: 132 training_loss 0.07565272361040115 test_loss: 0.09725080132484436
epoch: 133 training_loss 0.08303294111043215 test_loss: 0.09817314147949219
epoch: 134 training_loss 0.08270052164793014 test_loss: 0.10330382585525513
epoch: 135 training_loss 0.07683525265194475 test_loss: 0.09839357137680053
epoch: 136 training_loss 0.08518466025590897 test_loss: 0.1004939317703247
epoch: 137 training_loss 0.0735023008659482 test_loss: 0.10741757154464722
epoch: 138 training_loss 0.08045873699709773 test_loss: 0.0933420181274414
epoch: 139 training_loss 0.08018178505823016 test_loss: 0.11167149543762207
epoch: 140 training_loss 0.08300894629210234 test_loss: 0.1057924747467041
epoch: 141 training_loss 0.07835437344387174 test_loss: 0.10116519927978515
epoch: 142 training_loss 0.083310831990093 test_loss: 0.1181238055229187
epoch: 143 training_loss 0.0737238696590066 test_loss: 0.09347630739212036
epoch: 144 training_loss 0.0777213298343122 test_loss: 0.11166354417800903
epoch: 145 training_loss 0.07620702926069498 test_loss: 0.09701583981513977
epoch: 146 training_loss 0.07373085673898458 test_loss: 0.10689058303833007
epoch: 147 training_loss 0.07946592316031456 test_loss: 0.09988739490509033
epoch: 148 training_loss 0.08628429222851991 test_loss: 0.09373275637626648
epoch: 149 training_loss 0.08165230751037597 test_loss: 0.09709420204162597
epoch: 0 training_loss 0.30596168503165244 test_loss: 0.19133591651916504
epoch: 1 training_loss 0.16646370388567447 test_loss: 0.12685637474060057
epoch: 2 training_loss 0.13956291325390338 test_loss: 0.1266479015350342
epoch: 3 training_loss 0.13420646965503694 test_loss: 0.1127935528755188
epoch: 4 training_loss 0.12235494669526815 test_loss: 0.16141555309295655
epoch: 5 training_loss 0.11723910037428141 test_loss: 0.1007540464401245
epoch: 6 training_loss 0.1107759853079915 test_loss: 0.11077622175216675
epoch: 7 training_loss 0.11279180984944105 test_loss: 0.08898801803588867
epoch: 8 training_loss 0.11664459478110074 test_loss: 0.1116305947303772
epoch: 9 training_loss 0.10839374775066972 test_loss: 0.09857448935508728
epoch: 10 training_loss 0.10949777768924833 test_loss: 0.09965578317642212
epoch: 11 training_loss 0.1055086457543075 test_loss: 0.09975252747535705
epoch: 12 training_loss 0.10257102366536856 test_loss: 0.09071314334869385
epoch: 13 training_loss 0.10076985441148281 test_loss: 0.08215765953063965
epoch: 14 training_loss 0.10571016313508153 test_loss: 0.09068090915679931
epoch: 15 training_loss 0.10330447589978575 test_loss: 0.09455721974372863
epoch: 16 training_loss 0.10009941905736923 test_loss: 0.0808722734451294
epoch: 17 training_loss 0.10107912177219987 test_loss: 0.09065136313438416
epoch: 18 training_loss 0.09752944748848677 test_loss: 0.08535043001174927
epoch: 19 training_loss 0.09340377215296031 test_loss: 0.08506773710250855
epoch: 20 training_loss 0.09927127908915281 test_loss: 0.083592289686203
epoch: 21 training_loss 0.09980310758575797 test_loss: 0.09220787286758422
epoch: 22 training_loss 0.09130950257182122 test_loss: 0.08177042603492737
epoch: 23 training_loss 0.09687088206410407 test_loss: 0.0957624614238739
epoch: 24 training_loss 0.09002982001751661 test_loss: 0.11037960052490234
epoch: 25 training_loss 0.0975774765573442 test_loss: 0.08390704989433288
epoch: 26 training_loss 0.0991554849781096 test_loss: 0.08806713819503784
epoch: 27 training_loss 0.09401746019721031 test_loss: 0.09366351962089539
epoch: 28 training_loss 0.09237669643014669 test_loss: 0.0938670814037323
epoch: 29 training_loss 0.09405046779662371 test_loss: 0.08219728469848633
epoch: 30 training_loss 0.09333253465592861 test_loss: 0.09640565514564514
epoch: 31 training_loss 0.08410322174429893 test_loss: 0.0817635178565979
epoch: 32 training_loss 0.09611960981041193 test_loss: 0.0999227523803711
epoch: 33 training_loss 0.09451126141473651 test_loss: 0.09056516885757446
epoch: 34 training_loss 0.0890105028823018 test_loss: 0.07206134796142578
epoch: 35 training_loss 0.09426747526973486 test_loss: 0.0959505319595337
epoch: 36 training_loss 0.09189577225595713 test_loss: 0.07690666913986206
epoch: 37 training_loss 0.09386075647547841 test_loss: 0.08882242441177368
epoch: 38 training_loss 0.09154615567997099 test_loss: 0.08403416275978089
epoch: 39 training_loss 0.08313402166590095 test_loss: 0.08532918095588685
epoch: 40 training_loss 0.0946337421424687 test_loss: 0.07679828405380248
epoch: 41 training_loss 0.09418365828692914 test_loss: 0.10082840919494629
epoch: 42 training_loss 0.09040672082453965 test_loss: 0.07638518810272217
epoch: 43 training_loss 0.09032484657131136 test_loss: 0.1002200961112976
epoch: 44 training_loss 0.08525100328028203 test_loss: 0.0922865629196167
epoch: 45 training_loss 0.08886295666918159 test_loss: 0.09435374140739441
epoch: 46 training_loss 0.09342904657125473 test_loss: 0.09377729892730713
epoch: 47 training_loss 0.08904530828818678 test_loss: 0.08434985280036926
epoch: 48 training_loss 0.09358429966494441 test_loss: 0.09118708372116088
epoch: 49 training_loss 0.09246137259528041 test_loss: 0.10387749671936035
epoch: 50 training_loss 0.08583728790283203 test_loss: 0.08060921430587768
epoch: 51 training_loss 0.09129148120060564 test_loss: 0.08643972277641296
epoch: 52 training_loss 0.088747471999377 test_loss: 0.09228315949440002
epoch: 53 training_loss 0.08183989990502596 test_loss: 0.10263724327087402
epoch: 54 training_loss 0.0820494446158409 test_loss: 0.08291522860527038
epoch: 55 training_loss 0.0839438559114933 test_loss: 0.08789525032043458
epoch: 56 training_loss 0.08647559837438167 test_loss: 0.08526118993759155
epoch: 57 training_loss 0.08592184137552977 test_loss: 0.09853354096412659
epoch: 58 training_loss 0.09322034558281303 test_loss: 0.09429603815078735
epoch: 59 training_loss 0.08306149980053305 test_loss: 0.081419837474823
epoch: 60 training_loss 0.08644496951252222 test_loss: 0.08457581400871277
epoch: 61 training_loss 0.09328357694670558 test_loss: 0.09162189364433289
epoch: 62 training_loss 0.08995935281738639 test_loss: 0.08544630408287049
epoch: 63 training_loss 0.08925208048895002 test_loss: 0.08008179068565369
epoch: 64 training_loss 0.07960026839748024 test_loss: 0.08298084139823914
epoch: 65 training_loss 0.09088112266734243 test_loss: 0.0760302186012268
epoch: 66 training_loss 0.07864632984623313 test_loss: 0.08664244413375854
epoch: 67 training_loss 0.08693676818162203 test_loss: 0.09806897044181824
epoch: 68 training_loss 0.08430507574230432 test_loss: 0.07221286296844483
epoch: 69 training_loss 0.08607447931542993 test_loss: 0.08165346384048462
epoch: 70 training_loss 0.08740112449973822 test_loss: 0.09547579288482666
epoch: 71 training_loss 0.08926982156932355 test_loss: 0.08245056271553039
epoch: 72 training_loss 0.0859578802715987 test_loss: 0.08329007029533386
epoch: 73 training_loss 0.0873799823783338 test_loss: 0.06394979357719421
epoch: 74 training_loss 0.08174854267388582 test_loss: 0.08597224354743957
epoch: 75 training_loss 0.0924300380051136 test_loss: 0.09670193791389466
epoch: 76 training_loss 0.0811245964281261 test_loss: 0.07556483149528503
epoch: 77 training_loss 0.08200435284525157 test_loss: 0.0830520212650299
epoch: 78 training_loss 0.08596272774040699 test_loss: 0.09021061062812805
epoch: 79 training_loss 0.08053712343797087 test_loss: 0.09325465559959412
epoch: 80 training_loss 0.08362957999110222 test_loss: 0.08706455230712891
epoch: 81 training_loss 0.08295528404414654 test_loss: 0.09189271926879883
epoch: 82 training_loss 0.08560752650722861 test_loss: 0.091718989610672
epoch: 83 training_loss 0.08291049815714359 test_loss: 0.09128720760345459
epoch: 84 training_loss 0.07974197028204799 test_loss: 0.09659423828125
epoch: 85 training_loss 0.08478166211396455 test_loss: 0.09857556819915772
epoch: 86 training_loss 0.08746668795123697 test_loss: 0.093732351064682
epoch: 87 training_loss 0.08332968186587095 test_loss: 0.08388031125068665
epoch: 88 training_loss 0.08388991143554449 test_loss: 0.07129008173942566
epoch: 89 training_loss 0.0839791808463633 test_loss: 0.08803399205207825
epoch: 90 training_loss 0.08297730340622365 test_loss: 0.08645685911178588
epoch: 91 training_loss 0.08813968356698751 test_loss: 0.06890430450439453
epoch: 92 training_loss 0.08153881957754493 test_loss: 0.07409355044364929
epoch: 93 training_loss 0.08431388263590633 test_loss: 0.08293548226356506
epoch: 94 training_loss 0.08814154207706451 test_loss: 0.10064120292663574
epoch: 95 training_loss 0.08376763316802681 test_loss: 0.08725726008415222
epoch: 96 training_loss 0.08540592860430479 test_loss: 0.08318531513214111
epoch: 97 training_loss 0.08788309814408421 test_loss: 0.08974891304969787
epoch: 98 training_loss 0.08447047544643282 test_loss: 0.08403816819190979
epoch: 99 training_loss 0.0822044880129397 test_loss: 0.09499640464782715
epoch: 100 training_loss 0.08083945116028189 test_loss: 0.07426670789718628
epoch: 101 training_loss 0.07880034077912569 test_loss: 0.07729583978652954
epoch: 102 training_loss 0.08146759860217571 test_loss: 0.0870242953300476
epoch: 103 training_loss 0.07866687010973691 test_loss: 0.07508134245872497
epoch: 104 training_loss 0.07946823613718151 test_loss: 0.08373498916625977
epoch: 105 training_loss 0.0867862612567842 test_loss: 0.08834901452064514
epoch: 106 training_loss 0.08134540971368551 test_loss: 0.09418459534645081
epoch: 107 training_loss 0.08855538899078966 test_loss: 0.07450233101844787
epoch: 108 training_loss 0.08859379189088941 test_loss: 0.08745400905609131
epoch: 109 training_loss 0.07947507839649916 test_loss: 0.09284536242485046
epoch: 110 training_loss 0.07692405045032501 test_loss: 0.09571130275726318
epoch: 111 training_loss 0.08226059351116419 test_loss: 0.09200721979141235
epoch: 112 training_loss 0.08452839876525103 test_loss: 0.08324384689331055
epoch: 113 training_loss 0.08029034888371825 test_loss: 0.09319736361503601
epoch: 114 training_loss 0.08037728128954769 test_loss: 0.10234583616256714
epoch: 115 training_loss 0.09011661784723402 test_loss: 0.08358873724937439
epoch: 116 training_loss 0.07964868208393455 test_loss: 0.08401793241500854
epoch: 117 training_loss 0.07357052482664585 test_loss: 0.0848931074142456
epoch: 118 training_loss 0.08206150257959961 test_loss: 0.09147109389305115
epoch: 119 training_loss 0.07792063332628459 test_loss: 0.09457296133041382
epoch: 120 training_loss 0.0766980298049748 test_loss: 0.08497270345687866
epoch: 121 training_loss 0.0836936373449862 test_loss: 0.0788228154182434
epoch: 122 training_loss 0.0818501302972436 test_loss: 0.09539973139762878
epoch: 123 training_loss 0.08549331167712808 test_loss: 0.08982915282249451
epoch: 124 training_loss 0.08091101718135178 test_loss: 0.09479803442955018
epoch: 125 training_loss 0.08286043096333742 test_loss: 0.08426580429077149
epoch: 126 training_loss 0.07674123992212117 test_loss: 0.08242848515510559
epoch: 127 training_loss 0.08321935357525945 test_loss: 0.09359769821166992
epoch: 128 training_loss 0.08306331156753004 test_loss: 0.08968920707702636
epoch: 129 training_loss 0.08433239761739969 test_loss: 0.09632870554924011
epoch: 130 training_loss 0.07753695314750075 test_loss: 0.09456812143325806
epoch: 131 training_loss 0.07633084973320364 test_loss: 0.08053331971168518
epoch: 132 training_loss 0.08182837277650833 test_loss: 0.08566181659698487
epoch: 133 training_loss 0.07920798003673553 test_loss: 0.08381088376045227
epoch: 134 training_loss 0.08414152480661868 test_loss: 0.09315423965454102
epoch: 135 training_loss 0.07377985294908285 test_loss: 0.07630801796913148
epoch: 136 training_loss 0.07788344494998455 test_loss: 0.09000778794288636
epoch: 137 training_loss 0.07517764728516341 test_loss: 0.0794026494026184
epoch: 138 training_loss 0.0736783079430461 test_loss: 0.08487634062767029
epoch: 139 training_loss 0.07678682565689086 test_loss: 0.08588979244232178
epoch: 140 training_loss 0.07720887701958418 test_loss: 0.09652525782585145
epoch: 141 training_loss 0.0788939192518592 test_loss: 0.09766369462013244
epoch: 142 training_loss 0.07649001428857445 test_loss: 0.10174498558044434
epoch: 143 training_loss 0.07918018832802773 test_loss: 0.07911738753318787
epoch: 144 training_loss 0.08339516328647732 test_loss: 0.08926866054534913
epoch: 145 training_loss 0.08219848645851016 test_loss: 0.08895508646965027
epoch: 146 training_loss 0.07758458929136396 test_loss: 0.07843103408813476
epoch: 147 training_loss 0.08533448230475188 test_loss: 0.09171011447906494
epoch: 148 training_loss 0.07662528526037932 test_loss: 0.09485278129577637
epoch: 149 training_loss 0.07959364900365472 test_loss: 0.09339423179626465
epoch: 0 training_loss 0.2471413318067789 test_loss: 0.1498302102088928
epoch: 1 training_loss 0.14327527578920127 test_loss: 0.15008072853088378
epoch: 2 training_loss 0.13692654021084308 test_loss: 0.13652365207672118
epoch: 3 training_loss 0.12450662275776267 test_loss: 0.10561820268630981
epoch: 4 training_loss 0.11913003684952855 test_loss: 0.10061496496200562
epoch: 5 training_loss 0.10701826818287373 test_loss: 0.10690072774887086
epoch: 6 training_loss 0.1022822005301714 test_loss: 0.10247710943222046
epoch: 7 training_loss 0.1165611618757248 test_loss: 0.1263325333595276
epoch: 8 training_loss 0.10023353978991509 test_loss: 0.11681467294692993
epoch: 9 training_loss 0.11099255505949258 test_loss: 0.10685461759567261
epoch: 10 training_loss 0.10507326610386372 test_loss: 0.12982008457183838
epoch: 11 training_loss 0.10072120826691389 test_loss: 0.10076271295547486
epoch: 12 training_loss 0.09839361947029829 test_loss: 0.11532077789306641
epoch: 13 training_loss 0.10940452117472887 test_loss: 0.09865473508834839
epoch: 14 training_loss 0.10365856023505331 test_loss: 0.11017757654190063
epoch: 15 training_loss 0.09526235619559884 test_loss: 0.10493764877319336
epoch: 16 training_loss 0.09427430227398872 test_loss: 0.11240044832229615
epoch: 17 training_loss 0.09651068268343806 test_loss: 0.10129915475845337
epoch: 18 training_loss 0.1012580543756485 test_loss: 0.09247663021087646
epoch: 19 training_loss 0.09910473559051752 test_loss: 0.11798832416534424
epoch: 20 training_loss 0.10165628908202053 test_loss: 0.09435580372810363
epoch: 21 training_loss 0.09863079715520144 test_loss: 0.10416933298110961
epoch: 22 training_loss 0.09919859383255243 test_loss: 0.10577665567398072
epoch: 23 training_loss 0.09652229178696871 test_loss: 0.1037053108215332
epoch: 24 training_loss 0.08518828358501196 test_loss: 0.0980946660041809
epoch: 25 training_loss 0.09804442912340164 test_loss: 0.10415441989898681
epoch: 26 training_loss 0.1031692910939455 test_loss: 0.11360217332839966
epoch: 27 training_loss 0.09748554989695549 test_loss: 0.09839972853660583
epoch: 28 training_loss 0.09149440297856927 test_loss: 0.09273656010627747
epoch: 29 training_loss 0.09185709115117788 test_loss: 0.08880469799041749
epoch: 30 training_loss 0.08713492574170231 test_loss: 0.09870321154594422
epoch: 31 training_loss 0.09921124190092087 test_loss: 0.09884666204452515
epoch: 32 training_loss 0.10006249897181987 test_loss: 0.10122424364089966
epoch: 33 training_loss 0.09059336431324481 test_loss: 0.08623140454292297
epoch: 34 training_loss 0.09366962203755974 test_loss: 0.10425429344177246
epoch: 35 training_loss 0.09248049788177014 test_loss: 0.09991683959960937
epoch: 36 training_loss 0.08815626673400402 test_loss: 0.11155945062637329
epoch: 37 training_loss 0.09005205174908042 test_loss: 0.09144403934478759
epoch: 38 training_loss 0.08895372850820422 test_loss: 0.10399583578109742
epoch: 39 training_loss 0.0855149762891233 test_loss: 0.10049228668212891
epoch: 40 training_loss 0.09312982700765132 test_loss: 0.10541603565216065
epoch: 41 training_loss 0.08710619991645217 test_loss: 0.10354763269424438
epoch: 42 training_loss 0.09246654978021979 test_loss: 0.10676029920578003
epoch: 43 training_loss 0.08968291480094194 test_loss: 0.09839040040969849
epoch: 44 training_loss 0.1001440809480846 test_loss: 0.10247628688812256
epoch: 45 training_loss 0.0853348795697093 test_loss: 0.0886311948299408
epoch: 46 training_loss 0.08427643530070782 test_loss: 0.10890351533889771
epoch: 47 training_loss 0.0861347605381161 test_loss: 0.11203042268753052
epoch: 48 training_loss 0.08687640318647027 test_loss: 0.09895715117454529
epoch: 49 training_loss 0.0983594680018723 test_loss: 0.09695095419883729
epoch: 50 training_loss 0.08355994286015629 test_loss: 0.08880744576454162
epoch: 51 training_loss 0.0914867646060884 test_loss: 0.10553843975067138
epoch: 52 training_loss 0.0860603591427207 test_loss: 0.10483322143554688
epoch: 53 training_loss 0.09412241345271469 test_loss: 0.09979408979415894
epoch: 54 training_loss 0.08946515131741763 test_loss: 0.09631779193878173
epoch: 55 training_loss 0.08716673905029892 test_loss: 0.09467653036117554
epoch: 56 training_loss 0.08892047375440598 test_loss: 0.09084711670875549
epoch: 57 training_loss 0.09108614832162858 test_loss: 0.08481094837188721
epoch: 58 training_loss 0.0869376340508461 test_loss: 0.08558579683303832
epoch: 59 training_loss 0.0922767112031579 test_loss: 0.10378137826919556
epoch: 60 training_loss 0.08939711507409812 test_loss: 0.08373537063598632
epoch: 61 training_loss 0.08639684261754155 test_loss: 0.10694015026092529
epoch: 62 training_loss 0.08457976944744587 test_loss: 0.0955461323261261
epoch: 63 training_loss 0.0907075547799468 test_loss: 0.0953299880027771
epoch: 64 training_loss 0.0821924459002912 test_loss: 0.09864731431007386
epoch: 65 training_loss 0.08510638743638993 test_loss: 0.1038421630859375
epoch: 66 training_loss 0.08508972160518169 test_loss: 0.11383970975875854
epoch: 67 training_loss 0.08716721387580037 test_loss: 0.10114047527313233
epoch: 68 training_loss 0.08395612691529095 test_loss: 0.08753278851509094
epoch: 69 training_loss 0.0873104416579008 test_loss: 0.10014326572418213
epoch: 70 training_loss 0.08990305898711086 test_loss: 0.11043763160705566
epoch: 71 training_loss 0.08316687423735857 test_loss: 0.09598038196563721
epoch: 72 training_loss 0.07975050161592662 test_loss: 0.0979913353919983
epoch: 73 training_loss 0.08408298339694738 test_loss: 0.11478896141052246
epoch: 74 training_loss 0.07991527019068599 test_loss: 0.09489955902099609
epoch: 75 training_loss 0.0841600058041513 test_loss: 0.09540818333625793
epoch: 76 training_loss 0.08170687021687627 test_loss: 0.09985789060592651
epoch: 77 training_loss 0.08567755248397589 test_loss: 0.10526074171066284
epoch: 78 training_loss 0.08055349653586745 test_loss: 0.11588069200515747
epoch: 79 training_loss 0.08587946010753512 test_loss: 0.09415372610092163
epoch: 80 training_loss 0.082028095908463 test_loss: 0.09520886540412903
epoch: 81 training_loss 0.08567264681681991 test_loss: 0.10473676919937133
epoch: 82 training_loss 0.08598590244539082 test_loss: 0.09480863809585571
epoch: 83 training_loss 0.0867425342835486 test_loss: 0.09059785604476929
epoch: 84 training_loss 0.08276231117546558 test_loss: 0.10600570440292359
epoch: 85 training_loss 0.08320813274011016 test_loss: 0.07491036653518676
epoch: 86 training_loss 0.08622858311980963 test_loss: 0.09141918420791625
epoch: 87 training_loss 0.08291417792439461 test_loss: 0.10964818000793457
epoch: 88 training_loss 0.07474245032295584 test_loss: 0.08986330628395081
epoch: 89 training_loss 0.08740071484819055 test_loss: 0.09065127372741699
epoch: 90 training_loss 0.08927141418680548 test_loss: 0.12397021055221558
epoch: 91 training_loss 0.07673476021736861 test_loss: 0.09915084838867187
epoch: 92 training_loss 0.08318476049229502 test_loss: 0.08936213850975036
epoch: 93 training_loss 0.08163697641342878 test_loss: 0.10423058271408081
epoch: 94 training_loss 0.0789432373829186 test_loss: 0.10522204637527466
epoch: 95 training_loss 0.07944886790588498 test_loss: 0.09190899729728699
epoch: 96 training_loss 0.08857677917927503 test_loss: 0.08440021872520446
epoch: 97 training_loss 0.0834047215245664 test_loss: 0.1087903618812561
epoch: 98 training_loss 0.08060977436602115 test_loss: 0.10119954347610474
epoch: 99 training_loss 0.08256258368492127 test_loss: 0.1016729235649109
epoch: 100 training_loss 0.08213722495362162 test_loss: 0.0970238983631134
epoch: 101 training_loss 0.08234359577298164 test_loss: 0.10609902143478393
epoch: 102 training_loss 0.08956743456423283 test_loss: 0.09653555154800415
epoch: 103 training_loss 0.07878871707245708 test_loss: 0.10593355894088745
epoch: 104 training_loss 0.08613573586568236 test_loss: 0.0997778594493866
epoch: 105 training_loss 0.0867775870859623 test_loss: 0.1084549069404602
epoch: 106 training_loss 0.0813586581684649 test_loss: 0.10967624187469482
epoch: 107 training_loss 0.08401940641924739 test_loss: 0.09988332390785218
epoch: 108 training_loss 0.07612835731357336 test_loss: 0.11014381647109986
epoch: 109 training_loss 0.081968007478863 test_loss: 0.10573382377624511
epoch: 110 training_loss 0.07757091924548148 test_loss: 0.10622237920761109
epoch: 111 training_loss 0.08024465475231408 test_loss: 0.0996909499168396
epoch: 112 training_loss 0.07809882830828428 test_loss: 0.09498314261436462
epoch: 113 training_loss 0.08453394453972578 test_loss: 0.09450933337211609
epoch: 114 training_loss 0.08550134299322963 test_loss: 0.09793087244033813
epoch: 115 training_loss 0.08096210822463036 test_loss: 0.10849841833114623
epoch: 116 training_loss 0.08446746176108717 test_loss: 0.10253010988235474
epoch: 117 training_loss 0.08183423748239875 test_loss: 0.09820684194564819
epoch: 118 training_loss 0.08411231780424715 test_loss: 0.09855605363845825
epoch: 119 training_loss 0.08256756728515029 test_loss: 0.10515543222427368
epoch: 120 training_loss 0.08165569795295596 test_loss: 0.11396311521530152
epoch: 121 training_loss 0.08040803139097989 test_loss: 0.0822772204875946
epoch: 122 training_loss 0.08183902118355035 test_loss: 0.10550878047943116
epoch: 123 training_loss 0.07865920789539814 test_loss: 0.09297019839286805
epoch: 124 training_loss 0.07642167404294015 test_loss: 0.08803263306617737
epoch: 125 training_loss 0.076074872110039 test_loss: 0.13243962526321412
epoch: 126 training_loss 0.0801798240095377 test_loss: 0.09955461621284485
epoch: 127 training_loss 0.08282192438840866 test_loss: 0.09625856876373291
epoch: 128 training_loss 0.07908385377377272 test_loss: 0.08989273905754089
epoch: 129 training_loss 0.08138019256293774 test_loss: 0.0963937759399414
epoch: 130 training_loss 0.07836787207052112 test_loss: 0.11865469217300414
epoch: 131 training_loss 0.08129966504871845 test_loss: 0.11110666990280152
epoch: 132 training_loss 0.07772907814010978 test_loss: 0.09537512063980103
epoch: 133 training_loss 0.0803823683038354 test_loss: 0.09691782593727112
epoch: 134 training_loss 0.0785453387349844 test_loss: 0.10288541316986084
epoch: 135 training_loss 0.08154572317376733 test_loss: 0.10930166244506836
epoch: 136 training_loss 0.08904839780181646 test_loss: 0.10165001153945923
epoch: 137 training_loss 0.07584897723048925 test_loss: 0.10003352165222168
epoch: 138 training_loss 0.08446661181747914 test_loss: 0.11048325300216674
epoch: 139 training_loss 0.07743940269574523 test_loss: 0.10392293930053711
epoch: 140 training_loss 0.0810984624736011 test_loss: 0.10551527738571168
epoch: 141 training_loss 0.07336636491119862 test_loss: 0.11317566633224488
epoch: 142 training_loss 0.07831877492368221 test_loss: 0.11083660125732422
epoch: 143 training_loss 0.07622263949364423 test_loss: 0.10913865566253662
epoch: 144 training_loss 0.07681759640574455 test_loss: 0.11395916938781739
epoch: 145 training_loss 0.07818413507193327 test_loss: 0.09899204969406128
epoch: 146 training_loss 0.07973081598058343 test_loss: 0.0845971405506134
epoch: 147 training_loss 0.07664124552160502 test_loss: 0.10842831134796142
epoch: 148 training_loss 0.07801509577780961 test_loss: 0.09961274266242981
epoch: 149 training_loss 0.0771486335620284 test_loss: 0.09549974799156188
epoch: 0 training_loss 0.26391219824552536 test_loss: 0.1641397476196289
epoch: 1 training_loss 0.13380157174542545 test_loss: 0.17925397157669068
epoch: 2 training_loss 0.14203558988869192 test_loss: 0.1175909161567688
epoch: 3 training_loss 0.12838592819869518 test_loss: 0.14392356872558593
epoch: 4 training_loss 0.10851889751851558 test_loss: 0.10748225450515747
epoch: 5 training_loss 0.1151700721681118 test_loss: 0.12882856130599976
epoch: 6 training_loss 0.11051215328276158 test_loss: 0.10540987253189087
epoch: 7 training_loss 0.10439882718026638 test_loss: 0.10288773775100708
epoch: 8 training_loss 0.10433044290170074 test_loss: 0.10497972965240479
epoch: 9 training_loss 0.10364189356565476 test_loss: 0.10792473554611207
epoch: 10 training_loss 0.10847360115498304 test_loss: 0.09748451709747315
epoch: 11 training_loss 0.10349823042750358 test_loss: 0.09604485630989075
epoch: 12 training_loss 0.09995702393352986 test_loss: 0.11047682762145997
epoch: 13 training_loss 0.10614913382567465 test_loss: 0.11469295024871826
epoch: 14 training_loss 0.10090435944497585 test_loss: 0.11472899913787842
epoch: 15 training_loss 0.10451003545895218 test_loss: 0.10487979650497437
epoch: 16 training_loss 0.09988067358732224 test_loss: 0.10186902284622193
epoch: 17 training_loss 0.11038033984601497 test_loss: 0.11016318798065186
epoch: 18 training_loss 0.09917717078700662 test_loss: 0.09602344632148743
epoch: 19 training_loss 0.09778333246707917 test_loss: 0.08903862237930298
epoch: 20 training_loss 0.0966029310785234 test_loss: 0.09544692635536194
epoch: 21 training_loss 0.09004944184795022 test_loss: 0.10088428258895873
epoch: 22 training_loss 0.09689563740044832 test_loss: 0.09568451046943664
epoch: 23 training_loss 0.09675121208652854 test_loss: 0.10411902666091918
epoch: 24 training_loss 0.1022660871502012 test_loss: 0.10531619787216187
epoch: 25 training_loss 0.09200178530067206 test_loss: 0.10836362838745117
epoch: 26 training_loss 0.09743601572699845 test_loss: 0.0975397527217865
epoch: 27 training_loss 0.0917795604839921 test_loss: 0.10387370586395264
epoch: 28 training_loss 0.0930667107924819 test_loss: 0.1050440788269043
epoch: 29 training_loss 0.09696039877831936 test_loss: 0.10076322555541992
epoch: 30 training_loss 0.08410632459446787 test_loss: 0.08361496329307556
epoch: 31 training_loss 0.09270626734942197 test_loss: 0.08765690326690674
epoch: 32 training_loss 0.09179011203348636 test_loss: 0.08235566020011902
epoch: 33 training_loss 0.09354163812473416 test_loss: 0.10477651357650757
epoch: 34 training_loss 0.09401895329356194 test_loss: 0.10441219806671143
epoch: 35 training_loss 0.09296977147459984 test_loss: 0.08448755145072936
epoch: 36 training_loss 0.0908982970751822 test_loss: 0.08923783302307128
epoch: 37 training_loss 0.09138654734939337 test_loss: 0.10556507110595703
epoch: 38 training_loss 0.09365944352000952 test_loss: 0.09645419120788574
epoch: 39 training_loss 0.08349227353930473 test_loss: 0.11616322994232178
epoch: 40 training_loss 0.08600001057609916 test_loss: 0.09796309471130371
epoch: 41 training_loss 0.08521826827898621 test_loss: 0.13726966381072997
epoch: 42 training_loss 0.0886128448229283 test_loss: 0.099592787027359
epoch: 43 training_loss 0.08391234252601862 test_loss: 0.10330731868743896
epoch: 44 training_loss 0.08902648955583572 test_loss: 0.09995516538619995
epoch: 45 training_loss 0.09280673626810312 test_loss: 0.10701721906661987
epoch: 46 training_loss 0.08992779040709138 test_loss: 0.09166507124900818
epoch: 47 training_loss 0.09251813512295484 test_loss: 0.0817626178264618
epoch: 48 training_loss 0.0881210863403976 test_loss: 0.09866997599601746
epoch: 49 training_loss 0.09231369502842426 test_loss: 0.09133891463279724
epoch: 50 training_loss 0.0908909431938082 test_loss: 0.10180563926696777
epoch: 51 training_loss 0.08778374386951328 test_loss: 0.08692457675933837
epoch: 52 training_loss 0.08605922628194093 test_loss: 0.09575276374816895
epoch: 53 training_loss 0.08642114056274294 test_loss: 0.10219646692276001
epoch: 54 training_loss 0.09207006761804223 test_loss: 0.09551535844802857
epoch: 55 training_loss 0.08976828197017311 test_loss: 0.08846480846405029
epoch: 56 training_loss 0.08325277520343662 test_loss: 0.09012388586997985
epoch: 57 training_loss 0.08288832116872072 test_loss: 0.11747742891311645
epoch: 58 training_loss 0.0780218746419996 test_loss: 0.1114585280418396
epoch: 59 training_loss 0.07546199150383473 test_loss: 0.10386687517166138
epoch: 60 training_loss 0.09133050629869104 test_loss: 0.09620906710624695
epoch: 61 training_loss 0.08875544846989214 test_loss: 0.09086238741874694
epoch: 62 training_loss 0.08966724274680019 test_loss: 0.0977593183517456
epoch: 63 training_loss 0.09069393193349243 test_loss: 0.10166805982589722
epoch: 64 training_loss 0.08436185985803604 test_loss: 0.08179627060890197
epoch: 65 training_loss 0.07928871545940637 test_loss: 0.0905076265335083
epoch: 66 training_loss 0.080013733394444 test_loss: 0.09192433357238769
epoch: 67 training_loss 0.08661779630929231 test_loss: 0.09498543739318847
epoch: 68 training_loss 0.08961965193971992 test_loss: 0.09146059155464173
epoch: 69 training_loss 0.08773420451208949 test_loss: 0.08238794207572937
epoch: 70 training_loss 0.08930791672319174 test_loss: 0.1003667950630188
epoch: 71 training_loss 0.08788254305720329 test_loss: 0.08938186764717101
epoch: 72 training_loss 0.08937267813831568 test_loss: 0.11189849376678467
epoch: 73 training_loss 0.0845898068509996 test_loss: 0.0977138638496399
epoch: 74 training_loss 0.08714593170210719 test_loss: 0.0978400468826294
epoch: 75 training_loss 0.08443479871377349 test_loss: 0.08825088143348694
epoch: 76 training_loss 0.08939141988754272 test_loss: 0.0946148157119751
epoch: 77 training_loss 0.08012490838766098 test_loss: 0.11443519592285156
epoch: 78 training_loss 0.08727516010403633 test_loss: 0.09522902369499206
epoch: 79 training_loss 0.08270737130194902 test_loss: 0.09736686944961548
epoch: 80 training_loss 0.0827840161882341 test_loss: 0.09588137865066529
epoch: 81 training_loss 0.0904663178883493 test_loss: 0.10131086111068725
epoch: 82 training_loss 0.08335391553118825 test_loss: 0.09886305928230285
epoch: 83 training_loss 0.08692238107323647 test_loss: 0.10661766529083253
epoch: 84 training_loss 0.08810791416093706 test_loss: 0.10691763162612915
epoch: 85 training_loss 0.0864636555314064 test_loss: 0.09053593277931213
epoch: 86 training_loss 0.0844344742782414 test_loss: 0.10915321111679077
epoch: 87 training_loss 0.08701784356031567 test_loss: 0.08515737652778625
epoch: 88 training_loss 0.08952682562172413 test_loss: 0.1000771164894104
epoch: 89 training_loss 0.08693142641335726 test_loss: 0.10491390228271484
epoch: 90 training_loss 0.0818752233311534 test_loss: 0.10332545042037963
epoch: 91 training_loss 0.079515860080719 test_loss: 0.11082103252410888
epoch: 92 training_loss 0.08091126184910535 test_loss: 0.09505508542060852
epoch: 93 training_loss 0.08621237266808748 test_loss: 0.08352177739143371
epoch: 94 training_loss 0.07762587694451213 test_loss: 0.07158421277999878
epoch: 95 training_loss 0.08298935327678919 test_loss: 0.08789293766021729
epoch: 96 training_loss 0.0904695812985301 test_loss: 0.09297968745231629
epoch: 97 training_loss 0.09070955164730549 test_loss: 0.08851860165596008
epoch: 98 training_loss 0.08193550389260054 test_loss: 0.08404698371887206
epoch: 99 training_loss 0.08689115012064576 test_loss: 0.08471958637237549
epoch: 100 training_loss 0.08133689815178513 test_loss: 0.0828942596912384
epoch: 101 training_loss 0.08290186628699303 test_loss: 0.09442640542984009
epoch: 102 training_loss 0.08172059191390872 test_loss: 0.08599347472190857
epoch: 103 training_loss 0.08954434685409068 test_loss: 0.09117712974548339
epoch: 104 training_loss 0.08113212766125798 test_loss: 0.11150283813476562
epoch: 105 training_loss 0.0859635498188436 test_loss: 0.0858287274837494
epoch: 106 training_loss 0.08123987644910813 test_loss: 0.09885241389274597
epoch: 107 training_loss 0.08152958890423179 test_loss: 0.09434439539909363
epoch: 108 training_loss 0.08378838503733277 test_loss: 0.07962198853492737
epoch: 109 training_loss 0.08796789040789008 test_loss: 0.10560367107391358
epoch: 110 training_loss 0.07561295846477151 test_loss: 0.09538324475288391
epoch: 111 training_loss 0.08845006544142961 test_loss: 0.09362885355949402
epoch: 112 training_loss 0.0854058832116425 test_loss: 0.1018669605255127
epoch: 113 training_loss 0.08769874449819326 test_loss: 0.09818280339241028
epoch: 114 training_loss 0.08718419543467462 test_loss: 0.09941682815551758
epoch: 115 training_loss 0.08460528219118714 test_loss: 0.10699625015258789
epoch: 116 training_loss 0.08429202362895012 test_loss: 0.09744115471839905
epoch: 117 training_loss 0.084565111938864 test_loss: 0.09515351057052612
epoch: 118 training_loss 0.08276332254521548 test_loss: 0.09416353702545166
epoch: 119 training_loss 0.08167771466076373 test_loss: 0.0970622181892395
epoch: 120 training_loss 0.07903353281319142 test_loss: 0.1019403338432312
epoch: 121 training_loss 0.08284954074770212 test_loss: 0.09515249729156494
epoch: 122 training_loss 0.0843462953157723 test_loss: 0.0927262544631958
epoch: 123 training_loss 0.08065326038748026 test_loss: 0.09806208610534668
epoch: 124 training_loss 0.07929046330973506 test_loss: 0.09508965611457824
epoch: 125 training_loss 0.07766423981636762 test_loss: 0.10358954668045044
epoch: 126 training_loss 0.0832758466899395 test_loss: 0.0990460216999054
epoch: 127 training_loss 0.08335615955293178 test_loss: 0.08557933568954468
epoch: 128 training_loss 0.08363476846367121 test_loss: 0.08473430275917053
epoch: 129 training_loss 0.08172948187217116 test_loss: 0.09075517058372498
epoch: 130 training_loss 0.08004676043987274 test_loss: 0.06874581575393676
epoch: 131 training_loss 0.08610725820995867 test_loss: 0.09232873916625976
epoch: 132 training_loss 0.08574537105858326 test_loss: 0.10009775161743165
epoch: 133 training_loss 0.08128786973655223 test_loss: 0.11820788383483886
epoch: 134 training_loss 0.08460746394470334 test_loss: 0.09513222575187683
epoch: 135 training_loss 0.081281801443547 test_loss: 0.09439620971679688
epoch: 136 training_loss 0.08329002792015672 test_loss: 0.08500087261199951
epoch: 137 training_loss 0.078277189694345 test_loss: 0.07967311143875122
epoch: 138 training_loss 0.08056302791461349 test_loss: 0.08412502408027649
epoch: 139 training_loss 0.07757775532081723 test_loss: 0.08751490116119384
epoch: 140 training_loss 0.07889911444857717 test_loss: 0.08710869550704955
epoch: 141 training_loss 0.08498867146670819 test_loss: 0.11015175580978394
epoch: 142 training_loss 0.08008521087467671 test_loss: 0.10098351240158081
epoch: 143 training_loss 0.0851873461715877 test_loss: 0.08861908316612244
epoch: 144 training_loss 0.0882013869844377 test_loss: 0.1051867961883545
epoch: 145 training_loss 0.07984270794317126 test_loss: 0.09438256621360779
epoch: 146 training_loss 0.08634293731302023 test_loss: 0.0790608286857605
epoch: 147 training_loss 0.07586079360917211 test_loss: 0.10686140060424805
epoch: 148 training_loss 0.08167024848982692 test_loss: 0.09140378832817078
epoch: 149 training_loss 0.075435830950737 test_loss: 0.10170432329177856
episode: 0 training return: -864.4097470840937
episode: 1 training return: -799.5882495550219
episode: 2 training return: -838.4551328394416
episode: 3 training return: -824.6343100033881
epoch: 1 test_true_pfm: -24.24288805302646 sim_pfm: -620.3439418366319
episode: 4 training return: -816.9337691141167
episode: 5 training return: -852.9145628644418
episode: 6 training return: -834.1755570449589
episode: 7 training return: -806.4156935422839
epoch: 2 test_true_pfm: 135.92836011487228 sim_pfm: -694.0469546950441
episode: 8 training return: -821.1011007626032
episode: 9 training return: -833.4950630729577
episode: 10 training return: -831.7344920515517
episode: 11 training return: -662.0435278647485
epoch: 3 test_true_pfm: 113.58256399382314 sim_pfm: -589.6408444913296
episode: 12 training return: -621.254138097509
episode: 13 training return: -841.4826992413632
episode: 14 training return: -827.2519654174514
episode: 15 training return: -855.5203159148404
epoch: 4 test_true_pfm: -120.21991922305186 sim_pfm: -612.590355272408
episode: 16 training return: -783.1095626399942
episode: 17 training return: -791.9142578201042
episode: 18 training return: -844.2419901273395
episode: 19 training return: -765.188701347922
epoch: 5 test_true_pfm: 58.0456286142035 sim_pfm: -517.12951937407
episode: 20 training return: -665.5248318115105
episode: 21 training return: -805.8383388742034
episode: 22 training return: -779.1137117385389
episode: 23 training return: -725.8664131924222
epoch: 6 test_true_pfm: 22.159304860997114 sim_pfm: -599.3356141734324
episode: 24 training return: -701.3138148810566
episode: 25 training return: -768.6777529810724
episode: 26 training return: -823.5100225727065
episode: 27 training return: -848.5300278565225
epoch: 7 test_true_pfm: 117.35422205383492 sim_pfm: -861.3713150485297
episode: 28 training return: -793.0749291572545
episode: 29 training return: -716.8762058611984
episode: 30 training return: -619.2997578394571
episode: 31 training return: -761.8692647391646
epoch: 8 test_true_pfm: 45.45386029435728 sim_pfm: -535.3670213127358
episode: 32 training return: -715.5187174082268
episode: 33 training return: -850.0456824109187
episode: 34 training return: -583.8689621139073
episode: 35 training return: -576.4924115217588
epoch: 9 test_true_pfm: 18.89766249821597 sim_pfm: -529.6414542287174
episode: 36 training return: -606.9134227986771
episode: 37 training return: -648.5234002427569
episode: 38 training return: -579.2339052570848
episode: 39 training return: -544.6227882491861
epoch: 10 test_true_pfm: 92.10197352348742 sim_pfm: -488.134176408873
episode: 40 training return: -674.0479775569977
episode: 41 training return: -536.9386736482987
episode: 42 training return: -588.1130457519072
episode: 43 training return: -733.1588515598579
epoch: 11 test_true_pfm: 161.237130887752 sim_pfm: -427.4663579861147
episode: 44 training return: -515.950690440812
episode: 45 training return: -582.1592341966017
episode: 46 training return: -527.2302321119564
episode: 47 training return: -576.6307446750054
epoch: 12 test_true_pfm: 169.04204834799432 sim_pfm: -479.321529747449
episode: 48 training return: -630.7368694721016
episode: 49 training return: -531.1340237248268
episode: 50 training return: -528.5209637065504
episode: 51 training return: -539.7014399870684
epoch: 13 test_true_pfm: 226.53961360581238 sim_pfm: -491.3896930374087
episode: 52 training return: -561.9642272206372
episode: 53 training return: -536.243480647254
episode: 54 training return: -501.33766393288425
episode: 55 training return: -535.8191449404609
epoch: 14 test_true_pfm: 290.2501168497601 sim_pfm: -470.98914942073225
episode: 56 training return: -559.6628407138893
episode: 57 training return: -513.5417639750954
episode: 58 training return: -533.8684457506806
episode: 59 training return: -523.4734370330583
epoch: 15 test_true_pfm: 296.1732144720818 sim_pfm: -429.44242542473256
episode: 60 training return: -475.14816522128484
episode: 61 training return: -568.6541763091684
episode: 62 training return: -498.9746215050832
episode: 63 training return: -577.7681408677962
epoch: 16 test_true_pfm: 398.75003610623304 sim_pfm: -449.67289579507616
episode: 64 training return: -614.3895943920504
episode: 65 training return: -539.4400633844249
episode: 66 training return: -581.5401600693019
episode: 67 training return: -529.0298908455305
epoch: 17 test_true_pfm: 363.4676207637514 sim_pfm: -427.104509427237
episode: 68 training return: -534.0854525464126
episode: 69 training return: -579.231560303656
episode: 70 training return: -528.5118071887717
episode: 71 training return: -537.9750384563762
epoch: 18 test_true_pfm: 434.00677413423097 sim_pfm: -428.6240418813884
episode: 72 training return: -538.5089108785473
episode: 73 training return: -511.6855862621836
episode: 74 training return: -590.9013738644406
episode: 75 training return: -544.4947166336245
epoch: 19 test_true_pfm: 340.74849143579326 sim_pfm: -423.31181345272216
episode: 76 training return: -541.3011458683159
episode: 77 training return: -506.9859197582506
episode: 78 training return: -489.89480026953294
episode: 79 training return: -535.9248032389419
epoch: 20 test_true_pfm: 142.52392726970857 sim_pfm: -471.5373047852956
episode: 80 training return: -491.22772103708485
episode: 81 training return: -567.25817674961
episode: 82 training return: -556.890187702374
episode: 83 training return: -553.2808849839694
epoch: 21 test_true_pfm: 309.72389757620255 sim_pfm: -445.980394768613
episode: 84 training return: -507.3300707274987
episode: 85 training return: -529.9175965614647
episode: 86 training return: -528.970061784938
episode: 87 training return: -562.3188066622967
epoch: 22 test_true_pfm: 173.8349502705306 sim_pfm: -436.87647174225685
episode: 88 training return: -511.44035804874807
episode: 89 training return: -586.8684570993895
episode: 90 training return: -531.1117376428513
episode: 91 training return: -489.6407246304237
epoch: 23 test_true_pfm: 175.04808869405372 sim_pfm: -432.73116281241875
episode: 92 training return: -490.9361174220059
episode: 93 training return: -530.4870357766914
episode: 94 training return: -494.7297732843229
episode: 95 training return: -439.1380205612715
epoch: 24 test_true_pfm: 323.7303967963028 sim_pfm: -404.88942457189637
episode: 96 training return: -653.3597176843847
episode: 97 training return: -478.06069193855586
episode: 98 training return: -481.9015523079206
episode: 99 training return: -494.11527398271534
epoch: 25 test_true_pfm: 467.6159486165615 sim_pfm: -393.0098542254719
episode: 100 training return: -489.95545322199223
episode: 101 training return: -471.76890760290877
episode: 102 training return: -480.2166758268075
episode: 103 training return: -485.9123728504135
epoch: 26 test_true_pfm: 382.6246144809856 sim_pfm: -401.6570270919927
episode: 104 training return: -479.18396175261444
episode: 105 training return: -437.68102970457
episode: 106 training return: -488.7666863886476
episode: 107 training return: -475.0103963858962
epoch: 27 test_true_pfm: 413.5001793332977 sim_pfm: -400.0193010946039
episode: 108 training return: -475.78754976087345
episode: 109 training return: -473.3870036592636
episode: 110 training return: -487.156347466465
episode: 111 training return: -468.5906694230456
epoch: 28 test_true_pfm: 457.9409541589716 sim_pfm: -380.8791577963427
episode: 112 training return: -517.3102897456954
episode: 113 training return: -495.70033486529644
episode: 114 training return: -457.2732151332098
episode: 115 training return: -495.0927497333731
epoch: 29 test_true_pfm: 186.91164255332214 sim_pfm: -411.16568196490834
episode: 116 training return: -540.3929680539947
episode: 117 training return: -450.32690481233357
episode: 118 training return: -468.9954950535821
episode: 119 training return: -513.3669359284858
epoch: 30 test_true_pfm: 377.1283262194145 sim_pfm: -389.35227409451636
episode: 120 training return: -450.6134543261604
episode: 121 training return: -517.5786855331684
episode: 122 training return: -473.5054347400579
episode: 123 training return: -568.5513012245397
epoch: 31 test_true_pfm: 353.0425723841495 sim_pfm: -426.05491773674294
episode: 124 training return: -439.79443437148007
episode: 125 training return: -485.73186025611005
episode: 126 training return: -524.2678728322111
episode: 127 training return: -455.43121255906686
epoch: 32 test_true_pfm: 277.88944631203805 sim_pfm: -383.2841834522566
episode: 128 training return: -474.1003803797908
episode: 129 training return: -457.50826694572595
episode: 130 training return: -569.1751308583046
episode: 131 training return: -510.4704496503792
epoch: 33 test_true_pfm: 408.30816887863074 sim_pfm: -387.71222393045815
episode: 132 training return: -473.7667113044619
episode: 133 training return: -500.4568296175191
episode: 134 training return: -498.99405252790206
episode: 135 training return: -498.3008231675206
epoch: 34 test_true_pfm: 361.2518880333251 sim_pfm: -366.2895947894797
episode: 136 training return: -470.2750649508288
episode: 137 training return: -486.7107123629932
episode: 138 training return: -532.3797734936344
episode: 139 training return: -535.450157322877
epoch: 35 test_true_pfm: 451.52614109836094 sim_pfm: -406.85927105308787
episode: 140 training return: -514.3487238142473
episode: 141 training return: -538.2221402369053
episode: 142 training return: -463.14974786441314
episode: 143 training return: -568.8000457207539
epoch: 36 test_true_pfm: 487.8655540054745 sim_pfm: -406.0825358652857
episode: 144 training return: -681.4682339015743
episode: 145 training return: -519.8015172162454
episode: 146 training return: -515.0762414919915
episode: 147 training return: -503.74873531721846
epoch: 37 test_true_pfm: 376.03974001904663 sim_pfm: -367.748800920832
episode: 148 training return: -461.46160854109263
episode: 149 training return: -440.14217815021306
episode: 150 training return: -485.44389496254684
episode: 151 training return: -487.19961170996834
epoch: 38 test_true_pfm: 448.1639210525014 sim_pfm: -373.81011208301385
episode: 152 training return: -452.28075954064565
episode: 153 training return: -445.4915247445399
episode: 154 training return: -501.5131767484643
episode: 155 training return: -543.5646963738504
epoch: 39 test_true_pfm: 318.94617359531657 sim_pfm: -382.7670332490224
episode: 156 training return: -462.2300899920889
episode: 157 training return: -470.78783316009793
episode: 158 training return: -466.75608128029893
episode: 159 training return: -505.6789572507076
epoch: 40 test_true_pfm: 381.4390238792992 sim_pfm: -416.5651113719871
episode: 160 training return: -474.3792254715221
episode: 161 training return: -480.5514102260222
episode: 162 training return: -478.0368713928869
episode: 163 training return: -533.5545955381792
epoch: 41 test_true_pfm: 236.59901007515774 sim_pfm: -420.8263408941596
episode: 164 training return: -476.34111879681564
episode: 165 training return: -483.8717874104944
episode: 166 training return: -468.7578483563678
episode: 167 training return: -507.4403763563175
epoch: 42 test_true_pfm: 341.3490450303104 sim_pfm: -364.8857530875014
episode: 168 training return: -423.56854652005404
episode: 169 training return: -435.3208376659624
episode: 170 training return: -497.35156941936054
episode: 171 training return: -569.6977743658014
epoch: 43 test_true_pfm: 274.0546071064309 sim_pfm: -396.57195243188386
episode: 172 training return: -494.29162346773876
episode: 173 training return: -408.1031982716525
episode: 174 training return: -504.2179017911013
episode: 175 training return: -528.0215399105836
epoch: 44 test_true_pfm: 362.98543682368546 sim_pfm: -353.0035480790271
episode: 176 training return: -541.914334063366
episode: 177 training return: -498.66284049921614
episode: 178 training return: -467.95125151763773
episode: 179 training return: -461.2899065962293
epoch: 45 test_true_pfm: 188.17517546034193 sim_pfm: -395.2404079908738
episode: 180 training return: -455.583826520344
episode: 181 training return: -478.66711370584017
episode: 182 training return: -466.542010467646
episode: 183 training return: -460.3522516907862
epoch: 46 test_true_pfm: 206.5954390825888 sim_pfm: -412.80405680305665
episode: 184 training return: -448.69953102099964
episode: 185 training return: -471.1610624574354
episode: 186 training return: -452.1922774063657
episode: 187 training return: -462.94398018684797
epoch: 47 test_true_pfm: 284.54204910322863 sim_pfm: -422.12559078014687
episode: 188 training return: -485.0228335203948
episode: 189 training return: -487.8374279861773
episode: 190 training return: -465.62514453614614
episode: 191 training return: -543.0982111359646
epoch: 48 test_true_pfm: 436.8176510200065 sim_pfm: -390.6301603425457
episode: 192 training return: -482.1137021325556
episode: 193 training return: -481.44347959477886
episode: 194 training return: -465.8084206344998
episode: 195 training return: -475.57501609296804
epoch: 49 test_true_pfm: 476.70169756966783 sim_pfm: -373.4895040795609
episode: 196 training return: -507.4479291346533
episode: 197 training return: -491.76135589995994
episode: 198 training return: -489.2862254561958
episode: 199 training return: -472.48057100225304
epoch: 50 test_true_pfm: 370.9670754003869 sim_pfm: -376.6175212965035
episode: 200 training return: -479.0446395078904
episode: 201 training return: -460.70722054123087
episode: 202 training return: -481.59564769367773
episode: 203 training return: -466.18642544067853
epoch: 51 test_true_pfm: 335.340880725232 sim_pfm: -372.4130972493719
episode: 204 training return: -472.54597729795506
episode: 205 training return: -463.7106057671641
episode: 206 training return: -495.7613594244245
episode: 207 training return: -485.3680099801499
epoch: 52 test_true_pfm: 123.51995290661539 sim_pfm: -383.5054855415868
episode: 208 training return: -491.715413200162
episode: 209 training return: -448.85895184733556
episode: 210 training return: -465.6276096528307
episode: 211 training return: -492.2097743032926
epoch: 53 test_true_pfm: 298.37738742530695 sim_pfm: -381.26169936100285
episode: 212 training return: -453.86330035050435
episode: 213 training return: -454.5862897500462
episode: 214 training return: -489.90944542194177
episode: 215 training return: -470.0866775823478
epoch: 54 test_true_pfm: 379.98881039374106 sim_pfm: -386.9556963318633
episode: 216 training return: -473.32413932258123
episode: 217 training return: -514.4195661708304
episode: 218 training return: -458.3667894878424
episode: 219 training return: -463.8263128455033
epoch: 55 test_true_pfm: 450.947050385533 sim_pfm: -357.99432989909775
episode: 220 training return: -441.46739453878746
episode: 221 training return: -466.8921975476807
episode: 222 training return: -495.25977005578955
episode: 223 training return: -472.75205169480574
epoch: 56 test_true_pfm: 380.82518815228127 sim_pfm: -380.9661953314349
episode: 224 training return: -465.8717381852679
episode: 225 training return: -524.5860729149815
episode: 226 training return: -456.24782833143126
episode: 227 training return: -442.06984965266196
epoch: 57 test_true_pfm: 291.35076999421494 sim_pfm: -365.5188417761511
episode: 228 training return: -458.1304475357308
episode: 229 training return: -457.3992029093715
episode: 230 training return: -432.2981824280725
episode: 231 training return: -452.68451046088427
epoch: 58 test_true_pfm: 443.82602836088023 sim_pfm: -348.47247724566097
episode: 232 training return: -529.2171702212845
episode: 233 training return: -468.1211811528162
episode: 234 training return: -434.96466220007255
episode: 235 training return: -473.6249252329598
epoch: 59 test_true_pfm: 404.9608527106164 sim_pfm: -366.37474430994644
episode: 236 training return: -472.3616063914993
episode: 237 training return: -460.14039777287593
episode: 238 training return: -442.7494294488911
episode: 239 training return: -458.47337329145927
epoch: 60 test_true_pfm: 400.7403453811723 sim_pfm: -354.7790162876256
episode: 240 training return: -489.60103936989185
episode: 241 training return: -501.82085365172975
episode: 242 training return: -453.94174610500676
episode: 243 training return: -449.98542249675836
epoch: 61 test_true_pfm: 367.677444606416 sim_pfm: -371.05118819095077
episode: 244 training return: -432.59077984740037
episode: 245 training return: -479.69059337715987
episode: 246 training return: -489.7591386224791
episode: 247 training return: -480.2733123115608
epoch: 62 test_true_pfm: 410.17295980851054 sim_pfm: -357.3666917525465
episode: 248 training return: -455.1901151523368
episode: 249 training return: -476.73533734526404
episode: 250 training return: -490.33033030328755
episode: 251 training return: -469.8543597104137
epoch: 63 test_true_pfm: 342.2838251509085 sim_pfm: -369.0792900580131
episode: 252 training return: -455.13227411887027
episode: 253 training return: -453.3415653276103
episode: 254 training return: -457.3413286895784
episode: 255 training return: -458.18469598084704
epoch: 64 test_true_pfm: 228.86562022912184 sim_pfm: -377.5390843253742
episode: 256 training return: -443.6344229386968
episode: 257 training return: -479.4874669792462
episode: 258 training return: -434.20529658718647
episode: 259 training return: -490.0053580592729
epoch: 65 test_true_pfm: 296.87093140206645 sim_pfm: -356.7277685136546
episode: 260 training return: -491.1005862311395
episode: 261 training return: -454.9376598251069
episode: 262 training return: -496.5173917263355
episode: 263 training return: -503.0615055087684
epoch: 66 test_true_pfm: 368.52705171303 sim_pfm: -362.22272157253354
episode: 264 training return: -454.6397349679129
episode: 265 training return: -467.37028460144313
episode: 266 training return: -516.144033893772
episode: 267 training return: -432.10014423412775
epoch: 67 test_true_pfm: 435.43952001499 sim_pfm: -364.1210501717515
episode: 268 training return: -483.1942843570387
episode: 269 training return: -478.29209461460636
episode: 270 training return: -473.61040516908463
episode: 271 training return: -467.3573370801649
epoch: 68 test_true_pfm: 371.89524722984225 sim_pfm: -375.5845973796781
episode: 272 training return: -504.6165081379192
episode: 273 training return: -421.4316072071159
episode: 274 training return: -450.02551792097404
episode: 275 training return: -467.97909842864556
epoch: 69 test_true_pfm: 351.36896500665034 sim_pfm: -367.3452371714802
episode: 276 training return: -479.26226112239124
episode: 277 training return: -441.58694899709377
episode: 278 training return: -429.36861325698345
episode: 279 training return: -446.05683636012617
epoch: 70 test_true_pfm: 552.3050471082091 sim_pfm: -381.7140445258439
episode: 280 training return: -464.8059952740356
episode: 281 training return: -443.5316122325933
episode: 282 training return: -468.4534567123507
episode: 283 training return: -491.9253394491242
epoch: 71 test_true_pfm: 580.1283409049524 sim_pfm: -338.91154366829693
episode: 284 training return: -859.3891879069741
episode: 285 training return: -450.756313772532
episode: 286 training return: -400.9419750041135
episode: 287 training return: -410.05946049526113
epoch: 72 test_true_pfm: 283.9068421935649 sim_pfm: -370.4765922846961
episode: 288 training return: -436.27852833784783
episode: 289 training return: -428.348664207868
episode: 290 training return: -482.103185986943
episode: 291 training return: -444.16299388281766
epoch: 73 test_true_pfm: 471.9780453566447 sim_pfm: -328.10019977296264
episode: 292 training return: -448.4823713628096
episode: 293 training return: -459.78608553833305
episode: 294 training return: -485.0771236320897
episode: 295 training return: -461.0629871438222
epoch: 74 test_true_pfm: 438.024384229354 sim_pfm: -368.8517810186449
episode: 296 training return: -497.1565279487232
episode: 297 training return: -483.93495756246585
episode: 298 training return: -543.6210021946893
episode: 299 training return: -472.03625959671854
epoch: 75 test_true_pfm: 575.8729784038597 sim_pfm: -334.7768079197155
episode: 300 training return: -449.8784076110187
episode: 301 training return: -441.3821268631279
episode: 302 training return: -445.93084162447576
episode: 303 training return: -445.7437851014558
epoch: 76 test_true_pfm: 364.86284658254385 sim_pfm: -393.59971147277594
episode: 304 training return: -439.2866501174588
episode: 305 training return: -460.8368256476749
episode: 306 training return: -508.925803613334
episode: 307 training return: -469.6398296040415
epoch: 77 test_true_pfm: 415.5768798578124 sim_pfm: -377.8084430179969
episode: 308 training return: -454.3244782788711
episode: 309 training return: -435.747506926187
episode: 310 training return: -433.9904783235199
episode: 311 training return: -392.6853431276535
epoch: 78 test_true_pfm: 533.0089002202467 sim_pfm: -357.5893921759104
episode: 312 training return: -547.7900578240906
episode: 313 training return: -448.4141088846379
episode: 314 training return: -438.75334918826866
episode: 315 training return: -412.24856893050907
epoch: 79 test_true_pfm: 557.6370597344171 sim_pfm: -359.1509511367869
episode: 316 training return: -441.2878494921662
episode: 317 training return: -466.9631164532886
episode: 318 training return: -442.83933730638955
episode: 319 training return: -442.0664389117889
epoch: 80 test_true_pfm: 428.1550460799658 sim_pfm: -328.77197183123366
episode: 320 training return: -455.0323922260253
episode: 321 training return: -502.85688626216455
episode: 322 training return: -447.811784364626
episode: 323 training return: -444.31962835859815
epoch: 81 test_true_pfm: 556.8773408053845 sim_pfm: -345.5812150788615
episode: 324 training return: -435.1865695543817
episode: 325 training return: -439.6123384141766
episode: 326 training return: -454.6574978017825
episode: 327 training return: -463.5507500355695
epoch: 82 test_true_pfm: 557.3800844992883 sim_pfm: -332.79695170747755
episode: 328 training return: -463.71637216825985
episode: 329 training return: -457.0299478298445
episode: 330 training return: -443.12882602241
episode: 331 training return: -456.49314293484184
epoch: 83 test_true_pfm: 492.76575036713257 sim_pfm: -360.5434301556057
episode: 332 training return: -445.19228648949587
episode: 333 training return: -434.9067777450225
episode: 334 training return: -455.79763758613984
episode: 335 training return: -407.79447587176765
epoch: 84 test_true_pfm: 544.1079835668991 sim_pfm: -349.83250513490276
episode: 336 training return: -471.4281197394128
episode: 337 training return: -454.3738022477108
episode: 338 training return: -437.8668068076436
episode: 339 training return: -491.74136906953595
epoch: 85 test_true_pfm: 658.5399916796888 sim_pfm: -334.7010106627536
episode: 340 training return: -439.36758379365244
episode: 341 training return: -475.09633073018955
episode: 342 training return: -426.66310158110116
episode: 343 training return: -474.5374614976503
epoch: 86 test_true_pfm: 574.5204644333943 sim_pfm: -312.8591017062155
episode: 344 training return: -469.5011204306695
episode: 345 training return: -438.8126120092136
episode: 346 training return: -458.96669381980524
episode: 347 training return: -521.7953608450612
epoch: 87 test_true_pfm: 509.57298198605827 sim_pfm: -327.1527355085325
episode: 348 training return: -503.56699966473036
episode: 349 training return: -409.4028566899908
episode: 350 training return: -451.4892078080318
episode: 351 training return: -469.03557586105654
epoch: 88 test_true_pfm: 531.5914720075929 sim_pfm: -337.7671629882226
episode: 352 training return: -438.91068374814165
episode: 353 training return: -436.4215550656146
episode: 354 training return: -452.4893142048227
episode: 355 training return: -458.63165269773754
epoch: 89 test_true_pfm: 366.8096332590494 sim_pfm: -358.8967076281411
episode: 356 training return: -514.7412784744077
episode: 357 training return: -436.70001815619014
episode: 358 training return: -425.88174119488855
episode: 359 training return: -441.28016327739124
epoch: 90 test_true_pfm: 285.8004497830381 sim_pfm: -373.9728176143469
episode: 360 training return: -441.92787455113955
episode: 361 training return: -430.4190833289924
episode: 362 training return: -490.97610331267634
episode: 363 training return: -449.99724415621904
epoch: 91 test_true_pfm: 499.3060509394618 sim_pfm: -332.2742646156847
episode: 364 training return: -416.0343536768582
episode: 365 training return: -456.4500015872012
episode: 366 training return: -438.5610625006888
episode: 367 training return: -469.70144346433517
epoch: 92 test_true_pfm: 490.67145402398 sim_pfm: -341.7482514136093
episode: 368 training return: -477.6338678848707
episode: 369 training return: -444.7334405488088
episode: 370 training return: -453.80124534856634
episode: 371 training return: -469.8318483308244
epoch: 93 test_true_pfm: 478.13435102031025 sim_pfm: -356.86020667483126
episode: 372 training return: -402.71963933744473
episode: 373 training return: -510.9918684760771
episode: 374 training return: -424.06563097755054
episode: 375 training return: -424.71054508102776
epoch: 94 test_true_pfm: 351.310619290057 sim_pfm: -349.10293046419565
episode: 376 training return: -472.23153983853683
episode: 377 training return: -425.26060826252467
episode: 378 training return: -477.09587868412194
episode: 379 training return: -466.96901200649535
epoch: 95 test_true_pfm: 408.0482242580901 sim_pfm: -362.3241791822745
episode: 380 training return: -459.9622317943658
episode: 381 training return: -449.93995106433835
episode: 382 training return: -439.2585329880958
episode: 383 training return: -445.8570445352832
epoch: 96 test_true_pfm: 601.4652317209053 sim_pfm: -342.50256872817545
episode: 384 training return: -443.85226035808853
episode: 385 training return: -473.69213226397517
episode: 386 training return: -443.23356295075274
episode: 387 training return: -466.7285180855825
epoch: 97 test_true_pfm: 454.7442377249595 sim_pfm: -345.50570287938723
episode: 388 training return: -464.44097385696597
episode: 389 training return: -405.0118083100976
episode: 390 training return: -495.0758623592526
episode: 391 training return: -423.4968917696845
epoch: 98 test_true_pfm: 504.7541620060145 sim_pfm: -352.1500397629573
episode: 392 training return: -447.52430300781646
episode: 393 training return: -433.56376547945104
episode: 394 training return: -453.3718930471865
episode: 395 training return: -456.88913489614805
epoch: 99 test_true_pfm: 484.4972737751846 sim_pfm: -360.7756983747906
episode: 396 training return: -484.7223622452568
episode: 397 training return: -431.62637453855865
episode: 398 training return: -446.1095090261972
episode: 399 training return: -459.7535005914674
epoch: 100 test_true_pfm: 474.14861995061847 sim_pfm: -343.69149125058203
episode: 400 training return: -421.93652591922336
episode: 401 training return: -448.9092986902569
episode: 402 training return: -445.8009485069813
episode: 403 training return: -478.099238584322
epoch: 101 test_true_pfm: 360.367340311153 sim_pfm: -349.0837361916372
episode: 404 training return: -450.32821836048913
episode: 405 training return: -420.63755528892233
episode: 406 training return: -479.1168809654199
episode: 407 training return: -503.91118004654487
epoch: 102 test_true_pfm: 340.385595244753 sim_pfm: -346.69166956640737
episode: 408 training return: -438.2558931200315
episode: 409 training return: -450.87653583286306
episode: 410 training return: -426.24376441942474
episode: 411 training return: -449.8867373820488
epoch: 103 test_true_pfm: 443.63698398150876 sim_pfm: -350.1580691470743
episode: 412 training return: -452.37249387506165
episode: 413 training return: -451.4458120191269
episode: 414 training return: -460.81018167039275
episode: 415 training return: -444.1413001924167
epoch: 104 test_true_pfm: 684.6856902022997 sim_pfm: -335.0905031854749
episode: 416 training return: -427.72324275608537
episode: 417 training return: -431.0159143879773
episode: 418 training return: -427.81827068501207
episode: 419 training return: -445.9836776355586
epoch: 105 test_true_pfm: 432.9658250011812 sim_pfm: -350.9178708428863
episode: 420 training return: -448.11074941384555
episode: 421 training return: -431.02684367870006
episode: 422 training return: -459.9594930413847
episode: 423 training return: -446.9731765300477
epoch: 106 test_true_pfm: 511.92566762183424 sim_pfm: -328.66861401443686
episode: 424 training return: -435.59585079353786
episode: 425 training return: -423.7795017792871
episode: 426 training return: -462.8010607575814
episode: 427 training return: -445.7984198837573
epoch: 107 test_true_pfm: 572.3835538601296 sim_pfm: -347.23499277665906
episode: 428 training return: -448.5931816103318
episode: 429 training return: -501.50971943648807
episode: 430 training return: -428.3646724641813
episode: 431 training return: -424.9916277410363
epoch: 108 test_true_pfm: 609.8119525198562 sim_pfm: -340.391373770326
episode: 432 training return: -445.78174353745277
episode: 433 training return: -474.3368221954087
episode: 434 training return: -465.01600945894444
episode: 435 training return: -422.3973483485411
epoch: 109 test_true_pfm: 514.7663859968269 sim_pfm: -360.09423321099257
episode: 436 training return: -459.3070483869838
episode: 437 training return: -451.0551625267396
episode: 438 training return: -461.2373448650943
episode: 439 training return: -416.26949701650943
epoch: 110 test_true_pfm: 652.8920856099388 sim_pfm: -330.57243990776436
episode: 440 training return: -441.1713519280259
episode: 441 training return: -464.9057314811648
episode: 442 training return: -449.59927461171156
episode: 443 training return: -516.9892354395531
epoch: 111 test_true_pfm: 379.4232373998407 sim_pfm: -392.2029980320993
episode: 444 training return: -396.8466387277735
episode: 445 training return: -465.9922422472996
episode: 446 training return: -443.76476243207424
episode: 447 training return: -417.7079756189027
epoch: 112 test_true_pfm: 524.7653048310485 sim_pfm: -336.53902241803615
episode: 448 training return: -434.9025180721487
episode: 449 training return: -415.7615761039269
episode: 450 training return: -444.0105773029233
episode: 451 training return: -459.95396152122737
epoch: 113 test_true_pfm: 440.20439849952874 sim_pfm: -320.13313759378974
episode: 452 training return: -418.4206845834772
episode: 453 training return: -462.0740460810812
episode: 454 training return: -411.71362751060366
episode: 455 training return: -398.3893481326261
epoch: 114 test_true_pfm: 522.2571799820212 sim_pfm: -313.8060144875513
episode: 456 training return: -459.77215589589235
episode: 457 training return: -399.64557663452933
episode: 458 training return: -448.7138939226775
episode: 459 training return: -451.61897307052567
epoch: 115 test_true_pfm: 372.8202830466723 sim_pfm: -333.649747081773
episode: 460 training return: -424.7435303245945
episode: 461 training return: -476.41856084066967
episode: 462 training return: -526.5152151382306
episode: 463 training return: -455.7572666785796
epoch: 116 test_true_pfm: 465.8280650468693 sim_pfm: -323.27021216222994
episode: 464 training return: -447.22053284474185
episode: 465 training return: -403.1138540680118
episode: 466 training return: -424.55567475801655
episode: 467 training return: -440.23810159777867
epoch: 117 test_true_pfm: 488.5783918705488 sim_pfm: -356.16545733206334
episode: 468 training return: -421.2009888456703
episode: 469 training return: -455.39273795008796
episode: 470 training return: -445.6823840555774
episode: 471 training return: -480.4861903166913
epoch: 118 test_true_pfm: 491.1407140025854 sim_pfm: -327.91667688409905
episode: 472 training return: -487.85514492170444
episode: 473 training return: -420.66468054955016
episode: 474 training return: -467.50670969038316
episode: 475 training return: -508.70000681884795
epoch: 119 test_true_pfm: 570.8811440083671 sim_pfm: -324.90004196877044
episode: 476 training return: -463.0280749238721
episode: 477 training return: -448.5476152352632
episode: 478 training return: -432.83037115067617
episode: 479 training return: -472.2859101156683
epoch: 120 test_true_pfm: 527.5643425189514 sim_pfm: -364.5934102052509
episode: 480 training return: -448.5775231274255
episode: 481 training return: -420.0998668555807
episode: 482 training return: -465.10187055501973
episode: 483 training return: -471.3887461234759
epoch: 121 test_true_pfm: 483.8330784846012 sim_pfm: -334.47034577578097
episode: 484 training return: -471.94691530888707
episode: 485 training return: -469.06763007984574
episode: 486 training return: -429.80573590432266
episode: 487 training return: -478.1948921461578
epoch: 122 test_true_pfm: 646.885728936889 sim_pfm: -342.26500141496496
episode: 488 training return: -412.38634871597804
episode: 489 training return: -416.6508844879148
episode: 490 training return: -441.0812736760961
episode: 491 training return: -462.7775305112747
epoch: 123 test_true_pfm: 579.86755860236 sim_pfm: -345.4519904587143
episode: 492 training return: -449.0582785180668
episode: 493 training return: -435.32014355495295
episode: 494 training return: -424.4245228030218
episode: 495 training return: -407.18879856736595
epoch: 124 test_true_pfm: 601.1592663294949 sim_pfm: -326.82976185931904
episode: 496 training return: -426.45274999161177
episode: 497 training return: -423.61173706418987
episode: 498 training return: -404.8483559318255
episode: 499 training return: -449.63437835127354
epoch: 125 test_true_pfm: 580.4753369290241 sim_pfm: -367.15383814888173
episode: 500 training return: -442.67277783467324
episode: 501 training return: -481.9749283379015
episode: 502 training return: -436.7112476989376
episode: 503 training return: -442.45555716399974
epoch: 126 test_true_pfm: 545.8870962348914 sim_pfm: -332.33942735193506
episode: 504 training return: -436.1622281330109
episode: 505 training return: -427.251270985985
episode: 506 training return: -515.1717438109014
episode: 507 training return: -522.5002330712846
epoch: 127 test_true_pfm: 714.9430243696346 sim_pfm: -330.87931917409975
episode: 508 training return: -478.6190508995336
episode: 509 training return: -456.27129495849755
episode: 510 training return: -447.8103928510847
episode: 511 training return: -449.2598294426762
epoch: 128 test_true_pfm: 560.8287216634828 sim_pfm: -344.30942272683177
episode: 512 training return: -439.6416733597584
episode: 513 training return: -432.27740851235734
episode: 514 training return: -471.49611868635986
episode: 515 training return: -471.38113971910883
epoch: 129 test_true_pfm: 612.4268098773307 sim_pfm: -314.4241981292351
episode: 516 training return: -414.0039415792805
episode: 517 training return: -470.8691113005009
episode: 518 training return: -446.86817115913857
episode: 519 training return: -447.9805189832988
epoch: 130 test_true_pfm: 641.2457245458683 sim_pfm: -340.5477945495358
episode: 520 training return: -451.3730906706387
episode: 521 training return: -457.69074705896645
episode: 522 training return: -457.2569199138623
episode: 523 training return: -462.72984873853915
epoch: 131 test_true_pfm: 553.3693998453145 sim_pfm: -343.28086062095235
episode: 524 training return: -473.68620552229333
episode: 525 training return: -460.74037354253534
episode: 526 training return: -428.9743298819493
episode: 527 training return: -464.13000922304843
epoch: 132 test_true_pfm: 567.5463236562307 sim_pfm: -347.2762636812404
episode: 528 training return: -428.1341393478977
episode: 529 training return: -475.3050817878768
episode: 530 training return: -446.06876207376496
episode: 531 training return: -443.83113594721675
epoch: 133 test_true_pfm: 522.2789599085622 sim_pfm: -332.127255012393
episode: 532 training return: -429.41921444741257
episode: 533 training return: -467.43945249148055
episode: 534 training return: -441.1396942271555
episode: 535 training return: -446.4958015089322
epoch: 134 test_true_pfm: 509.7818115864657 sim_pfm: -348.71385588334834
episode: 536 training return: -443.25821626396856
episode: 537 training return: -449.93836634851385
episode: 538 training return: -449.8362262585227
episode: 539 training return: -433.9474392846478
epoch: 135 test_true_pfm: 524.1518737048287 sim_pfm: -385.2163348950821
episode: 540 training return: -452.6714384851195
episode: 541 training return: -468.10513007751854
episode: 542 training return: -456.30546539089585
episode: 543 training return: -438.4897363910846
epoch: 136 test_true_pfm: 524.0717711748574 sim_pfm: -353.7640377186972
episode: 544 training return: -441.9853688989564
episode: 545 training return: -432.98601514292733
episode: 546 training return: -509.0819013477722
episode: 547 training return: -439.3678298669673
epoch: 137 test_true_pfm: 635.2793821296262 sim_pfm: -342.39904128689085
episode: 548 training return: -484.1020226702718
episode: 549 training return: -423.765786790063
episode: 550 training return: -421.7981680365515
episode: 551 training return: -436.5211014097383
epoch: 138 test_true_pfm: 606.9601353779458 sim_pfm: -317.31653775096856
episode: 552 training return: -431.1793220972196
episode: 553 training return: -473.4600946684934
episode: 554 training return: -457.5823980590782
episode: 555 training return: -396.40349780924413
epoch: 139 test_true_pfm: 545.4196425735536 sim_pfm: -328.2438723875895
episode: 556 training return: -406.740606695495
episode: 557 training return: -432.5295084946444
episode: 558 training return: -432.2454005703613
episode: 559 training return: -412.90240541481205
epoch: 140 test_true_pfm: 552.9079812353078 sim_pfm: -330.5350795682391
episode: 560 training return: -440.9175843345832
episode: 561 training return: -525.1617002092664
episode: 562 training return: -438.11679446891145
episode: 563 training return: -496.57809386293223
epoch: 141 test_true_pfm: 583.6771517727034 sim_pfm: -339.8389564536963
episode: 564 training return: -452.8568491571218
episode: 565 training return: -439.08458986324
episode: 566 training return: -411.24168720256085
episode: 567 training return: -435.4351932855422
epoch: 142 test_true_pfm: 554.9351741400774 sim_pfm: -351.1499163850705
episode: 568 training return: -426.81747879859205
episode: 569 training return: -465.1317720969056
episode: 570 training return: -466.0559248795021
episode: 571 training return: -447.7869902289828
epoch: 143 test_true_pfm: 422.8222974550931 sim_pfm: -357.30229748011556
episode: 572 training return: -457.48416376101517
episode: 573 training return: -473.7881664605896
episode: 574 training return: -435.24227821669774
episode: 575 training return: -390.3882827377064
epoch: 144 test_true_pfm: 636.1056513646478 sim_pfm: -339.1387637190289
episode: 576 training return: -484.17160641486765
episode: 577 training return: -485.2524216385626
episode: 578 training return: -465.21295401914716
episode: 579 training return: -442.69376810680967
epoch: 145 test_true_pfm: 585.8503906276056 sim_pfm: -331.64295932547293
episode: 580 training return: -467.8689238353055
episode: 581 training return: -422.61639172190195
episode: 582 training return: -464.8249977401476
episode: 583 training return: -415.2326475924844
epoch: 146 test_true_pfm: 699.684545987736 sim_pfm: -312.55410692477903
episode: 584 training return: -425.6602745453098
episode: 585 training return: -432.42727127314834
episode: 586 training return: -450.15039608483096
episode: 587 training return: -433.24743631310173
epoch: 147 test_true_pfm: 618.4923370329496 sim_pfm: -343.79773000857375
episode: 588 training return: -465.1326126835353
episode: 589 training return: -470.3495058654337
episode: 590 training return: -413.9554712455238
episode: 591 training return: -467.2315687567657
epoch: 148 test_true_pfm: 613.1639966620179 sim_pfm: -317.57709399451556
episode: 592 training return: -456.85308963054945
episode: 593 training return: -438.3625320554157
episode: 594 training return: -478.2616305005354
episode: 595 training return: -465.1835068509121
epoch: 149 test_true_pfm: 515.8291300986021 sim_pfm: -351.5534094131319
episode: 596 training return: -440.19650116967546
episode: 597 training return: -405.4310066867947
episode: 598 training return: -487.91900213177064
episode: 599 training return: -448.5133806301319
epoch: 150 test_true_pfm: 720.2815051358634 sim_pfm: -322.27151308426846
