['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.2570549046248198 test_loss: 0.17639384269714356
epoch: 1 training_loss 0.16015968449413776 test_loss: 0.1459348678588867
epoch: 2 training_loss 0.1396332125365734 test_loss: 0.12243885993957519
epoch: 3 training_loss 0.12618770487606526 test_loss: 0.1515972137451172
epoch: 4 training_loss 0.1298751337453723 test_loss: 0.12505172491073607
epoch: 5 training_loss 0.1170486918464303 test_loss: 0.11405365467071533
epoch: 6 training_loss 0.12262546360492706 test_loss: 0.13295116424560546
epoch: 7 training_loss 0.11166479166597128 test_loss: 0.11071487665176391
epoch: 8 training_loss 0.11453776387497783 test_loss: 0.10675663948059082
epoch: 9 training_loss 0.1097674936056137 test_loss: 0.11432939767837524
epoch: 10 training_loss 0.10933642018586397 test_loss: 0.11056101322174072
epoch: 11 training_loss 0.10000190187245607 test_loss: 0.1026503562927246
epoch: 12 training_loss 0.10729280084371567 test_loss: 0.1305164098739624
epoch: 13 training_loss 0.1063227784819901 test_loss: 0.09757746458053589
epoch: 14 training_loss 0.11006151061505079 test_loss: 0.09703235626220703
epoch: 15 training_loss 0.10206398529931902 test_loss: 0.10587648153305054
epoch: 16 training_loss 0.10548758495599031 test_loss: 0.10665332078933716
epoch: 17 training_loss 0.10388868188485503 test_loss: 0.10374605655670166
epoch: 18 training_loss 0.09876581696793436 test_loss: 0.09973304271697998
epoch: 19 training_loss 0.1029695369489491 test_loss: 0.09955917000770569
epoch: 20 training_loss 0.11063729904592037 test_loss: 0.11964113712310791
epoch: 21 training_loss 0.0959383056499064 test_loss: 0.1290486454963684
epoch: 22 training_loss 0.09878385104238988 test_loss: 0.10060690641403199
epoch: 23 training_loss 0.09744828132912517 test_loss: 0.11377297639846802
epoch: 24 training_loss 0.09102368144318461 test_loss: 0.10588409900665283
epoch: 25 training_loss 0.09839442890137434 test_loss: 0.09618512392044068
epoch: 26 training_loss 0.09048535604029893 test_loss: 0.09596959948539734
epoch: 27 training_loss 0.09874745031818748 test_loss: 0.0984329640865326
epoch: 28 training_loss 0.09468511437997222 test_loss: 0.12268462181091308
epoch: 29 training_loss 0.09123071245849132 test_loss: 0.10807809829711915
epoch: 30 training_loss 0.09444502651691437 test_loss: 0.1194269299507141
epoch: 31 training_loss 0.09386712681502103 test_loss: 0.091689532995224
epoch: 32 training_loss 0.09538673635572195 test_loss: 0.09895495772361755
epoch: 33 training_loss 0.0988487671688199 test_loss: 0.0929059386253357
epoch: 34 training_loss 0.08703772434964777 test_loss: 0.10385574102401733
epoch: 35 training_loss 0.09607669314369559 test_loss: 0.11733319759368896
epoch: 36 training_loss 0.09109818648546934 test_loss: 0.1114809274673462
epoch: 37 training_loss 0.09468596290796995 test_loss: 0.11601506471633911
epoch: 38 training_loss 0.0970066692493856 test_loss: 0.09916259050369262
epoch: 39 training_loss 0.09194296702742577 test_loss: 0.09625433087348938
epoch: 40 training_loss 0.09723953820765019 test_loss: 0.10354777574539184
epoch: 41 training_loss 0.08937152422964573 test_loss: 0.10540522336959839
epoch: 42 training_loss 0.09654331300407648 test_loss: 0.09615579843521119
epoch: 43 training_loss 0.08916835550218821 test_loss: 0.10147860050201415
epoch: 44 training_loss 0.09245354833081365 test_loss: 0.10209530591964722
epoch: 45 training_loss 0.08777426242828369 test_loss: 0.11060758829116821
epoch: 46 training_loss 0.09378247791901231 test_loss: 0.10061652660369873
epoch: 47 training_loss 0.0903564921952784 test_loss: 0.11310702562332153
epoch: 48 training_loss 0.09327403022907674 test_loss: 0.1089049220085144
epoch: 49 training_loss 0.0942372033558786 test_loss: 0.10509538650512695
epoch: 50 training_loss 0.09095380205661058 test_loss: 0.10207638740539551
epoch: 51 training_loss 0.0937022252753377 test_loss: 0.10315024852752686
epoch: 52 training_loss 0.09303422294557094 test_loss: 0.0956521213054657
epoch: 53 training_loss 0.09038158455863594 test_loss: 0.101239013671875
epoch: 54 training_loss 0.08860532624647022 test_loss: 0.09388071894645691
epoch: 55 training_loss 0.09758243963122368 test_loss: 0.1003633975982666
epoch: 56 training_loss 0.08183527747169136 test_loss: 0.10766620635986328
epoch: 57 training_loss 0.09121380308642983 test_loss: 0.09080892205238342
epoch: 58 training_loss 0.08721059024333953 test_loss: 0.0990452766418457
epoch: 59 training_loss 0.08693142084404826 test_loss: 0.09424130916595459
epoch: 60 training_loss 0.09447371857240797 test_loss: 0.10322339534759521
epoch: 61 training_loss 0.08863556755706668 test_loss: 0.10703222751617432
epoch: 62 training_loss 0.09201481256633998 test_loss: 0.11043533086776733
epoch: 63 training_loss 0.0856017979234457 test_loss: 0.09268786907196044
epoch: 64 training_loss 0.0897051491215825 test_loss: 0.1072997808456421
epoch: 65 training_loss 0.08580488355830312 test_loss: 0.09429667592048645
epoch: 66 training_loss 0.08430221400223673 test_loss: 0.08755595088005066
epoch: 67 training_loss 0.08534389236941933 test_loss: 0.11653573513031006
epoch: 68 training_loss 0.09372205823659897 test_loss: 0.09302988052368164
epoch: 69 training_loss 0.0918609081208706 test_loss: 0.09404987692832947
epoch: 70 training_loss 0.08595879901200533 test_loss: 0.10567617416381836
epoch: 71 training_loss 0.08925446277484297 test_loss: 0.09085583686828613
epoch: 72 training_loss 0.09497251508757472 test_loss: 0.09376368522644044
epoch: 73 training_loss 0.08414731319993735 test_loss: 0.10073533058166503
epoch: 74 training_loss 0.08900008834898472 test_loss: 0.10945580005645753
epoch: 75 training_loss 0.08620455447584391 test_loss: 0.09249738454818726
epoch: 76 training_loss 0.08669896433129906 test_loss: 0.09370180368423461
epoch: 77 training_loss 0.08459580792114138 test_loss: 0.09347811937332154
epoch: 78 training_loss 0.08402818236500025 test_loss: 0.09653925895690918
epoch: 79 training_loss 0.08409847030416132 test_loss: 0.10269170999526978
epoch: 80 training_loss 0.08867895996198058 test_loss: 0.092361319065094
epoch: 81 training_loss 0.08093667510896921 test_loss: 0.09050657749176025
epoch: 82 training_loss 0.08866287728771567 test_loss: 0.1076743483543396
epoch: 83 training_loss 0.08565794695168734 test_loss: 0.08390818238258362
epoch: 84 training_loss 0.08937855230644345 test_loss: 0.09869181513786315
epoch: 85 training_loss 0.08763623345643282 test_loss: 0.09972060322761536
epoch: 86 training_loss 0.08610893938690424 test_loss: 0.08967119455337524
epoch: 87 training_loss 0.08594548121094704 test_loss: 0.09526722431182862
epoch: 88 training_loss 0.083785805599764 test_loss: 0.10586535930633545
epoch: 89 training_loss 0.08807564470916987 test_loss: 0.10546811819076538
epoch: 90 training_loss 0.09042397411540151 test_loss: 0.10456489324569702
epoch: 91 training_loss 0.08864408174529671 test_loss: 0.1100204586982727
epoch: 92 training_loss 0.08494317047297954 test_loss: 0.10504924058914185
epoch: 93 training_loss 0.07736353965476156 test_loss: 0.1158551573753357
epoch: 94 training_loss 0.08406323967501521 test_loss: 0.09030618667602539
epoch: 95 training_loss 0.08075820624828339 test_loss: 0.10337330102920532
epoch: 96 training_loss 0.08442350648343563 test_loss: 0.09523832201957702
epoch: 97 training_loss 0.0861699078977108 test_loss: 0.09799298644065857
epoch: 98 training_loss 0.07979330588132143 test_loss: 0.0895251214504242
epoch: 99 training_loss 0.0851749630086124 test_loss: 0.09691976308822632
epoch: 100 training_loss 0.08119268061593175 test_loss: 0.10790090560913086
epoch: 101 training_loss 0.09055893090553582 test_loss: 0.1018599271774292
epoch: 102 training_loss 0.07518926527351141 test_loss: 0.10052543878555298
epoch: 103 training_loss 0.08029001168906688 test_loss: 0.09623224139213563
epoch: 104 training_loss 0.09014814408496022 test_loss: 0.08911610841751098
epoch: 105 training_loss 0.08606636073440313 test_loss: 0.09842302799224853
epoch: 106 training_loss 0.08043830005452036 test_loss: 0.09865521788597106
epoch: 107 training_loss 0.08964110463857651 test_loss: 0.10181654691696167
epoch: 108 training_loss 0.08769403098151088 test_loss: 0.09383135437965393
epoch: 109 training_loss 0.0831316214799881 test_loss: 0.09027109742164612
epoch: 110 training_loss 0.08533098053187133 test_loss: 0.11220604181289673
epoch: 111 training_loss 0.07138754207640886 test_loss: 0.0967073380947113
epoch: 112 training_loss 0.08189697727560998 test_loss: 0.09876497983932495
epoch: 113 training_loss 0.07867172243073582 test_loss: 0.0941409170627594
epoch: 114 training_loss 0.08250157337635755 test_loss: 0.11392712593078613
epoch: 115 training_loss 0.08273263679817319 test_loss: 0.12052261829376221
epoch: 116 training_loss 0.07503707066178322 test_loss: 0.08464196920394898
epoch: 117 training_loss 0.08456916758790613 test_loss: 0.11457387208938599
epoch: 118 training_loss 0.0796336804702878 test_loss: 0.09598770141601562
epoch: 119 training_loss 0.08042547835037112 test_loss: 0.09300398826599121
epoch: 120 training_loss 0.0773512295819819 test_loss: 0.10620043277740479
epoch: 121 training_loss 0.0831091351248324 test_loss: 0.11097888946533203
epoch: 122 training_loss 0.07956460814923048 test_loss: 0.08618253469467163
epoch: 123 training_loss 0.07941944636404515 test_loss: 0.11101008653640747
epoch: 124 training_loss 0.08152510661631823 test_loss: 0.11321415901184081
epoch: 125 training_loss 0.08347624154761434 test_loss: 0.09716255068778992
epoch: 126 training_loss 0.08424568493850529 test_loss: 0.08056654334068299
epoch: 127 training_loss 0.08167151363566517 test_loss: 0.10080907344818116
epoch: 128 training_loss 0.08604765294119715 test_loss: 0.10290905237197875
epoch: 129 training_loss 0.07507604479789734 test_loss: 0.09045605063438415
epoch: 130 training_loss 0.08025088274851441 test_loss: 0.10146361589431763
epoch: 131 training_loss 0.08543669966980814 test_loss: 0.11102882623672486
epoch: 132 training_loss 0.08525569152086973 test_loss: 0.09150500297546386
epoch: 133 training_loss 0.08691927460953593 test_loss: 0.11096543073654175
epoch: 134 training_loss 0.07507491268217564 test_loss: 0.08953561782836914
epoch: 135 training_loss 0.07932906245812774 test_loss: 0.10807340145111084
epoch: 136 training_loss 0.08397471019998193 test_loss: 0.09038090705871582
epoch: 137 training_loss 0.07589703194797039 test_loss: 0.08538312911987304
epoch: 138 training_loss 0.07770921936258673 test_loss: 0.12344889640808106
epoch: 139 training_loss 0.0833421716839075 test_loss: 0.09636662602424621
epoch: 140 training_loss 0.0736969358380884 test_loss: 0.09243760704994201
epoch: 141 training_loss 0.0826941598765552 test_loss: 0.1068889856338501
epoch: 142 training_loss 0.08325011774897575 test_loss: 0.08413822054862977
epoch: 143 training_loss 0.08111764620989562 test_loss: 0.11270493268966675
epoch: 144 training_loss 0.076462745629251 test_loss: 0.08102564215660095
epoch: 145 training_loss 0.08169479451142252 test_loss: 0.10494387149810791
epoch: 146 training_loss 0.0775662375241518 test_loss: 0.09256293177604676
epoch: 147 training_loss 0.07956849105656147 test_loss: 0.08505576848983765
epoch: 148 training_loss 0.07526538614183664 test_loss: 0.10717720985412597
epoch: 149 training_loss 0.08820833001285791 test_loss: 0.08867473602294922
epoch: 0 training_loss 0.25935675017535686 test_loss: 0.17280441522598267
epoch: 1 training_loss 0.15527863830327987 test_loss: 0.14687267541885377
epoch: 2 training_loss 0.13114372447133063 test_loss: 0.11906203031539916
epoch: 3 training_loss 0.12586529582738876 test_loss: 0.14620994329452514
epoch: 4 training_loss 0.11791481798514723 test_loss: 0.11936144828796387
epoch: 5 training_loss 0.10872016284614801 test_loss: 0.10699498653411865
epoch: 6 training_loss 0.11444208171218634 test_loss: 0.0963942527770996
epoch: 7 training_loss 0.10476686652749777 test_loss: 0.10883500576019287
epoch: 8 training_loss 0.11083475582301616 test_loss: 0.09186003208160401
epoch: 9 training_loss 0.10196369476616382 test_loss: 0.09006941914558411
epoch: 10 training_loss 0.10399029839783908 test_loss: 0.10554816722869872
epoch: 11 training_loss 0.10022983636707067 test_loss: 0.0954069435596466
epoch: 12 training_loss 0.10184775965288281 test_loss: 0.09905277490615845
epoch: 13 training_loss 0.0983107141032815 test_loss: 0.08407339453697205
epoch: 14 training_loss 0.09535724490880966 test_loss: 0.10115529298782348
epoch: 15 training_loss 0.09094138219952583 test_loss: 0.08342377543449402
epoch: 16 training_loss 0.09713512286543846 test_loss: 0.09046590328216553
epoch: 17 training_loss 0.10010574620217084 test_loss: 0.09293672442436218
epoch: 18 training_loss 0.09460395460948348 test_loss: 0.0881355345249176
epoch: 19 training_loss 0.09449905723333359 test_loss: 0.08117196559906006
epoch: 20 training_loss 0.09307007493451237 test_loss: 0.09280123710632324
epoch: 21 training_loss 0.09758792409673334 test_loss: 0.09293655157089234
epoch: 22 training_loss 0.09841031223535537 test_loss: 0.09656869769096374
epoch: 23 training_loss 0.09835237007588148 test_loss: 0.08870222568511962
epoch: 24 training_loss 0.09114350784569979 test_loss: 0.09512116909027099
epoch: 25 training_loss 0.09674347979947924 test_loss: 0.08163290023803711
epoch: 26 training_loss 0.09651644144207239 test_loss: 0.08000922203063965
epoch: 27 training_loss 0.09611528903245926 test_loss: 0.0848679780960083
epoch: 28 training_loss 0.09346352631226182 test_loss: 0.08525949120521545
epoch: 29 training_loss 0.0964484578743577 test_loss: 0.09975120425224304
epoch: 30 training_loss 0.09666285639628767 test_loss: 0.0825565755367279
epoch: 31 training_loss 0.09868439331650734 test_loss: 0.09067742228507995
epoch: 32 training_loss 0.09142267828807235 test_loss: 0.0910426914691925
epoch: 33 training_loss 0.09523135924711823 test_loss: 0.0845803439617157
epoch: 34 training_loss 0.09678229995071888 test_loss: 0.08926426768302917
epoch: 35 training_loss 0.08438698146492243 test_loss: 0.09069374799728394
epoch: 36 training_loss 0.09371482327580452 test_loss: 0.0777585744857788
epoch: 37 training_loss 0.09214508552104235 test_loss: 0.08307593464851379
epoch: 38 training_loss 0.09918690018355847 test_loss: 0.07156838774681092
epoch: 39 training_loss 0.0836065973713994 test_loss: 0.08240493535995483
epoch: 40 training_loss 0.08988611416891218 test_loss: 0.08910558223724366
epoch: 41 training_loss 0.09297102220356464 test_loss: 0.08360437154769898
epoch: 42 training_loss 0.08823122343048453 test_loss: 0.10630557537078858
epoch: 43 training_loss 0.09021641014143825 test_loss: 0.0864226758480072
epoch: 44 training_loss 0.09131950730457902 test_loss: 0.08542158603668212
epoch: 45 training_loss 0.0869531830959022 test_loss: 0.0799152135848999
epoch: 46 training_loss 0.0870646602474153 test_loss: 0.08855580687522888
epoch: 47 training_loss 0.08656399238854647 test_loss: 0.09104548096656799
epoch: 48 training_loss 0.09184774588793516 test_loss: 0.08740415573120117
epoch: 49 training_loss 0.09000851653516292 test_loss: 0.08792378902435302
epoch: 50 training_loss 0.08562275594100356 test_loss: 0.07527499198913574
epoch: 51 training_loss 0.08274372190237045 test_loss: 0.08644336462020874
epoch: 52 training_loss 0.08318454524502158 test_loss: 0.08769612908363342
epoch: 53 training_loss 0.09166216809302569 test_loss: 0.08774985671043396
epoch: 54 training_loss 0.08310426553711295 test_loss: 0.07407143115997314
epoch: 55 training_loss 0.09070954225957394 test_loss: 0.08226714730262756
epoch: 56 training_loss 0.08803393370471894 test_loss: 0.08796630501747131
epoch: 57 training_loss 0.08214392392896115 test_loss: 0.08749792575836182
epoch: 58 training_loss 0.08665127381682396 test_loss: 0.07989002466201782
epoch: 59 training_loss 0.0864577324129641 test_loss: 0.08901867866516114
epoch: 60 training_loss 0.08496265899389982 test_loss: 0.08769401907920837
epoch: 61 training_loss 0.08470603987574578 test_loss: 0.09627500772476197
epoch: 62 training_loss 0.08947929553687572 test_loss: 0.09358482956886291
epoch: 63 training_loss 0.08648846430703998 test_loss: 0.09420191645622253
epoch: 64 training_loss 0.08094191627576947 test_loss: 0.08866127133369446
epoch: 65 training_loss 0.08007402600720524 test_loss: 0.08271305561065674
epoch: 66 training_loss 0.0809704033844173 test_loss: 0.08881098628044129
epoch: 67 training_loss 0.08831105206161738 test_loss: 0.08584792017936707
epoch: 68 training_loss 0.08354151681996883 test_loss: 0.09121088981628418
epoch: 69 training_loss 0.08283198954537511 test_loss: 0.08468285202980042
epoch: 70 training_loss 0.08708688441663981 test_loss: 0.09045906066894531
epoch: 71 training_loss 0.08928272482007742 test_loss: 0.09593886137008667
epoch: 72 training_loss 0.0859110165014863 test_loss: 0.09381545186042786
epoch: 73 training_loss 0.09004637178033591 test_loss: 0.07395043969154358
epoch: 74 training_loss 0.07940094482153653 test_loss: 0.08117794394493102
epoch: 75 training_loss 0.08230479812249542 test_loss: 0.09427631497383118
epoch: 76 training_loss 0.0930375169403851 test_loss: 0.09211662411689758
epoch: 77 training_loss 0.0796465027332306 test_loss: 0.07506367564201355
epoch: 78 training_loss 0.08145077299326658 test_loss: 0.07167317271232605
epoch: 79 training_loss 0.08433943832293153 test_loss: 0.08567762970924378
epoch: 80 training_loss 0.08645433211699129 test_loss: 0.08690252900123596
epoch: 81 training_loss 0.08121972983703017 test_loss: 0.08135732412338256
epoch: 82 training_loss 0.08168559845536948 test_loss: 0.07595150470733643
epoch: 83 training_loss 0.08715852186083793 test_loss: 0.0855939269065857
epoch: 84 training_loss 0.08606556789949536 test_loss: 0.09608452320098877
epoch: 85 training_loss 0.08870739882811904 test_loss: 0.08346484303474426
epoch: 86 training_loss 0.0876523901708424 test_loss: 0.0843549907207489
epoch: 87 training_loss 0.08304533192887903 test_loss: 0.07907463312149048
epoch: 88 training_loss 0.08695587574504315 test_loss: 0.09139927625656127
epoch: 89 training_loss 0.08531477194279433 test_loss: 0.08505474925041198
epoch: 90 training_loss 0.08428957764059306 test_loss: 0.076106595993042
epoch: 91 training_loss 0.08777494793757797 test_loss: 0.09147112965583801
epoch: 92 training_loss 0.08439420534297824 test_loss: 0.0821206033229828
epoch: 93 training_loss 0.08499355658888817 test_loss: 0.08449193239212036
epoch: 94 training_loss 0.08502594508230686 test_loss: 0.08621567487716675
epoch: 95 training_loss 0.0824149949848652 test_loss: 0.09808884859085083
epoch: 96 training_loss 0.08846896924078465 test_loss: 0.0836367666721344
epoch: 97 training_loss 0.08058813517913223 test_loss: 0.08406960964202881
epoch: 98 training_loss 0.0884870700351894 test_loss: 0.08312628269195557
epoch: 99 training_loss 0.08504411898553371 test_loss: 0.1000324010848999
epoch: 100 training_loss 0.08612243657931686 test_loss: 0.08924950957298279
epoch: 101 training_loss 0.08375619987025856 test_loss: 0.08020809888839722
epoch: 102 training_loss 0.07676557810045778 test_loss: 0.09038205742835999
epoch: 103 training_loss 0.07715888424776494 test_loss: 0.08707280158996582
epoch: 104 training_loss 0.08353644108399749 test_loss: 0.09053841829299927
epoch: 105 training_loss 0.08143235268071294 test_loss: 0.09338189959526062
epoch: 106 training_loss 0.08727876026183366 test_loss: 0.07751931548118592
epoch: 107 training_loss 0.08217545337975025 test_loss: 0.09198600053787231
epoch: 108 training_loss 0.07543572060763835 test_loss: 0.07609097361564636
epoch: 109 training_loss 0.0868909190967679 test_loss: 0.07520257234573365
epoch: 110 training_loss 0.07842384558171034 test_loss: 0.0847844660282135
epoch: 111 training_loss 0.08000960869714618 test_loss: 0.08794620037078857
epoch: 112 training_loss 0.07761760769411921 test_loss: 0.08102040886878967
epoch: 113 training_loss 0.08371521309018135 test_loss: 0.08416227102279664
epoch: 114 training_loss 0.07623051891103387 test_loss: 0.07306761145591736
epoch: 115 training_loss 0.08038906549103558 test_loss: 0.09163540601730347
epoch: 116 training_loss 0.07856720261275768 test_loss: 0.08178150653839111
epoch: 117 training_loss 0.08222965754568577 test_loss: 0.0890981137752533
epoch: 118 training_loss 0.076898503433913 test_loss: 0.07702935934066772
epoch: 119 training_loss 0.07986207786947488 test_loss: 0.09334939122200012
epoch: 120 training_loss 0.0843159239180386 test_loss: 0.08759353160858155
epoch: 121 training_loss 0.08280984525568784 test_loss: 0.08688961267471314
epoch: 122 training_loss 0.07796831928193569 test_loss: 0.08773131370544433
epoch: 123 training_loss 0.07892558018211275 test_loss: 0.07693899273872376
epoch: 124 training_loss 0.08800005659461022 test_loss: 0.09027559757232666
epoch: 125 training_loss 0.0854971419647336 test_loss: 0.07899984121322631
epoch: 126 training_loss 0.08224666793830693 test_loss: 0.09072753190994262
epoch: 127 training_loss 0.07694634184241295 test_loss: 0.09206015467643738
epoch: 128 training_loss 0.07841954547911882 test_loss: 0.0935322105884552
epoch: 129 training_loss 0.08016424030996859 test_loss: 0.06936838030815125
epoch: 130 training_loss 0.08448028206825256 test_loss: 0.080174320936203
epoch: 131 training_loss 0.08506921550258995 test_loss: 0.08585084080696107
epoch: 132 training_loss 0.08385047163814306 test_loss: 0.08148468732833862
epoch: 133 training_loss 0.08076625723391771 test_loss: 0.08424514532089233
epoch: 134 training_loss 0.08080891259014607 test_loss: 0.10010333061218261
epoch: 135 training_loss 0.08203759793192149 test_loss: 0.0776084303855896
epoch: 136 training_loss 0.07907437186688185 test_loss: 0.09255519509315491
epoch: 137 training_loss 0.08061840230599046 test_loss: 0.09169802069664001
epoch: 138 training_loss 0.07556497087702155 test_loss: 0.09025325179100037
epoch: 139 training_loss 0.08053329888731241 test_loss: 0.09114093184471131
epoch: 140 training_loss 0.07940212039276957 test_loss: 0.0852785050868988
epoch: 141 training_loss 0.07935373866930603 test_loss: 0.08220441341400146
epoch: 142 training_loss 0.0823752311617136 test_loss: 0.07689430713653564
epoch: 143 training_loss 0.08117877218872309 test_loss: 0.09663347601890564
epoch: 144 training_loss 0.07888593623414636 test_loss: 0.09202426671981812
epoch: 145 training_loss 0.084575284589082 test_loss: 0.08979103565216065
epoch: 146 training_loss 0.08121776261366903 test_loss: 0.0844268798828125
epoch: 147 training_loss 0.07975950933992863 test_loss: 0.08517683744430542
epoch: 148 training_loss 0.07956790255382656 test_loss: 0.09730015397071838
epoch: 149 training_loss 0.08345173606649041 test_loss: 0.0923956036567688
epoch: 0 training_loss 0.2958693876862526 test_loss: 0.18456398248672484
epoch: 1 training_loss 0.15872014187276362 test_loss: 0.15614534616470338
epoch: 2 training_loss 0.1268893315270543 test_loss: 0.11996263265609741
epoch: 3 training_loss 0.13429923843592406 test_loss: 0.11842266321182252
epoch: 4 training_loss 0.12641890950500964 test_loss: 0.11907825469970704
epoch: 5 training_loss 0.11255234587937593 test_loss: 0.1176250696182251
epoch: 6 training_loss 0.11037600031122566 test_loss: 0.11545480489730835
epoch: 7 training_loss 0.11713000945746899 test_loss: 0.11611194610595703
epoch: 8 training_loss 0.10692602910101413 test_loss: 0.13377156257629394
epoch: 9 training_loss 0.10311370864510536 test_loss: 0.117923104763031
epoch: 10 training_loss 0.10521163202822209 test_loss: 0.11313487291336059
epoch: 11 training_loss 0.10231227114796639 test_loss: 0.13591334819793702
epoch: 12 training_loss 0.10205772135406732 test_loss: 0.11936309337615966
epoch: 13 training_loss 0.10457175482064486 test_loss: 0.1285488247871399
epoch: 14 training_loss 0.1008972347714007 test_loss: 0.12206997871398925
epoch: 15 training_loss 0.10454566918313503 test_loss: 0.11788582801818848
epoch: 16 training_loss 0.10005655601620674 test_loss: 0.11235777139663697
epoch: 17 training_loss 0.09869424365460873 test_loss: 0.10297583341598511
epoch: 18 training_loss 0.09772526061162352 test_loss: 0.10519272089004517
epoch: 19 training_loss 0.0983668840676546 test_loss: 0.10352635383605957
epoch: 20 training_loss 0.10031851617619396 test_loss: 0.09913513660430909
epoch: 21 training_loss 0.09422855161130428 test_loss: 0.10525370836257934
epoch: 22 training_loss 0.09547286897897721 test_loss: 0.10115717649459839
epoch: 23 training_loss 0.0948986255005002 test_loss: 0.10269596576690673
epoch: 24 training_loss 0.09880477679893375 test_loss: 0.09200509190559387
epoch: 25 training_loss 0.09361583741381764 test_loss: 0.11245393753051758
epoch: 26 training_loss 0.09766109559684992 test_loss: 0.09782357811927796
epoch: 27 training_loss 0.09528606429696083 test_loss: 0.09965458512306213
epoch: 28 training_loss 0.09730202015489339 test_loss: 0.12123085260391235
epoch: 29 training_loss 0.08952975467778743 test_loss: 0.10595524311065674
epoch: 30 training_loss 0.0921271956898272 test_loss: 0.08982446193695068
epoch: 31 training_loss 0.08522260649129748 test_loss: 0.10962804555892944
epoch: 32 training_loss 0.09405146986246109 test_loss: 0.1254591464996338
epoch: 33 training_loss 0.09092388238757848 test_loss: 0.09872594475746155
epoch: 34 training_loss 0.09913430865854025 test_loss: 0.08315598368644714
epoch: 35 training_loss 0.09268699353560805 test_loss: 0.1095426321029663
epoch: 36 training_loss 0.09009089400991797 test_loss: 0.09066731929779052
epoch: 37 training_loss 0.08682744856923819 test_loss: 0.09565788507461548
epoch: 38 training_loss 0.0914659064076841 test_loss: 0.1005772590637207
epoch: 39 training_loss 0.08653109284117817 test_loss: 0.11047196388244629
epoch: 40 training_loss 0.0850081305205822 test_loss: 0.09603908658027649
epoch: 41 training_loss 0.09357649955898523 test_loss: 0.10871970653533936
epoch: 42 training_loss 0.081253715287894 test_loss: 0.12370861768722534
epoch: 43 training_loss 0.08927343394607305 test_loss: 0.11356930732727051
epoch: 44 training_loss 0.10167030174285173 test_loss: 0.08911949992179871
epoch: 45 training_loss 0.08950227899476886 test_loss: 0.0892801821231842
epoch: 46 training_loss 0.09106068348512053 test_loss: 0.09794644117355347
epoch: 47 training_loss 0.08671643543988466 test_loss: 0.09999989867210388
epoch: 48 training_loss 0.09448577541857958 test_loss: 0.09919726848602295
epoch: 49 training_loss 0.08650254165753722 test_loss: 0.11295448541641236
epoch: 50 training_loss 0.09555328607559205 test_loss: 0.11036447286605836
epoch: 51 training_loss 0.09247130896896123 test_loss: 0.10617915391921998
epoch: 52 training_loss 0.08763784930109977 test_loss: 0.0902625024318695
epoch: 53 training_loss 0.08276823423802852 test_loss: 0.10437411069869995
epoch: 54 training_loss 0.09101941954344511 test_loss: 0.11564226150512695
epoch: 55 training_loss 0.08926072224974632 test_loss: 0.09612962603569031
epoch: 56 training_loss 0.0853839822858572 test_loss: 0.10054687261581421
epoch: 57 training_loss 0.0890673332195729 test_loss: 0.10482406616210938
epoch: 58 training_loss 0.08707141030579806 test_loss: 0.11397318840026856
epoch: 59 training_loss 0.09015462461858988 test_loss: 0.11085807085037232
epoch: 60 training_loss 0.08542716719210147 test_loss: 0.09242650270462036
epoch: 61 training_loss 0.09622048780322075 test_loss: 0.0987902581691742
epoch: 62 training_loss 0.08216640820726752 test_loss: 0.08748976588249206
epoch: 63 training_loss 0.08752866687253118 test_loss: 0.10771926641464233
epoch: 64 training_loss 0.08636819541454316 test_loss: 0.09927089810371399
epoch: 65 training_loss 0.08252927074208856 test_loss: 0.09573426842689514
epoch: 66 training_loss 0.08682347469963134 test_loss: 0.10008140802383422
epoch: 67 training_loss 0.0896937856823206 test_loss: 0.09969518184661866
epoch: 68 training_loss 0.08651504786685109 test_loss: 0.09106871485710144
epoch: 69 training_loss 0.0891717399097979 test_loss: 0.10536679029464721
epoch: 70 training_loss 0.08563152397051453 test_loss: 0.11312305927276611
epoch: 71 training_loss 0.08135467423126101 test_loss: 0.09649019241333008
epoch: 72 training_loss 0.08710477696731686 test_loss: 0.10575329065322876
epoch: 73 training_loss 0.08011424837633968 test_loss: 0.10830780267715454
epoch: 74 training_loss 0.08235265342518687 test_loss: 0.08763355016708374
epoch: 75 training_loss 0.08560238245874643 test_loss: 0.0946635663509369
epoch: 76 training_loss 0.08866594282910228 test_loss: 0.08555235862731933
epoch: 77 training_loss 0.09149618685245514 test_loss: 0.09110053181648255
epoch: 78 training_loss 0.08851055076345801 test_loss: 0.09618697166442872
epoch: 79 training_loss 0.08302395792677998 test_loss: 0.09887741208076477
epoch: 80 training_loss 0.08708230907097458 test_loss: 0.10912989377975464
epoch: 81 training_loss 0.08746674828231335 test_loss: 0.09811879992485047
epoch: 82 training_loss 0.08625412371009589 test_loss: 0.09026076793670654
epoch: 83 training_loss 0.08182481080293655 test_loss: 0.09289370775222779
epoch: 84 training_loss 0.08575660347938538 test_loss: 0.09134882688522339
epoch: 85 training_loss 0.09256688587367534 test_loss: 0.10253745317459106
epoch: 86 training_loss 0.0826370283216238 test_loss: 0.08429925441741944
epoch: 87 training_loss 0.08286470090970396 test_loss: 0.0909085214138031
epoch: 88 training_loss 0.08395375387743115 test_loss: 0.10452377796173096
epoch: 89 training_loss 0.0832792273722589 test_loss: 0.08569490909576416
epoch: 90 training_loss 0.08629501668736339 test_loss: 0.0999276101589203
epoch: 91 training_loss 0.08322932122275234 test_loss: 0.10073825120925903
epoch: 92 training_loss 0.08513205088675022 test_loss: 0.09368150234222412
epoch: 93 training_loss 0.08591736556962132 test_loss: 0.10655322074890136
epoch: 94 training_loss 0.07566300114616752 test_loss: 0.10123994350433349
epoch: 95 training_loss 0.08335292838513851 test_loss: 0.10683140754699708
epoch: 96 training_loss 0.07927135240286588 test_loss: 0.10663598775863647
epoch: 97 training_loss 0.08395160892978311 test_loss: 0.08579323887825012
epoch: 98 training_loss 0.08288383204489946 test_loss: 0.09390047788619996
epoch: 99 training_loss 0.0833233885280788 test_loss: 0.10473003387451171
epoch: 100 training_loss 0.08050679543986916 test_loss: 0.09839164614677429
epoch: 101 training_loss 0.08400321731343866 test_loss: 0.09404664039611817
epoch: 102 training_loss 0.07684531499631703 test_loss: 0.09136359095573425
epoch: 103 training_loss 0.085409383084625 test_loss: 0.0919152855873108
epoch: 104 training_loss 0.08076267171651125 test_loss: 0.09028489589691162
epoch: 105 training_loss 0.08762428692542017 test_loss: 0.08602498769760132
epoch: 106 training_loss 0.0796495421230793 test_loss: 0.09836346507072449
epoch: 107 training_loss 0.08046652786433697 test_loss: 0.10999693870544433
epoch: 108 training_loss 0.08224051835015417 test_loss: 0.09591635465621948
epoch: 109 training_loss 0.07551481222733855 test_loss: 0.11878734827041626
epoch: 110 training_loss 0.07806630244478584 test_loss: 0.11133878231048584
epoch: 111 training_loss 0.08995040593668818 test_loss: 0.0937300443649292
epoch: 112 training_loss 0.08052769236266613 test_loss: 0.09007792472839356
epoch: 113 training_loss 0.08391361735761166 test_loss: 0.10388914346694947
epoch: 114 training_loss 0.0794901972077787 test_loss: 0.0911781370639801
epoch: 115 training_loss 0.08350489649921655 test_loss: 0.09315712451934814
epoch: 116 training_loss 0.0804750526510179 test_loss: 0.10166682004928589
epoch: 117 training_loss 0.08467999938875437 test_loss: 0.07765308618545533
epoch: 118 training_loss 0.08198906682431698 test_loss: 0.09742490649223327
epoch: 119 training_loss 0.08443767534568906 test_loss: 0.10067530870437622
epoch: 120 training_loss 0.08119306748732924 test_loss: 0.10438247919082641
epoch: 121 training_loss 0.08112800356000661 test_loss: 0.09392285346984863
epoch: 122 training_loss 0.0794018361158669 test_loss: 0.08945465683937073
epoch: 123 training_loss 0.07616504767909646 test_loss: 0.11536399126052857
epoch: 124 training_loss 0.08196679158136248 test_loss: 0.09118315577507019
epoch: 125 training_loss 0.08171102039515972 test_loss: 0.10643430948257446
epoch: 126 training_loss 0.0817281686142087 test_loss: 0.08894053101539612
epoch: 127 training_loss 0.07661547556519509 test_loss: 0.10773532390594483
epoch: 128 training_loss 0.0871921013109386 test_loss: 0.09849178194999694
epoch: 129 training_loss 0.0793427861481905 test_loss: 0.1028377890586853
epoch: 130 training_loss 0.07820901181548834 test_loss: 0.09837268590927124
epoch: 131 training_loss 0.08051080042496324 test_loss: 0.0969328224658966
epoch: 132 training_loss 0.07855251109227539 test_loss: 0.09357933402061462
epoch: 133 training_loss 0.07825829524546862 test_loss: 0.09758171439170837
epoch: 134 training_loss 0.0764265776053071 test_loss: 0.09634904265403747
epoch: 135 training_loss 0.08109248910099268 test_loss: 0.09616151452064514
epoch: 136 training_loss 0.08530608648434282 test_loss: 0.11963119506835937
epoch: 137 training_loss 0.0863421323057264 test_loss: 0.10226287841796874
epoch: 138 training_loss 0.07326439712196589 test_loss: 0.09645085334777832
epoch: 139 training_loss 0.0801171499863267 test_loss: 0.10256222486495972
epoch: 140 training_loss 0.07417041858658195 test_loss: 0.10335025787353516
epoch: 141 training_loss 0.07966637516394258 test_loss: 0.1121858835220337
epoch: 142 training_loss 0.0824314621463418 test_loss: 0.08278367519378663
epoch: 143 training_loss 0.07719798479229212 test_loss: 0.10704935789108276
epoch: 144 training_loss 0.07626138319261372 test_loss: 0.09930485486984253
epoch: 145 training_loss 0.08032042151317001 test_loss: 0.08900590538978577
epoch: 146 training_loss 0.07917266357690096 test_loss: 0.12373642921447754
epoch: 147 training_loss 0.0761865995451808 test_loss: 0.10478072166442871
epoch: 148 training_loss 0.07917689591646195 test_loss: 0.10300973653793336
epoch: 149 training_loss 0.0732395492400974 test_loss: 0.09492062330245972
epoch: 0 training_loss 0.23919082142412662 test_loss: 0.16531760692596437
epoch: 1 training_loss 0.13845466759055852 test_loss: 0.15601736307144165
epoch: 2 training_loss 0.1286758975684643 test_loss: 0.14472801685333253
epoch: 3 training_loss 0.11959220644086599 test_loss: 0.12355254888534546
epoch: 4 training_loss 0.1124288410320878 test_loss: 0.10621552467346192
epoch: 5 training_loss 0.11527650400996209 test_loss: 0.10590728521347045
epoch: 6 training_loss 0.09858291296288371 test_loss: 0.09918779730796815
epoch: 7 training_loss 0.09947092548012733 test_loss: 0.14045883417129518
epoch: 8 training_loss 0.10808792987838388 test_loss: 0.12183855772018433
epoch: 9 training_loss 0.10177093241363763 test_loss: 0.10986703634262085
epoch: 10 training_loss 0.09453780710697174 test_loss: 0.10967932939529419
epoch: 11 training_loss 0.10396858302876354 test_loss: 0.11958729028701783
epoch: 12 training_loss 0.0985558819770813 test_loss: 0.10519576072692871
epoch: 13 training_loss 0.09131665494292975 test_loss: 0.11908053159713745
epoch: 14 training_loss 0.09655662458389998 test_loss: 0.09988794326782227
epoch: 15 training_loss 0.09383553126361222 test_loss: 0.11467869281768799
epoch: 16 training_loss 0.09424184493720532 test_loss: 0.10517492294311523
epoch: 17 training_loss 0.09137205282226205 test_loss: 0.10967884063720704
epoch: 18 training_loss 0.08975803222507238 test_loss: 0.1002313733100891
epoch: 19 training_loss 0.09282766757532954 test_loss: 0.11397775411605834
epoch: 20 training_loss 0.09112930539995431 test_loss: 0.09584900736808777
epoch: 21 training_loss 0.09939957212656736 test_loss: 0.11527976989746094
epoch: 22 training_loss 0.0993241485208273 test_loss: 0.09903649091720582
epoch: 23 training_loss 0.09033646808937192 test_loss: 0.09873346090316773
epoch: 24 training_loss 0.0861736417375505 test_loss: 0.11825006008148194
epoch: 25 training_loss 0.09169119248166681 test_loss: 0.1096612572669983
epoch: 26 training_loss 0.08615957301110029 test_loss: 0.10681364536285401
epoch: 27 training_loss 0.09193957280367612 test_loss: 0.10350314378738404
epoch: 28 training_loss 0.0920573815703392 test_loss: 0.10516158342361451
epoch: 29 training_loss 0.09118361283093691 test_loss: 0.11472162008285522
epoch: 30 training_loss 0.08705382328480482 test_loss: 0.10559107065200805
epoch: 31 training_loss 0.08365423858165741 test_loss: 0.10713223218917847
epoch: 32 training_loss 0.08951268353499472 test_loss: 0.09961022734642029
epoch: 33 training_loss 0.08722258925437927 test_loss: 0.10406757593154907
epoch: 34 training_loss 0.08651504073292017 test_loss: 0.10554882287979125
epoch: 35 training_loss 0.08811902655288577 test_loss: 0.11053783893585205
epoch: 36 training_loss 0.0919573876541108 test_loss: 0.11951922178268433
epoch: 37 training_loss 0.08328234909102321 test_loss: 0.10686006546020507
epoch: 38 training_loss 0.09366407534107565 test_loss: 0.1191407561302185
epoch: 39 training_loss 0.08486400656402111 test_loss: 0.10384854078292846
epoch: 40 training_loss 0.0866614143550396 test_loss: 0.11183676719665528
epoch: 41 training_loss 0.09028773821890354 test_loss: 0.10106292963027955
epoch: 42 training_loss 0.09183840848505496 test_loss: 0.1166380524635315
epoch: 43 training_loss 0.08171145291998982 test_loss: 0.13018150329589845
epoch: 44 training_loss 0.08542375590652228 test_loss: 0.11338948011398316
epoch: 45 training_loss 0.0987788255698979 test_loss: 0.10698122978210449
epoch: 46 training_loss 0.08492288371548057 test_loss: 0.10586707592010498
epoch: 47 training_loss 0.08841332029551267 test_loss: 0.113278329372406
epoch: 48 training_loss 0.0910649112612009 test_loss: 0.10514471530914307
epoch: 49 training_loss 0.08833025043830275 test_loss: 0.10865585803985596
epoch: 50 training_loss 0.08207635017111897 test_loss: 0.10066032409667969
epoch: 51 training_loss 0.08947167877107859 test_loss: 0.10900858640670777
epoch: 52 training_loss 0.09343607876449823 test_loss: 0.12207456827163696
epoch: 53 training_loss 0.08673925843089819 test_loss: 0.09532082080841064
epoch: 54 training_loss 0.08974860714748502 test_loss: 0.11194479465484619
epoch: 55 training_loss 0.08836510878056288 test_loss: 0.11244376897811889
epoch: 56 training_loss 0.08721749745309353 test_loss: 0.09669141173362732
epoch: 57 training_loss 0.0833722068555653 test_loss: 0.09399514198303223
epoch: 58 training_loss 0.08801036335527897 test_loss: 0.10853852033615112
epoch: 59 training_loss 0.08413923848420382 test_loss: 0.10375102758407592
epoch: 60 training_loss 0.08322515957057476 test_loss: 0.106437087059021
epoch: 61 training_loss 0.08296689444221556 test_loss: 0.12007861137390137
epoch: 62 training_loss 0.07976359898224473 test_loss: 0.10445088148117065
epoch: 63 training_loss 0.09759005414322018 test_loss: 0.10695939064025879
epoch: 64 training_loss 0.08289151947945356 test_loss: 0.12369638681411743
epoch: 65 training_loss 0.07814211862161756 test_loss: 0.11362572908401489
epoch: 66 training_loss 0.08044183710590005 test_loss: 0.10131784677505493
epoch: 67 training_loss 0.08420781709253788 test_loss: 0.10501717329025269
epoch: 68 training_loss 0.08411513207480312 test_loss: 0.09791873097419738
epoch: 69 training_loss 0.08435870049521327 test_loss: 0.11314836740493775
epoch: 70 training_loss 0.0868200335279107 test_loss: 0.12040069103240966
epoch: 71 training_loss 0.0877785898000002 test_loss: 0.11928397417068481
epoch: 72 training_loss 0.08313668604008853 test_loss: 0.11757700443267823
epoch: 73 training_loss 0.08371572230011225 test_loss: 0.10566139221191406
epoch: 74 training_loss 0.08549790589138866 test_loss: 0.10917876958847046
epoch: 75 training_loss 0.08112035972997546 test_loss: 0.09936792850494384
epoch: 76 training_loss 0.08266394704580307 test_loss: 0.11107363700866699
epoch: 77 training_loss 0.08801680529490113 test_loss: 0.10149046182632446
epoch: 78 training_loss 0.08478329852223396 test_loss: 0.11079729795455932
epoch: 79 training_loss 0.08997713983058929 test_loss: 0.1135587215423584
epoch: 80 training_loss 0.08543061895295978 test_loss: 0.10102274417877197
epoch: 81 training_loss 0.08506102845072747 test_loss: 0.10599339008331299
epoch: 82 training_loss 0.08133881155401468 test_loss: 0.10854058265686035
epoch: 83 training_loss 0.08925400752574206 test_loss: 0.108558988571167
epoch: 84 training_loss 0.08906398283317685 test_loss: 0.1016895055770874
epoch: 85 training_loss 0.08296747675165533 test_loss: 0.10341578722000122
epoch: 86 training_loss 0.07733050866052509 test_loss: 0.1129760503768921
epoch: 87 training_loss 0.08054703236557544 test_loss: 0.11928108930587769
epoch: 88 training_loss 0.08270442926324904 test_loss: 0.10154753923416138
epoch: 89 training_loss 0.08358739936724305 test_loss: 0.10628771781921387
epoch: 90 training_loss 0.08214926738291979 test_loss: 0.10302431583404541
epoch: 91 training_loss 0.08010075287893414 test_loss: 0.11138129234313965
epoch: 92 training_loss 0.08666488255374133 test_loss: 0.09566004872322083
epoch: 93 training_loss 0.08903271768242121 test_loss: 0.1095922589302063
epoch: 94 training_loss 0.08598650824278593 test_loss: 0.09906423687934876
epoch: 95 training_loss 0.07755804423242807 test_loss: 0.11367777585983277
epoch: 96 training_loss 0.08445826990529895 test_loss: 0.11481136083602905
epoch: 97 training_loss 0.0765549029968679 test_loss: 0.10941890478134156
epoch: 98 training_loss 0.07969347450882197 test_loss: 0.12113467454910279
epoch: 99 training_loss 0.08003055034205317 test_loss: 0.10128628015518189
epoch: 100 training_loss 0.0802204135619104 test_loss: 0.09146537780761718
epoch: 101 training_loss 0.08036989621818065 test_loss: 0.09953411221504212
epoch: 102 training_loss 0.08575128991156816 test_loss: 0.09945093393325806
epoch: 103 training_loss 0.080845562890172 test_loss: 0.09109988808631897
epoch: 104 training_loss 0.0760877281986177 test_loss: 0.1037146806716919
epoch: 105 training_loss 0.07174403358250857 test_loss: 0.1016888976097107
epoch: 106 training_loss 0.08158392135053873 test_loss: 0.10656286478042602
epoch: 107 training_loss 0.08028797468170523 test_loss: 0.10550498962402344
epoch: 108 training_loss 0.07652758147567511 test_loss: 0.11134127378463746
epoch: 109 training_loss 0.07786860663443804 test_loss: 0.110901141166687
epoch: 110 training_loss 0.0740126832202077 test_loss: 0.10618267059326172
epoch: 111 training_loss 0.07980564076453447 test_loss: 0.10559587478637696
epoch: 112 training_loss 0.07785353219136595 test_loss: 0.11884430646896363
epoch: 113 training_loss 0.08318908334709704 test_loss: 0.09335048794746399
epoch: 114 training_loss 0.08096214389428497 test_loss: 0.09771864414215088
epoch: 115 training_loss 0.08570948610082268 test_loss: 0.0948198139667511
epoch: 116 training_loss 0.07750798989087343 test_loss: 0.11348872184753418
epoch: 117 training_loss 0.07864969532936811 test_loss: 0.12364078760147094
epoch: 118 training_loss 0.07704814419150352 test_loss: 0.10841611623764039
epoch: 119 training_loss 0.07610866513103247 test_loss: 0.10844391584396362
epoch: 120 training_loss 0.08174654064700007 test_loss: 0.10781362056732177
epoch: 121 training_loss 0.07749407389201224 test_loss: 0.10596216917037964
epoch: 122 training_loss 0.07749744217842817 test_loss: 0.1012988567352295
epoch: 123 training_loss 0.08114460315555334 test_loss: 0.1151614785194397
epoch: 124 training_loss 0.08327546821907163 test_loss: 0.12174173593521118
epoch: 125 training_loss 0.07583446368575096 test_loss: 0.10088773965835571
epoch: 126 training_loss 0.07745761157944799 test_loss: 0.11734844446182251
epoch: 127 training_loss 0.07706508511677385 test_loss: 0.11108573675155639
epoch: 128 training_loss 0.07802915919572115 test_loss: 0.10982178449630738
epoch: 129 training_loss 0.07836369667202234 test_loss: 0.11680679321289063
epoch: 130 training_loss 0.08235602680593729 test_loss: 0.1154552698135376
epoch: 131 training_loss 0.08199155373498797 test_loss: 0.10975911617279052
epoch: 132 training_loss 0.08010442771017551 test_loss: 0.10761586427688599
epoch: 133 training_loss 0.06947382243350148 test_loss: 0.10338006019592286
epoch: 134 training_loss 0.07701670940034092 test_loss: 0.10385161638259888
epoch: 135 training_loss 0.07797999528236688 test_loss: 0.1103684663772583
epoch: 136 training_loss 0.0751011105440557 test_loss: 0.1186897873878479
epoch: 137 training_loss 0.08361206609755754 test_loss: 0.13634487390518188
epoch: 138 training_loss 0.07877862240187823 test_loss: 0.11012259721755982
epoch: 139 training_loss 0.0721730969287455 test_loss: 0.10583887100219727
epoch: 140 training_loss 0.08090783981606364 test_loss: 0.1037873387336731
epoch: 141 training_loss 0.07876089435070753 test_loss: 0.10442283153533935
epoch: 142 training_loss 0.07965955382212997 test_loss: 0.10638924837112426
epoch: 143 training_loss 0.07776061899028719 test_loss: 0.09665229320526122
epoch: 144 training_loss 0.07553332602605224 test_loss: 0.10227329730987549
epoch: 145 training_loss 0.07760583268478513 test_loss: 0.09622708559036255
epoch: 146 training_loss 0.07776762336492539 test_loss: 0.11038203239440918
epoch: 147 training_loss 0.07464656423777342 test_loss: 0.10322147607803345
epoch: 148 training_loss 0.08257202975451947 test_loss: 0.10084574222564698
epoch: 149 training_loss 0.08011731026694179 test_loss: 0.10406693220138549
episode: 0 training return: -807.5734118301444
episode: 1 training return: -870.8410830833648
episode: 2 training return: -759.9250456177027
episode: 3 training return: -848.3793933165322
epoch: 1 test_true_pfm: 535.7383814632807 sim_pfm: -700.345537900685
episode: 4 training return: -828.1147957608152
episode: 5 training return: -819.0752252288419
episode: 6 training return: -847.7840121872646
episode: 7 training return: -863.9192950569941
epoch: 2 test_true_pfm: -88.37579490263867 sim_pfm: -687.7845027676734
episode: 8 training return: -835.7771557057539
episode: 9 training return: -799.349427091977
episode: 10 training return: -915.3553037589452
episode: 11 training return: -922.1727453187441
epoch: 3 test_true_pfm: -39.97905927729193 sim_pfm: -753.527805417982
episode: 12 training return: -871.1466918557261
episode: 13 training return: -830.6882763657765
episode: 14 training return: -845.2179943621678
episode: 15 training return: -837.3954151877053
epoch: 4 test_true_pfm: -41.77552749254758 sim_pfm: -675.7797413574137
episode: 16 training return: -793.8703613661971
episode: 17 training return: -772.8995028401323
episode: 18 training return: -869.0924225733809
episode: 19 training return: -643.7824206809631
epoch: 5 test_true_pfm: -2.0790461292334377 sim_pfm: -557.4729267599573
episode: 20 training return: -731.0723456903355
episode: 21 training return: -677.4113425481548
episode: 22 training return: -885.8810801977994
episode: 23 training return: -802.5302105590719
epoch: 6 test_true_pfm: 269.66842857354 sim_pfm: -782.9123082736281
episode: 24 training return: -775.1187874317304
episode: 25 training return: -757.0100806844999
episode: 26 training return: -718.682407045868
episode: 27 training return: -578.6375556092057
epoch: 7 test_true_pfm: 197.19035479281402 sim_pfm: -661.595977366175
episode: 28 training return: -731.4595365525824
episode: 29 training return: -606.7113734481965
episode: 30 training return: -708.6600145236165
episode: 31 training return: -650.6931351946097
epoch: 8 test_true_pfm: 19.4627749860958 sim_pfm: -531.3243576825545
episode: 32 training return: -728.5119900446807
episode: 33 training return: -628.7081573016459
episode: 34 training return: -738.2230103953062
episode: 35 training return: -659.5312660504125
epoch: 9 test_true_pfm: 341.11053060431146 sim_pfm: -525.3020024758733
episode: 36 training return: -620.6572557129118
episode: 37 training return: -620.3075457973023
episode: 38 training return: -581.1310547889468
episode: 39 training return: -579.2145320923213
epoch: 10 test_true_pfm: 157.57574057895644 sim_pfm: -558.7080743480179
episode: 40 training return: -553.7062908008688
episode: 41 training return: -610.9997408022737
episode: 42 training return: -557.5760092399541
episode: 43 training return: -576.2458333074796
epoch: 11 test_true_pfm: 262.03381435033504 sim_pfm: -501.75300288466434
episode: 44 training return: -621.3221689776451
episode: 45 training return: -598.6744424183263
episode: 46 training return: -579.0578435012188
episode: 47 training return: -537.1173158869134
epoch: 12 test_true_pfm: 303.537947878118 sim_pfm: -499.6050227523844
episode: 48 training return: -637.5439381056211
episode: 49 training return: -578.9505611138782
episode: 50 training return: -561.4518814231056
episode: 51 training return: -565.6369625303063
epoch: 13 test_true_pfm: 303.2160476854761 sim_pfm: -480.86357685410786
episode: 52 training return: -678.6147683886719
episode: 53 training return: -538.446081499161
episode: 54 training return: -580.3519289023442
episode: 55 training return: -524.6310794709253
epoch: 14 test_true_pfm: 329.85056954439324 sim_pfm: -484.112144958714
episode: 56 training return: -575.877016702876
episode: 57 training return: -577.8752371094464
episode: 58 training return: -653.6887343352532
episode: 59 training return: -517.6290204097754
epoch: 15 test_true_pfm: 153.57017943736045 sim_pfm: -468.68167449358884
episode: 60 training return: -620.7081428615104
episode: 61 training return: -530.8109284706976
episode: 62 training return: -552.3888374360761
episode: 63 training return: -583.7577225189083
epoch: 16 test_true_pfm: 147.9102943285717 sim_pfm: -507.21113076898524
episode: 64 training return: -518.5508223739054
episode: 65 training return: -547.3324635075168
episode: 66 training return: -535.260628105888
episode: 67 training return: -657.7376080762189
epoch: 17 test_true_pfm: 218.72037052886006 sim_pfm: -462.8112891735225
episode: 68 training return: -518.2255418559572
episode: 69 training return: -567.5908320672509
episode: 70 training return: -502.60094955698384
episode: 71 training return: -598.8917606058848
epoch: 18 test_true_pfm: 324.96602625277166 sim_pfm: -466.42917041133904
episode: 72 training return: -619.8508476266624
episode: 73 training return: -545.2092843197553
episode: 74 training return: -530.6448522349734
episode: 75 training return: -613.3373944621583
epoch: 19 test_true_pfm: 359.5756077407402 sim_pfm: -454.95103549530637
episode: 76 training return: -547.5479690043863
episode: 77 training return: -545.0998474119831
episode: 78 training return: -567.1404552032845
episode: 79 training return: -538.4079771579446
epoch: 20 test_true_pfm: 172.86331672846143 sim_pfm: -453.17304040211866
episode: 80 training return: -575.0204220733527
episode: 81 training return: -530.6906686575787
episode: 82 training return: -567.4247137374571
episode: 83 training return: -529.3619121284232
epoch: 21 test_true_pfm: 253.55326853255056 sim_pfm: -477.54507496262687
episode: 84 training return: -535.4584588554815
episode: 85 training return: -485.3036395013678
episode: 86 training return: -545.3210563618522
episode: 87 training return: -627.30258988948
epoch: 22 test_true_pfm: 274.34899845144474 sim_pfm: -458.63510949721814
episode: 88 training return: -526.4982827387042
episode: 89 training return: -547.409791769705
episode: 90 training return: -544.2770275805691
episode: 91 training return: -524.2190161271161
epoch: 23 test_true_pfm: 161.74612576860662 sim_pfm: -488.99485273813593
episode: 92 training return: -548.3777594459307
episode: 93 training return: -584.2286027917414
episode: 94 training return: -495.07737797617904
episode: 95 training return: -567.641190468228
epoch: 24 test_true_pfm: 88.53051144417164 sim_pfm: -609.6529753372653
episode: 96 training return: -576.5454079086506
episode: 97 training return: -547.3229159812703
episode: 98 training return: -591.1392841180492
episode: 99 training return: -566.056704733601
epoch: 25 test_true_pfm: 200.4782282198753 sim_pfm: -489.68431421350596
episode: 100 training return: -493.2018473201379
episode: 101 training return: -559.4906135183948
episode: 102 training return: -574.4131393403377
episode: 103 training return: -652.3926154685291
epoch: 26 test_true_pfm: 351.24239350890616 sim_pfm: -463.430537338116
episode: 104 training return: -637.1784037904054
episode: 105 training return: -592.0667570686185
episode: 106 training return: -572.9188416056041
episode: 107 training return: -503.522174715276
epoch: 27 test_true_pfm: 226.8583929412235 sim_pfm: -450.7262658636785
episode: 108 training return: -592.1901394870838
episode: 109 training return: -514.2407803915589
episode: 110 training return: -541.6779518002486
episode: 111 training return: -602.3497932216844
epoch: 28 test_true_pfm: 276.4348854428519 sim_pfm: -466.21684887586997
episode: 112 training return: -527.3017056894744
episode: 113 training return: -594.2630781371796
episode: 114 training return: -599.8163198857925
episode: 115 training return: -525.3739071303102
epoch: 29 test_true_pfm: 101.01240469389491 sim_pfm: -478.0217877318656
episode: 116 training return: -766.8772599667872
episode: 117 training return: -553.6137982644416
episode: 118 training return: -647.546184558253
episode: 119 training return: -536.1861280980919
epoch: 30 test_true_pfm: 227.8951406957398 sim_pfm: -466.2928768712843
episode: 120 training return: -522.2386048563262
episode: 121 training return: -498.1456251294272
episode: 122 training return: -501.7527767230013
episode: 123 training return: -550.5679480437618
epoch: 31 test_true_pfm: 277.24720861264154 sim_pfm: -445.95418527018654
episode: 124 training return: -520.8816339300186
episode: 125 training return: -563.5069369693283
episode: 126 training return: -603.030354702422
episode: 127 training return: -515.5250257427881
epoch: 32 test_true_pfm: 271.66414771158315 sim_pfm: -455.2171621097353
episode: 128 training return: -508.2804158421172
episode: 129 training return: -515.0747265859414
episode: 130 training return: -574.3416452823379
episode: 131 training return: -493.2085281414907
epoch: 33 test_true_pfm: 142.14823934406604 sim_pfm: -457.6603862440872
episode: 132 training return: -549.3768657365115
episode: 133 training return: -495.5224427658466
episode: 134 training return: -515.0801204084609
episode: 135 training return: -504.57486390871304
epoch: 34 test_true_pfm: 342.9974519505008 sim_pfm: -447.92492603242573
episode: 136 training return: -526.5279106712062
episode: 137 training return: -527.5509985259278
episode: 138 training return: -533.6604498747506
episode: 139 training return: -483.08539067202656
epoch: 35 test_true_pfm: 439.97126731528607 sim_pfm: -439.4481860712017
episode: 140 training return: -503.18512775139993
episode: 141 training return: -526.8821820528108
episode: 142 training return: -532.744626897794
episode: 143 training return: -561.1566541331617
epoch: 36 test_true_pfm: 271.41016176669893 sim_pfm: -438.77277228297424
episode: 144 training return: -532.0313006399574
episode: 145 training return: -582.2415012604755
episode: 146 training return: -564.5160329394567
episode: 147 training return: -529.8423406996698
epoch: 37 test_true_pfm: 314.35247639514495 sim_pfm: -461.6645195850845
episode: 148 training return: -500.6525393118252
episode: 149 training return: -464.6048860430753
episode: 150 training return: -504.35019338394716
episode: 151 training return: -527.6637850698847
epoch: 38 test_true_pfm: 266.6606511171943 sim_pfm: -436.30735389799037
episode: 152 training return: -572.5414549875164
episode: 153 training return: -547.685783532738
episode: 154 training return: -536.3229544812742
episode: 155 training return: -474.46889776884893
epoch: 39 test_true_pfm: 231.75462538661134 sim_pfm: -448.4731095639733
episode: 156 training return: -500.9899219938772
episode: 157 training return: -469.1156018154786
episode: 158 training return: -516.3782868570379
episode: 159 training return: -554.3335508680151
epoch: 40 test_true_pfm: 267.67405225423335 sim_pfm: -433.7276758163445
episode: 160 training return: -538.3681241878979
episode: 161 training return: -558.4965056476602
episode: 162 training return: -502.87773306865716
episode: 163 training return: -514.345696208022
epoch: 41 test_true_pfm: 236.67711051638344 sim_pfm: -458.93092886732074
episode: 164 training return: -487.12135866530554
episode: 165 training return: -505.6012234937328
episode: 166 training return: -512.7828707676558
episode: 167 training return: -519.3907573278334
epoch: 42 test_true_pfm: 331.34390158283685 sim_pfm: -431.32311638402376
episode: 168 training return: -435.94055706679
episode: 169 training return: -646.3251320204336
episode: 170 training return: -479.3635542951261
episode: 171 training return: -493.20495193702504
epoch: 43 test_true_pfm: 247.4226252199695 sim_pfm: -413.9731817411728
episode: 172 training return: -488.864426954516
episode: 173 training return: -438.3459310370147
episode: 174 training return: -572.3531814088375
episode: 175 training return: -506.7153410665758
epoch: 44 test_true_pfm: 191.47733894671885 sim_pfm: -430.72824343877045
episode: 176 training return: -508.0671592630453
episode: 177 training return: -522.7408378325285
episode: 178 training return: -466.37096637027423
episode: 179 training return: -501.78924870833487
epoch: 45 test_true_pfm: 420.57701139866555 sim_pfm: -403.3348976559082
episode: 180 training return: -518.3469778777711
episode: 181 training return: -517.7918538522457
episode: 182 training return: -529.4460114340452
episode: 183 training return: -528.1992188633862
epoch: 46 test_true_pfm: 276.33955890963534 sim_pfm: -436.49911764794643
episode: 184 training return: -488.1571275791664
episode: 185 training return: -488.45504846649766
episode: 186 training return: -497.4997581040239
episode: 187 training return: -511.74428565535203
epoch: 47 test_true_pfm: 180.77345051623936 sim_pfm: -439.8396201281792
episode: 188 training return: -531.4366542821263
episode: 189 training return: -510.229562343049
episode: 190 training return: -514.0397255906344
episode: 191 training return: -485.2913372736783
epoch: 48 test_true_pfm: 354.7205670454939 sim_pfm: -431.46270478424157
episode: 192 training return: -848.885805800383
episode: 193 training return: -547.828760206903
episode: 194 training return: -525.2835673782055
episode: 195 training return: -516.4501351384888
epoch: 49 test_true_pfm: 208.1692722764285 sim_pfm: -418.9693776692756
episode: 196 training return: -487.558401841407
episode: 197 training return: -876.11506772138
episode: 198 training return: -577.2519843853069
episode: 199 training return: -508.44727045191814
epoch: 50 test_true_pfm: 455.314202135914 sim_pfm: -394.03897461771
episode: 200 training return: -530.8959773588626
episode: 201 training return: -640.2258034122066
episode: 202 training return: -582.3340237457294
episode: 203 training return: -501.35161579255345
epoch: 51 test_true_pfm: 238.95224881103377 sim_pfm: -446.31211496694823
episode: 204 training return: -527.638508054604
episode: 205 training return: -487.0520966579758
episode: 206 training return: -512.9444764922182
episode: 207 training return: -511.9312910800152
epoch: 52 test_true_pfm: 414.08145320003285 sim_pfm: -426.43224293454864
episode: 208 training return: -512.0724634702375
episode: 209 training return: -549.3047652867664
episode: 210 training return: -494.06401669778813
episode: 211 training return: -498.62088084727117
epoch: 53 test_true_pfm: 291.97171527106985 sim_pfm: -419.0375816332717
episode: 212 training return: -545.9564626973397
episode: 213 training return: -495.14112296592134
episode: 214 training return: -490.02271034106553
episode: 215 training return: -466.0180723002824
epoch: 54 test_true_pfm: 271.1183016289004 sim_pfm: -406.79543568583676
episode: 216 training return: -487.5625688754543
episode: 217 training return: -544.6083412320619
episode: 218 training return: -498.20611772799646
episode: 219 training return: -504.6662467563547
epoch: 55 test_true_pfm: 332.4789156438337 sim_pfm: -449.090437632349
episode: 220 training return: -499.6122936651866
episode: 221 training return: -475.527829424025
episode: 222 training return: -508.74133854877414
episode: 223 training return: -450.93760679142997
epoch: 56 test_true_pfm: 516.0607433306467 sim_pfm: -475.1983373935329
episode: 224 training return: -523.043075440256
episode: 225 training return: -540.1212662114084
episode: 226 training return: -548.2718365409437
episode: 227 training return: -496.11499724854394
epoch: 57 test_true_pfm: 307.31143293133374 sim_pfm: -443.62894095986525
episode: 228 training return: -494.17400333656866
episode: 229 training return: -503.52423986375675
episode: 230 training return: -509.6281174213486
episode: 231 training return: -494.89450250557377
epoch: 58 test_true_pfm: 159.55585570224767 sim_pfm: -429.878157065087
episode: 232 training return: -497.2774552281731
episode: 233 training return: -477.5845481936678
episode: 234 training return: -510.7537386285294
episode: 235 training return: -460.69743403620225
epoch: 59 test_true_pfm: 443.5545604006961 sim_pfm: -400.3128667950307
episode: 236 training return: -540.0925972786528
episode: 237 training return: -456.4393802107646
episode: 238 training return: -513.9575858773609
episode: 239 training return: -492.9707436370224
epoch: 60 test_true_pfm: 447.7252006230658 sim_pfm: -410.22888512546825
episode: 240 training return: -471.84055982602473
episode: 241 training return: -512.1582757344081
episode: 242 training return: -500.76466625429464
episode: 243 training return: -498.1926944544288
epoch: 61 test_true_pfm: 261.04136076726223 sim_pfm: -408.05029329324424
episode: 244 training return: -562.1699408851152
episode: 245 training return: -512.9955745180355
episode: 246 training return: -446.5358303861933
episode: 247 training return: -488.1811785726052
epoch: 62 test_true_pfm: 379.7498358919654 sim_pfm: -414.77713512281156
episode: 248 training return: -511.7697561012329
episode: 249 training return: -529.6140596905512
episode: 250 training return: -432.8683792354574
episode: 251 training return: -574.0385859478795
epoch: 63 test_true_pfm: 476.57994062186316 sim_pfm: -404.16596482838395
episode: 252 training return: -499.96281139050586
episode: 253 training return: -506.66032480941675
episode: 254 training return: -487.8212478149941
episode: 255 training return: -441.1940529877499
epoch: 64 test_true_pfm: 420.61683672515124 sim_pfm: -412.3897225573987
episode: 256 training return: -488.4102992298862
episode: 257 training return: -479.02203297155063
episode: 258 training return: -487.2600432669185
episode: 259 training return: -486.5955750294517
epoch: 65 test_true_pfm: 363.8791003434001 sim_pfm: -399.10360752887954
episode: 260 training return: -505.94094335957476
episode: 261 training return: -512.2233120528666
episode: 262 training return: -570.6414663776571
episode: 263 training return: -517.2040561172946
epoch: 66 test_true_pfm: 81.35090896782707 sim_pfm: -412.7793168183053
episode: 264 training return: -472.9558073035428
episode: 265 training return: -495.69003133087966
episode: 266 training return: -435.46982391889435
episode: 267 training return: -472.36404056482365
epoch: 67 test_true_pfm: 479.90263199719465 sim_pfm: -398.25181869900774
episode: 268 training return: -539.1722326120661
episode: 269 training return: -509.8316658292777
episode: 270 training return: -519.9763443838208
episode: 271 training return: -503.40672450265834
epoch: 68 test_true_pfm: 340.21871255316154 sim_pfm: -428.18698332057915
episode: 272 training return: -493.20815659938245
episode: 273 training return: -653.0077494167737
episode: 274 training return: -465.87208717278685
episode: 275 training return: -536.3391736541478
epoch: 69 test_true_pfm: 569.5095068373435 sim_pfm: -391.663917058613
episode: 276 training return: -475.7943950105857
episode: 277 training return: -496.96547060987916
episode: 278 training return: -506.8995637598927
episode: 279 training return: -518.8697480443392
epoch: 70 test_true_pfm: 517.6129499903004 sim_pfm: -415.5293178880511
episode: 280 training return: -507.7774802975101
episode: 281 training return: -483.68932144623574
episode: 282 training return: -498.4760599058884
episode: 283 training return: -489.77840678795855
epoch: 71 test_true_pfm: 368.865722994388 sim_pfm: -407.8471124295149
episode: 284 training return: -551.4183742540276
episode: 285 training return: -456.21636581356023
episode: 286 training return: -548.2228912496865
episode: 287 training return: -482.44279450635173
epoch: 72 test_true_pfm: 479.41915254999566 sim_pfm: -391.71171289337525
episode: 288 training return: -507.8655349488649
episode: 289 training return: -484.2057734811778
episode: 290 training return: -525.4972121286579
episode: 291 training return: -560.4101016346336
epoch: 73 test_true_pfm: 380.69053955898266 sim_pfm: -405.965100385185
episode: 292 training return: -492.38372621134056
episode: 293 training return: -497.37203815302814
episode: 294 training return: -510.42816020895367
episode: 295 training return: -531.6756210726522
epoch: 74 test_true_pfm: 493.85440734017175 sim_pfm: -395.2778372424928
episode: 296 training return: -531.5064225358761
episode: 297 training return: -479.0167561839225
episode: 298 training return: -466.71505769867883
episode: 299 training return: -479.5382840038407
epoch: 75 test_true_pfm: 380.46940595637506 sim_pfm: -398.9484791539574
episode: 300 training return: -523.349308755046
episode: 301 training return: -456.3165181634753
episode: 302 training return: -452.72524348684436
episode: 303 training return: -505.91118020438637
epoch: 76 test_true_pfm: 496.7062457565467 sim_pfm: -389.61961760418757
episode: 304 training return: -464.09758472682483
episode: 305 training return: -462.2501556762344
episode: 306 training return: -535.4692652663399
episode: 307 training return: -459.428600177473
epoch: 77 test_true_pfm: 317.3174413440843 sim_pfm: -390.6869341119928
episode: 308 training return: -453.10277699793016
episode: 309 training return: -522.3044172225443
episode: 310 training return: -548.0389104512418
episode: 311 training return: -518.6056129486738
epoch: 78 test_true_pfm: 384.02056211336884 sim_pfm: -397.92056804207044
episode: 312 training return: -542.4762759713349
episode: 313 training return: -495.20354700702546
episode: 314 training return: -475.03094006758835
episode: 315 training return: -511.30549336581885
epoch: 79 test_true_pfm: 509.4644822956081 sim_pfm: -451.5803981741351
episode: 316 training return: -504.9878739361419
episode: 317 training return: -500.5014305094376
episode: 318 training return: -505.6072182496969
episode: 319 training return: -492.3743894820277
epoch: 80 test_true_pfm: 405.4946093600458 sim_pfm: -394.2973746691468
episode: 320 training return: -512.4885105222223
episode: 321 training return: -481.2176129261254
episode: 322 training return: -535.9362419996852
episode: 323 training return: -494.37393425689106
epoch: 81 test_true_pfm: 339.2069155200636 sim_pfm: -415.1069442411637
episode: 324 training return: -485.7229993289
episode: 325 training return: -470.83282759327494
episode: 326 training return: -522.0063668960152
episode: 327 training return: -490.17580697815777
epoch: 82 test_true_pfm: 466.0620916853389 sim_pfm: -404.4756661025935
episode: 328 training return: -522.9518223184971
episode: 329 training return: -503.82453303217375
episode: 330 training return: -501.9413951092408
episode: 331 training return: -496.8256539720373
epoch: 83 test_true_pfm: 429.06655851022316 sim_pfm: -376.9926100716463
episode: 332 training return: -515.953433930846
episode: 333 training return: -515.8704867685257
episode: 334 training return: -442.88846902898945
episode: 335 training return: -477.6489212307979
epoch: 84 test_true_pfm: 467.56769202629977 sim_pfm: -389.3795218833654
episode: 336 training return: -504.26110949552844
episode: 337 training return: -579.800390782054
episode: 338 training return: -461.6085307275083
episode: 339 training return: -505.6742767488995
epoch: 85 test_true_pfm: 504.26884431510194 sim_pfm: -412.63278071964805
episode: 340 training return: -473.0142439827144
episode: 341 training return: -547.9473583091535
episode: 342 training return: -460.7565890386101
episode: 343 training return: -487.65406979582804
epoch: 86 test_true_pfm: 209.91765456819826 sim_pfm: -417.67968827029745
episode: 344 training return: -509.76428451487584
episode: 345 training return: -515.3597130279657
episode: 346 training return: -502.78138778676214
episode: 347 training return: -469.64552709913454
epoch: 87 test_true_pfm: 521.3443897343737 sim_pfm: -387.89192946099047
episode: 348 training return: -443.0911620954847
episode: 349 training return: -486.82264779728575
episode: 350 training return: -492.5948666250708
episode: 351 training return: -486.83269069480536
epoch: 88 test_true_pfm: 463.4859546299537 sim_pfm: -391.0569147931524
episode: 352 training return: -531.2296183507536
episode: 353 training return: -505.9252135029822
episode: 354 training return: -511.3399178263806
episode: 355 training return: -485.5825672888115
epoch: 89 test_true_pfm: 424.7840862404305 sim_pfm: -417.9865791344707
episode: 356 training return: -429.2945373813567
episode: 357 training return: -478.7247358256618
episode: 358 training return: -484.9856791559983
episode: 359 training return: -509.04828710430127
epoch: 90 test_true_pfm: 442.53925307892246 sim_pfm: -390.33750949102296
episode: 360 training return: -479.9148857853916
episode: 361 training return: -496.30066928955307
episode: 362 training return: -529.0863748080818
episode: 363 training return: -472.53551830262495
epoch: 91 test_true_pfm: 323.17911060909455 sim_pfm: -442.47369683813804
episode: 364 training return: -516.109297610075
episode: 365 training return: -505.00647803813416
episode: 366 training return: -471.4100015971011
episode: 367 training return: -449.1784247965508
epoch: 92 test_true_pfm: 419.18817687243717 sim_pfm: -421.0443949371186
episode: 368 training return: -508.6903070269196
episode: 369 training return: -464.3015775266508
episode: 370 training return: -496.96250352176344
episode: 371 training return: -539.5086843911214
epoch: 93 test_true_pfm: 617.7756260979868 sim_pfm: -371.4652848301868
episode: 372 training return: -447.5470731716226
episode: 373 training return: -473.8687711113767
episode: 374 training return: -466.9740453951281
episode: 375 training return: -498.94753363949474
epoch: 94 test_true_pfm: 472.7845980503874 sim_pfm: -408.71482711245
episode: 376 training return: -467.08109768713905
episode: 377 training return: -446.218598784981
episode: 378 training return: -494.94675283283607
episode: 379 training return: -479.46295363131173
epoch: 95 test_true_pfm: 327.0778846368623 sim_pfm: -409.47009895524747
episode: 380 training return: -498.1522116249032
episode: 381 training return: -454.34337352026444
episode: 382 training return: -499.8563288174646
episode: 383 training return: -499.10738384816943
epoch: 96 test_true_pfm: 496.060778787552 sim_pfm: -402.6583223247256
episode: 384 training return: -497.88180928195845
episode: 385 training return: -516.9382381810748
episode: 386 training return: -508.12517064867427
episode: 387 training return: -460.10182987681077
epoch: 97 test_true_pfm: 502.5773826350928 sim_pfm: -373.94605189221176
episode: 388 training return: -446.64202984661375
episode: 389 training return: -478.21762359540344
episode: 390 training return: -487.6375782185234
episode: 391 training return: -467.0696674155926
epoch: 98 test_true_pfm: 455.67620127366575 sim_pfm: -409.23459961896106
episode: 392 training return: -477.16348950733686
episode: 393 training return: -453.63674727668456
episode: 394 training return: -525.7581883783927
episode: 395 training return: -464.0192749063633
epoch: 99 test_true_pfm: 654.1151901615661 sim_pfm: -380.81107012815255
episode: 396 training return: -448.41947061983024
episode: 397 training return: -475.5092841557353
episode: 398 training return: -515.6090244868532
episode: 399 training return: -481.94091442931546
epoch: 100 test_true_pfm: 586.4085076146441 sim_pfm: -387.880884009897
episode: 400 training return: -484.2987563184981
episode: 401 training return: -628.8845992270782
episode: 402 training return: -574.4578622657529
episode: 403 training return: -515.7285369493349
epoch: 101 test_true_pfm: 525.8092218485036 sim_pfm: -347.2832606501203
episode: 404 training return: -446.0864675923092
episode: 405 training return: -504.81473917421374
episode: 406 training return: -504.2304290362752
episode: 407 training return: -413.48988538601844
epoch: 102 test_true_pfm: 504.2351405348659 sim_pfm: -384.85534605474777
episode: 408 training return: -808.9367668367294
episode: 409 training return: -501.0945824230778
episode: 410 training return: -555.6866474001956
episode: 411 training return: -460.5468999374009
epoch: 103 test_true_pfm: 421.3492161327222 sim_pfm: -411.48597285957345
episode: 412 training return: -487.8243464920125
episode: 413 training return: -469.028161950969
episode: 414 training return: -532.566150757779
episode: 415 training return: -492.3390814324952
epoch: 104 test_true_pfm: 501.49664487444915 sim_pfm: -377.2807895850439
episode: 416 training return: -495.54738232690624
episode: 417 training return: -462.15493179826575
episode: 418 training return: -463.4474512465241
episode: 419 training return: -449.5224194950517
epoch: 105 test_true_pfm: 372.07405431744127 sim_pfm: -408.0685003275125
episode: 420 training return: -471.3650799774813
episode: 421 training return: -473.060027698217
episode: 422 training return: -430.81672170760953
episode: 423 training return: -479.8821273264092
epoch: 106 test_true_pfm: 473.87608134067955 sim_pfm: -368.503027768775
episode: 424 training return: -493.2386766167234
episode: 425 training return: -506.7822801997793
episode: 426 training return: -501.59833944529686
episode: 427 training return: -465.14005768802616
epoch: 107 test_true_pfm: 465.90229543897794 sim_pfm: -395.6774831914441
episode: 428 training return: -489.4606987931916
episode: 429 training return: -471.2440153967661
episode: 430 training return: -518.0376046068783
episode: 431 training return: -468.28811792088305
epoch: 108 test_true_pfm: 661.8897967519933 sim_pfm: -375.2032379956874
episode: 432 training return: -508.7460467398146
episode: 433 training return: -466.10584933461
episode: 434 training return: -525.0503978010868
episode: 435 training return: -547.2549920920608
epoch: 109 test_true_pfm: 478.68237212841495 sim_pfm: -396.80968995109515
episode: 436 training return: -460.77400938249787
episode: 437 training return: -459.8675627872329
episode: 438 training return: -478.47338243209583
episode: 439 training return: -471.399842915325
epoch: 110 test_true_pfm: 402.7987134286414 sim_pfm: -396.58668610533863
episode: 440 training return: -464.8301916038637
episode: 441 training return: -545.3373647307676
episode: 442 training return: -495.01574570413794
episode: 443 training return: -490.2681632896182
epoch: 111 test_true_pfm: 357.6458347741988 sim_pfm: -378.40639518306944
episode: 444 training return: -443.16234416124854
episode: 445 training return: -514.3283373323924
episode: 446 training return: -478.73523514361716
episode: 447 training return: -464.32935927571634
epoch: 112 test_true_pfm: 636.5593890732962 sim_pfm: -372.07998793451617
episode: 448 training return: -473.1116936806564
episode: 449 training return: -477.66911814723954
episode: 450 training return: -468.00139202519915
episode: 451 training return: -437.6542413925234
epoch: 113 test_true_pfm: 340.1780276770242 sim_pfm: -362.4909409181423
episode: 452 training return: -430.4248937217089
episode: 453 training return: -457.0943668898659
episode: 454 training return: -465.5438105324807
episode: 455 training return: -475.32907636438057
epoch: 114 test_true_pfm: 312.5593397261528 sim_pfm: -410.7794792584047
episode: 456 training return: -426.3056362298076
episode: 457 training return: -503.01147954214235
episode: 458 training return: -468.4290962298618
episode: 459 training return: -529.9997824819237
epoch: 115 test_true_pfm: 586.5307555746948 sim_pfm: -361.6874626604063
episode: 460 training return: -515.449307937375
episode: 461 training return: -467.4566874539471
episode: 462 training return: -494.2941210233995
episode: 463 training return: -552.5136933165087
epoch: 116 test_true_pfm: 529.1134764664392 sim_pfm: -385.9188640701929
episode: 464 training return: -479.0400956509573
episode: 465 training return: -492.40524909936767
episode: 466 training return: -488.90967808449426
episode: 467 training return: -448.0193355998451
epoch: 117 test_true_pfm: 505.9562400107059 sim_pfm: -408.34255365461973
episode: 468 training return: -506.12825070683743
episode: 469 training return: -484.2396415365686
episode: 470 training return: -435.84581105414867
episode: 471 training return: -486.3040937927386
epoch: 118 test_true_pfm: 456.9078032902469 sim_pfm: -380.2886232145075
episode: 472 training return: -541.9639063421341
episode: 473 training return: -504.79455593250236
episode: 474 training return: -465.12529104295567
episode: 475 training return: -445.69159014145765
epoch: 119 test_true_pfm: 467.72576426572414 sim_pfm: -405.18440400354285
episode: 476 training return: -496.147889527713
episode: 477 training return: -506.9628237929168
episode: 478 training return: -461.1957859847753
episode: 479 training return: -502.39015119850404
epoch: 120 test_true_pfm: 496.29227026779085 sim_pfm: -392.5953359967114
episode: 480 training return: -509.39015943955485
episode: 481 training return: -462.1952161641607
episode: 482 training return: -503.2623038199449
episode: 483 training return: -525.0414869019082
epoch: 121 test_true_pfm: 710.073481104491 sim_pfm: -371.25393017376047
episode: 484 training return: -431.99182357140205
episode: 485 training return: -419.07993002078695
episode: 486 training return: -428.8396341026655
episode: 487 training return: -460.55366238316776
epoch: 122 test_true_pfm: 497.5782254250084 sim_pfm: -356.5539704281395
episode: 488 training return: -475.56332975962994
episode: 489 training return: -455.40029167056946
episode: 490 training return: -516.3646023145373
episode: 491 training return: -528.1386728133846
epoch: 123 test_true_pfm: 507.02874593345285 sim_pfm: -374.36256283192796
episode: 492 training return: -447.4805309643653
episode: 493 training return: -477.2888533796816
episode: 494 training return: -428.8027136449948
episode: 495 training return: -478.13526021517794
epoch: 124 test_true_pfm: 508.4610459554342 sim_pfm: -395.0879530152849
episode: 496 training return: -483.95424150706833
episode: 497 training return: -480.03082935405297
episode: 498 training return: -505.2895327139555
episode: 499 training return: -581.4492451834982
epoch: 125 test_true_pfm: 493.29617911965164 sim_pfm: -372.45303859584993
episode: 500 training return: -543.3599402987395
episode: 501 training return: -437.05405153548753
episode: 502 training return: -493.2575185373621
episode: 503 training return: -559.1533051504822
epoch: 126 test_true_pfm: 522.5731397803811 sim_pfm: -388.72607334461503
episode: 504 training return: -450.74312946656727
episode: 505 training return: -495.8003300811482
episode: 506 training return: -486.27664514785965
episode: 507 training return: -442.49171305479024
epoch: 127 test_true_pfm: 583.3030224458281 sim_pfm: -350.073047297188
episode: 508 training return: -486.27740740455806
episode: 509 training return: -458.15162654457464
episode: 510 training return: -468.8220069660881
episode: 511 training return: -443.8479062075711
epoch: 128 test_true_pfm: 659.5856884043527 sim_pfm: -333.11336794539505
episode: 512 training return: -464.8554881262199
episode: 513 training return: -417.10090157794394
episode: 514 training return: -481.5385359077832
episode: 515 training return: -539.3108389355265
epoch: 129 test_true_pfm: 659.0584079165334 sim_pfm: -371.3820288599884
episode: 516 training return: -489.1914164100233
episode: 517 training return: -460.262100559837
episode: 518 training return: -467.8214541044072
episode: 519 training return: -433.9574833350159
epoch: 130 test_true_pfm: 627.0420182332092 sim_pfm: -364.61433390412844
episode: 520 training return: -502.84180856543185
episode: 521 training return: -441.72460393920414
episode: 522 training return: -465.0142726300144
episode: 523 training return: -523.625223070334
epoch: 131 test_true_pfm: 501.83207787228184 sim_pfm: -385.42482795013296
episode: 524 training return: -488.746738784995
episode: 525 training return: -410.72065581792776
episode: 526 training return: -508.12369927073684
episode: 527 training return: -490.5548902291518
epoch: 132 test_true_pfm: 606.1108648149701 sim_pfm: -336.4182147233492
episode: 528 training return: -472.74423382884794
episode: 529 training return: -516.321571148423
episode: 530 training return: -485.64465880855926
episode: 531 training return: -493.30233393258106
epoch: 133 test_true_pfm: 510.6054684341074 sim_pfm: -357.61727957778265
episode: 532 training return: -460.830957644531
episode: 533 training return: -496.6112507846169
episode: 534 training return: -513.3959842394008
episode: 535 training return: -538.2946575257724
epoch: 134 test_true_pfm: 652.9071537234444 sim_pfm: -379.3233938340932
episode: 536 training return: -499.2756916783205
episode: 537 training return: -513.0333950355562
episode: 538 training return: -494.082596117054
episode: 539 training return: -487.71566332916905
epoch: 135 test_true_pfm: 529.2036050874074 sim_pfm: -364.65474316575273
episode: 540 training return: -515.7448941616776
episode: 541 training return: -446.4065478049099
episode: 542 training return: -473.1425904965984
episode: 543 training return: -469.8362986971897
epoch: 136 test_true_pfm: 574.1958591934251 sim_pfm: -387.2101369675734
episode: 544 training return: -513.908443180821
episode: 545 training return: -461.96932199126286
episode: 546 training return: -410.6856061307167
episode: 547 training return: -497.43341150613423
epoch: 137 test_true_pfm: 527.8511134264464 sim_pfm: -381.04809437429964
episode: 548 training return: -443.1890942040068
episode: 549 training return: -498.8496713578043
episode: 550 training return: -511.6627658649142
episode: 551 training return: -477.41072260966405
epoch: 138 test_true_pfm: 570.0155654657979 sim_pfm: -367.73549332298506
episode: 552 training return: -458.37160320844464
episode: 553 training return: -475.56902150208055
episode: 554 training return: -467.1797891491732
episode: 555 training return: -523.436765665318
epoch: 139 test_true_pfm: 592.0632244193101 sim_pfm: -350.5918722766758
episode: 556 training return: -499.3921420854222
episode: 557 training return: -483.09897397209437
episode: 558 training return: -471.0694187835267
episode: 559 training return: -470.2788669368293
epoch: 140 test_true_pfm: 578.4921528087137 sim_pfm: -350.5291839020595
episode: 560 training return: -538.9646609989078
episode: 561 training return: -474.35135790147405
episode: 562 training return: -446.0513366497215
episode: 563 training return: -470.18555555523534
epoch: 141 test_true_pfm: 603.0805886682527 sim_pfm: -345.6527040310519
episode: 564 training return: -484.59581072970417
episode: 565 training return: -453.4371872662996
episode: 566 training return: -454.01581396752937
episode: 567 training return: -470.26276520367327
epoch: 142 test_true_pfm: 557.4361688938087 sim_pfm: -367.45019953939845
episode: 568 training return: -492.90347554248103
episode: 569 training return: -451.2779717000462
episode: 570 training return: -482.5790742624695
episode: 571 training return: -472.97660844713454
epoch: 143 test_true_pfm: 397.03964287978556 sim_pfm: -378.2600206149785
episode: 572 training return: -481.6435256510923
episode: 573 training return: -459.90820601993846
episode: 574 training return: -479.972052129695
episode: 575 training return: -441.68037211434506
epoch: 144 test_true_pfm: 543.40816830172 sim_pfm: -363.5208232897762
episode: 576 training return: -486.00382679583754
episode: 577 training return: -495.75953881842645
episode: 578 training return: -450.3078845472193
episode: 579 training return: -464.87687613358827
epoch: 145 test_true_pfm: 493.75351368227274 sim_pfm: -368.4443289978428
episode: 580 training return: -457.08695182112467
episode: 581 training return: -508.9176339377205
episode: 582 training return: -437.5463220624552
episode: 583 training return: -423.7087051805235
epoch: 146 test_true_pfm: 613.6173996121754 sim_pfm: -351.7412498591012
episode: 584 training return: -448.87082046307506
episode: 585 training return: -461.9170517626647
episode: 586 training return: -456.0226064686128
episode: 587 training return: -433.6488082338963
epoch: 147 test_true_pfm: 482.54202101141914 sim_pfm: -374.58728551853824
episode: 588 training return: -506.24059595992213
episode: 589 training return: -495.74044948485977
episode: 590 training return: -447.0233152945931
episode: 591 training return: -432.2320567771455
epoch: 148 test_true_pfm: 440.99035962205244 sim_pfm: -399.0997042689417
episode: 592 training return: -495.78060324187226
episode: 593 training return: -490.6398743861361
episode: 594 training return: -473.8630137310319
episode: 595 training return: -582.7077386974569
epoch: 149 test_true_pfm: 483.39911655823516 sim_pfm: -379.57410148800085
episode: 596 training return: -479.2076990412391
episode: 597 training return: -473.4615108612238
episode: 598 training return: -491.6072324041381
episode: 599 training return: -460.59590650295297
epoch: 150 test_true_pfm: 508.6413338833552 sim_pfm: -387.12583894961955
