['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.263420736566186 test_loss: 0.19257662296295167
epoch: 1 training_loss 0.1998171529918909 test_loss: 0.1719391703605652
epoch: 2 training_loss 0.17030673444271088 test_loss: 0.14116348028182985
epoch: 3 training_loss 0.15398188188672066 test_loss: 0.18967434167861938
epoch: 4 training_loss 0.15576310262084006 test_loss: 0.14325871467590331
epoch: 5 training_loss 0.1453572365269065 test_loss: 0.12268030643463135
epoch: 6 training_loss 0.143402057364583 test_loss: 0.13705521821975708
epoch: 7 training_loss 0.132154483795166 test_loss: 0.12627413272857665
epoch: 8 training_loss 0.13432888310402633 test_loss: 0.1205510139465332
epoch: 9 training_loss 0.1432599575817585 test_loss: 0.13663675785064697
epoch: 10 training_loss 0.12908328410238026 test_loss: 0.1372899293899536
epoch: 11 training_loss 0.13297974094748496 test_loss: 0.15152667760848998
epoch: 12 training_loss 0.1422730601206422 test_loss: 0.11119500398635865
epoch: 13 training_loss 0.13866778891533613 test_loss: 0.13268940448760985
epoch: 14 training_loss 0.12476040411740541 test_loss: 0.15353014469146728
epoch: 15 training_loss 0.1409482335112989 test_loss: 0.15494823455810547
epoch: 16 training_loss 0.12964184150099756 test_loss: 0.1415422797203064
epoch: 17 training_loss 0.13156953576952218 test_loss: 0.11176782846450806
epoch: 18 training_loss 0.1255372929573059 test_loss: 0.1116258978843689
epoch: 19 training_loss 0.11937972635030747 test_loss: 0.11883426904678344
epoch: 20 training_loss 0.13683810088783502 test_loss: 0.11670595407485962
epoch: 21 training_loss 0.12328845489770174 test_loss: 0.13599679470062256
epoch: 22 training_loss 0.13155064191669225 test_loss: 0.12272409200668336
epoch: 23 training_loss 0.14175854180008174 test_loss: 0.12983635663986207
epoch: 24 training_loss 0.13108338374644518 test_loss: 0.1524674892425537
epoch: 25 training_loss 0.13054532404989005 test_loss: 0.13057055473327636
epoch: 26 training_loss 0.12917673084884881 test_loss: 0.12484345436096192
epoch: 27 training_loss 0.12004452876746655 test_loss: 0.1344178795814514
epoch: 28 training_loss 0.1205412282422185 test_loss: 0.12299998998641967
epoch: 29 training_loss 0.12653225615620614 test_loss: 0.11984553337097167
epoch: 30 training_loss 0.13288799572736024 test_loss: 0.11962041854858399
epoch: 31 training_loss 0.11781310804188251 test_loss: 0.12210791110992432
epoch: 32 training_loss 0.12881978847086428 test_loss: 0.13106783628463745
epoch: 33 training_loss 0.1267450910806656 test_loss: 0.14235916137695312
epoch: 34 training_loss 0.12712469406425952 test_loss: 0.1527348518371582
epoch: 35 training_loss 0.1310615197196603 test_loss: 0.1065633773803711
epoch: 36 training_loss 0.12122688287869096 test_loss: 0.11901565790176391
epoch: 37 training_loss 0.12082506407052279 test_loss: 0.12388765811920166
epoch: 38 training_loss 0.11799894727766513 test_loss: 0.1359517455101013
epoch: 39 training_loss 0.12288313876837492 test_loss: 0.11436673402786254
epoch: 40 training_loss 0.12630833279341458 test_loss: 0.1095434546470642
epoch: 41 training_loss 0.12800240822136402 test_loss: 0.12043159008026123
epoch: 42 training_loss 0.1168491437099874 test_loss: 0.10845588445663452
epoch: 43 training_loss 0.11777976103127002 test_loss: 0.13069334030151367
epoch: 44 training_loss 0.12798661291599273 test_loss: 0.11656782627105713
epoch: 45 training_loss 0.1297317745536566 test_loss: 0.11113613843917847
epoch: 46 training_loss 0.11437679104506969 test_loss: 0.11158027648925781
epoch: 47 training_loss 0.12116818394511938 test_loss: 0.1248171091079712
epoch: 48 training_loss 0.118823405187577 test_loss: 0.11391481161117553
epoch: 49 training_loss 0.1165732817351818 test_loss: 0.11638646125793457
epoch: 50 training_loss 0.122906873524189 test_loss: 0.14562925100326538
epoch: 51 training_loss 0.1213468362018466 test_loss: 0.1258984088897705
epoch: 52 training_loss 0.12741119138896465 test_loss: 0.12354868650436401
epoch: 53 training_loss 0.11266912639141083 test_loss: 0.10254184007644654
epoch: 54 training_loss 0.1183569199219346 test_loss: 0.11252126693725586
epoch: 55 training_loss 0.11810725290328264 test_loss: 0.14857977628707886
epoch: 56 training_loss 0.12031644240021705 test_loss: 0.111759352684021
epoch: 57 training_loss 0.11143897913396358 test_loss: 0.11824952363967896
epoch: 58 training_loss 0.12279979582875968 test_loss: 0.13535372018814087
epoch: 59 training_loss 0.11359937170520425 test_loss: 0.11367565393447876
epoch: 60 training_loss 0.1153099150583148 test_loss: 0.13059263229370116
epoch: 61 training_loss 0.1231287381798029 test_loss: 0.11957868337631225
epoch: 62 training_loss 0.11445718629285694 test_loss: 0.14439200162887572
epoch: 63 training_loss 0.11582512889057398 test_loss: 0.10837669372558593
epoch: 64 training_loss 0.12160784140229225 test_loss: 0.13176338672637938
epoch: 65 training_loss 0.11970356199890375 test_loss: 0.13938136100769044
epoch: 66 training_loss 0.11941421031951904 test_loss: 0.11784540414810181
epoch: 67 training_loss 0.1213641957938671 test_loss: 0.11049021482467651
epoch: 68 training_loss 0.10913035936653614 test_loss: 0.12963292598724366
epoch: 69 training_loss 0.12132305935025216 test_loss: 0.1393112301826477
epoch: 70 training_loss 0.11736753534525633 test_loss: 0.10917376279830933
epoch: 71 training_loss 0.11642734467983246 test_loss: 0.117057204246521
epoch: 72 training_loss 0.11888044238090516 test_loss: 0.1085174560546875
epoch: 73 training_loss 0.12087603978812694 test_loss: 0.12219752073287964
epoch: 74 training_loss 0.11763668583706022 test_loss: 0.10044324398040771
epoch: 75 training_loss 0.11743764406070113 test_loss: 0.14261358976364136
epoch: 76 training_loss 0.11475413018837571 test_loss: 0.11508901119232177
epoch: 77 training_loss 0.11663379400968552 test_loss: 0.12495769262313842
epoch: 78 training_loss 0.12864761408418418 test_loss: 0.1179610013961792
epoch: 79 training_loss 0.10830424416810275 test_loss: 0.10411146879196168
epoch: 80 training_loss 0.11932960595935584 test_loss: 0.13300291299819947
epoch: 81 training_loss 0.12659687999635935 test_loss: 0.1289164423942566
epoch: 82 training_loss 0.11600991135463118 test_loss: 0.105624258518219
epoch: 83 training_loss 0.11834599239751696 test_loss: 0.10359622240066528
epoch: 84 training_loss 0.10938839323818683 test_loss: 0.11979056596755981
epoch: 85 training_loss 0.11197627967223525 test_loss: 0.11202014684677124
epoch: 86 training_loss 0.11198504731059074 test_loss: 0.120870041847229
epoch: 87 training_loss 0.1146199507266283 test_loss: 0.11706804037094116
epoch: 88 training_loss 0.11674330295994878 test_loss: 0.11090329885482789
epoch: 89 training_loss 0.11508964769542217 test_loss: 0.12111047506332398
epoch: 90 training_loss 0.11743052624166012 test_loss: 0.13140647411346434
epoch: 91 training_loss 0.12188021773472428 test_loss: 0.1293228030204773
epoch: 92 training_loss 0.11719047460705041 test_loss: 0.12921496629714965
epoch: 93 training_loss 0.11220143519341946 test_loss: 0.10016143321990967
epoch: 94 training_loss 0.11653367098420858 test_loss: 0.13267608880996704
epoch: 95 training_loss 0.11549847763031722 test_loss: 0.13012622594833373
epoch: 96 training_loss 0.1169414410740137 test_loss: 0.12188305854797363
epoch: 97 training_loss 0.1133326280489564 test_loss: 0.12133830785751343
epoch: 98 training_loss 0.11615017212927342 test_loss: 0.10800768136978149
epoch: 99 training_loss 0.11329404346644878 test_loss: 0.11129711866378784
epoch: 100 training_loss 0.11834418967366218 test_loss: 0.10511035919189453
epoch: 101 training_loss 0.11893562782555818 test_loss: 0.1289064645767212
epoch: 102 training_loss 0.1092448622174561 test_loss: 0.13452063798904418
epoch: 103 training_loss 0.11710613191127778 test_loss: 0.11415174007415771
epoch: 104 training_loss 0.10971679784357548 test_loss: 0.13660675287246704
epoch: 105 training_loss 0.11952834390103817 test_loss: 0.11718792915344238
epoch: 106 training_loss 0.11568829210475087 test_loss: 0.12073194980621338
epoch: 107 training_loss 0.11360926000401378 test_loss: 0.11969484090805053
epoch: 108 training_loss 0.12024701222777366 test_loss: 0.09889541864395142
epoch: 109 training_loss 0.11625126326456665 test_loss: 0.11324396133422851
epoch: 110 training_loss 0.10996276089921593 test_loss: 0.1185065507888794
epoch: 111 training_loss 0.11705279223620892 test_loss: 0.10909003019332886
epoch: 112 training_loss 0.1145665879920125 test_loss: 0.13197789192199708
epoch: 113 training_loss 0.11391316736117005 test_loss: 0.12332775592803955
epoch: 114 training_loss 0.11129077767953277 test_loss: 0.11264156103134156
epoch: 115 training_loss 0.11671982605010271 test_loss: 0.11125383377075196
epoch: 116 training_loss 0.10508250638842583 test_loss: 0.1276896595954895
epoch: 117 training_loss 0.1126578551903367 test_loss: 0.12116626501083375
epoch: 118 training_loss 0.1167395168542862 test_loss: 0.13073129653930665
epoch: 119 training_loss 0.11524791345000267 test_loss: 0.12141120433807373
epoch: 120 training_loss 0.11509919926524162 test_loss: 0.11937625408172607
epoch: 121 training_loss 0.12037866801023483 test_loss: 0.12349212169647217
epoch: 122 training_loss 0.12402383670210838 test_loss: 0.1116952657699585
epoch: 123 training_loss 0.11576598033308982 test_loss: 0.1154906153678894
epoch: 124 training_loss 0.11693271484225988 test_loss: 0.10232347249984741
epoch: 125 training_loss 0.11490021172910929 test_loss: 0.11860102415084839
epoch: 126 training_loss 0.11192676592618227 test_loss: 0.14480875730514525
epoch: 127 training_loss 0.12038303572684526 test_loss: 0.1071330189704895
epoch: 128 training_loss 0.12333805497735739 test_loss: 0.1180767297744751
epoch: 129 training_loss 0.11762552298605441 test_loss: 0.13953453302383423
epoch: 130 training_loss 0.12419305976480245 test_loss: 0.11447315216064453
epoch: 131 training_loss 0.10466955002397299 test_loss: 0.13405282497406007
epoch: 132 training_loss 0.10514307217672467 test_loss: 0.1165963053703308
epoch: 133 training_loss 0.11426740383729339 test_loss: 0.1100245475769043
epoch: 134 training_loss 0.12287217650562525 test_loss: 0.10711069107055664
epoch: 135 training_loss 0.11653628677129746 test_loss: 0.11142507791519166
epoch: 136 training_loss 0.10908232605084776 test_loss: 0.12119756937026978
epoch: 137 training_loss 0.11204756274819375 test_loss: 0.09512826204299926
epoch: 138 training_loss 0.11048036932945252 test_loss: 0.11453827619552612
epoch: 139 training_loss 0.11075756166130304 test_loss: 0.122398042678833
epoch: 140 training_loss 0.10756191402673722 test_loss: 0.12970972061157227
epoch: 141 training_loss 0.1182396798208356 test_loss: 0.1069716215133667
epoch: 142 training_loss 0.11541197419166566 test_loss: 0.1259123682975769
epoch: 143 training_loss 0.11649997465312481 test_loss: 0.10347908735275269
epoch: 144 training_loss 0.10913579985499382 test_loss: 0.12191427946090698
epoch: 145 training_loss 0.11352734737098218 test_loss: 0.11801322698593139
epoch: 146 training_loss 0.11720974635332823 test_loss: 0.12109640836715699
epoch: 147 training_loss 0.11263701520860195 test_loss: 0.1379188895225525
epoch: 148 training_loss 0.11409257307648658 test_loss: 0.13513349294662474
epoch: 149 training_loss 0.11633344458416105 test_loss: 0.11738713979721069
epoch: 0 training_loss 0.28476450763642785 test_loss: 0.20311956405639647
epoch: 1 training_loss 0.1906588077545166 test_loss: 0.1751389741897583
epoch: 2 training_loss 0.17203418783843516 test_loss: 0.15990089178085326
epoch: 3 training_loss 0.14698109701275824 test_loss: 0.16907596588134766
epoch: 4 training_loss 0.14854391813278198 test_loss: 0.15473549365997313
epoch: 5 training_loss 0.14955046128481628 test_loss: 0.13947935104370118
epoch: 6 training_loss 0.13982902858406304 test_loss: 0.13086587190628052
epoch: 7 training_loss 0.1463321927934885 test_loss: 0.15096826553344728
epoch: 8 training_loss 0.14032701820135116 test_loss: 0.13955038785934448
epoch: 9 training_loss 0.13563647042959928 test_loss: 0.1274225354194641
epoch: 10 training_loss 0.12994973547756672 test_loss: 0.13658678531646729
epoch: 11 training_loss 0.130933207757771 test_loss: 0.18087418079376222
epoch: 12 training_loss 0.13894307896494865 test_loss: 0.12792956829071045
epoch: 13 training_loss 0.1296091915294528 test_loss: 0.13126220703125
epoch: 14 training_loss 0.1311670133844018 test_loss: 0.12504849433898926
epoch: 15 training_loss 0.13402524542063474 test_loss: 0.10939738750457764
epoch: 16 training_loss 0.12395343452692031 test_loss: 0.13198498487472535
epoch: 17 training_loss 0.12343493273481726 test_loss: 0.172970974445343
epoch: 18 training_loss 0.12996786434203386 test_loss: 0.13535189628601074
epoch: 19 training_loss 0.1223871748521924 test_loss: 0.1417815566062927
epoch: 20 training_loss 0.12790464267134666 test_loss: 0.12804569005966188
epoch: 21 training_loss 0.12700406182557344 test_loss: 0.11027761697769164
epoch: 22 training_loss 0.12241889081895352 test_loss: 0.11551830768585206
epoch: 23 training_loss 0.12544029619544744 test_loss: 0.13251761198043824
epoch: 24 training_loss 0.12506552647799254 test_loss: 0.12553999423980713
epoch: 25 training_loss 0.12800145104527474 test_loss: 0.12386720180511475
epoch: 26 training_loss 0.1282488115504384 test_loss: 0.134350049495697
epoch: 27 training_loss 0.12278900118544697 test_loss: 0.12302356958389282
epoch: 28 training_loss 0.1287744838371873 test_loss: 0.1268441081047058
epoch: 29 training_loss 0.11929608527570963 test_loss: 0.11758983135223389
epoch: 30 training_loss 0.11661519598215818 test_loss: 0.136155104637146
epoch: 31 training_loss 0.11231922563165427 test_loss: 0.13362654447555541
epoch: 32 training_loss 0.11831712931394577 test_loss: 0.14392099380493165
epoch: 33 training_loss 0.11050450503826141 test_loss: 0.14200332164764404
epoch: 34 training_loss 0.1201818972826004 test_loss: 0.12520653009414673
epoch: 35 training_loss 0.1246320715546608 test_loss: 0.12998369932174683
epoch: 36 training_loss 0.11848521687090396 test_loss: 0.11727532148361205
epoch: 37 training_loss 0.11625045021995901 test_loss: 0.12445350885391235
epoch: 38 training_loss 0.11773645998910069 test_loss: 0.1219509482383728
epoch: 39 training_loss 0.1236120718717575 test_loss: 0.12169164419174194
epoch: 40 training_loss 0.12599803145974875 test_loss: 0.11804308891296386
epoch: 41 training_loss 0.13405692081898452 test_loss: 0.15363513231277465
epoch: 42 training_loss 0.11933029897511005 test_loss: 0.12925175428390503
epoch: 43 training_loss 0.12227588009089231 test_loss: 0.13250563144683838
epoch: 44 training_loss 0.12209597520530224 test_loss: 0.13840504884719848
epoch: 45 training_loss 0.11771445069462061 test_loss: 0.10204886198043824
epoch: 46 training_loss 0.11985564248636366 test_loss: 0.1277977705001831
epoch: 47 training_loss 0.11238902419805527 test_loss: 0.1484112858772278
epoch: 48 training_loss 0.11596694201231003 test_loss: 0.1476476788520813
epoch: 49 training_loss 0.11243970513343811 test_loss: 0.102595853805542
epoch: 50 training_loss 0.11897175777703524 test_loss: 0.12061676979064942
epoch: 51 training_loss 0.11863619988784194 test_loss: 0.12485367059707642
epoch: 52 training_loss 0.12181974802166223 test_loss: 0.1373937726020813
epoch: 53 training_loss 0.11575822673738002 test_loss: 0.1335151195526123
epoch: 54 training_loss 0.11957633204758167 test_loss: 0.11723380088806153
epoch: 55 training_loss 0.1156323585100472 test_loss: 0.12796921730041505
epoch: 56 training_loss 0.11161205563694239 test_loss: 0.12916241884231566
epoch: 57 training_loss 0.12536734897643328 test_loss: 0.13375473022460938
epoch: 58 training_loss 0.12483016841113567 test_loss: 0.11174263954162597
epoch: 59 training_loss 0.11926410419866443 test_loss: 0.13242926597595214
epoch: 60 training_loss 0.11874717850238085 test_loss: 0.1207917332649231
epoch: 61 training_loss 0.11695201948285103 test_loss: 0.12576237916946412
epoch: 62 training_loss 0.1168353409320116 test_loss: 0.12876874208450317
epoch: 63 training_loss 0.11153889138251544 test_loss: 0.10338647365570068
epoch: 64 training_loss 0.11845135141164065 test_loss: 0.10737093687057495
epoch: 65 training_loss 0.11873619981110096 test_loss: 0.12514468431472778
epoch: 66 training_loss 0.11856889072805643 test_loss: 0.10424082279205323
epoch: 67 training_loss 0.11956334168091416 test_loss: 0.11940646171569824
epoch: 68 training_loss 0.1168926327675581 test_loss: 0.11359890699386596
epoch: 69 training_loss 0.10674231648445129 test_loss: 0.12147213220596313
epoch: 70 training_loss 0.12764704655855894 test_loss: 0.12565747499465943
epoch: 71 training_loss 0.11656298838555813 test_loss: 0.11082072257995605
epoch: 72 training_loss 0.10995530057698488 test_loss: 0.11014885902404785
epoch: 73 training_loss 0.11029138442128897 test_loss: 0.11398966312408447
epoch: 74 training_loss 0.11542014010250569 test_loss: 0.12019913196563721
epoch: 75 training_loss 0.11313367061316967 test_loss: 0.1308587908744812
epoch: 76 training_loss 0.11195032607764005 test_loss: 0.12909836769104005
epoch: 77 training_loss 0.1159261104464531 test_loss: 0.11270794868469239
epoch: 78 training_loss 0.12327547904103994 test_loss: 0.11147288084030152
epoch: 79 training_loss 0.11599263902753591 test_loss: 0.1160852313041687
epoch: 80 training_loss 0.12094028552994132 test_loss: 0.13810452222824096
epoch: 81 training_loss 0.11394446074962616 test_loss: 0.1266353487968445
epoch: 82 training_loss 0.11568176660686731 test_loss: 0.1164621353149414
epoch: 83 training_loss 0.12415418200194836 test_loss: 0.12473526000976562
epoch: 84 training_loss 0.11617591358721256 test_loss: 0.12266887426376342
epoch: 85 training_loss 0.11257038660347461 test_loss: 0.14092470407485963
epoch: 86 training_loss 0.11861309517174959 test_loss: 0.1292663812637329
epoch: 87 training_loss 0.11351065224036574 test_loss: 0.13425877094268798
epoch: 88 training_loss 0.11368080150336027 test_loss: 0.11761008501052857
epoch: 89 training_loss 0.12291145205497742 test_loss: 0.13963372707366944
epoch: 90 training_loss 0.11212711997330188 test_loss: 0.124450421333313
epoch: 91 training_loss 0.11176437446847558 test_loss: 0.11863009929656983
epoch: 92 training_loss 0.11681777901947499 test_loss: 0.12293390035629273
epoch: 93 training_loss 0.11162607761099935 test_loss: 0.13764230012893677
epoch: 94 training_loss 0.11674890663474798 test_loss: 0.12977458238601686
epoch: 95 training_loss 0.12411925602704287 test_loss: 0.1366149067878723
epoch: 96 training_loss 0.11646174140274525 test_loss: 0.11500015258789062
epoch: 97 training_loss 0.11535723641514778 test_loss: 0.12220184803009033
epoch: 98 training_loss 0.10645899849012494 test_loss: 0.11301002502441407
epoch: 99 training_loss 0.11778450004756451 test_loss: 0.11728156805038452
epoch: 100 training_loss 0.11327828405424953 test_loss: 0.11296128034591675
epoch: 101 training_loss 0.11935379266738892 test_loss: 0.13489871025085448
epoch: 102 training_loss 0.11817167665809393 test_loss: 0.10641725063323974
epoch: 103 training_loss 0.11820762030780316 test_loss: 0.12472771406173706
epoch: 104 training_loss 0.1154513131827116 test_loss: 0.13480242490768432
epoch: 105 training_loss 0.12192925212904811 test_loss: 0.09872533082962036
epoch: 106 training_loss 0.11316274542361499 test_loss: 0.12157390117645264
epoch: 107 training_loss 0.11945139229297638 test_loss: 0.1335052251815796
epoch: 108 training_loss 0.11689640175551176 test_loss: 0.12519341707229614
epoch: 109 training_loss 0.1109454757347703 test_loss: 0.12416285276412964
epoch: 110 training_loss 0.1195244139432907 test_loss: 0.11965911388397217
epoch: 111 training_loss 0.1130176967009902 test_loss: 0.12145371437072754
epoch: 112 training_loss 0.11883501805365086 test_loss: 0.1163110613822937
epoch: 113 training_loss 0.11882686600089074 test_loss: 0.13039097785949708
epoch: 114 training_loss 0.11489801585674286 test_loss: 0.10646148920059204
epoch: 115 training_loss 0.11280322801321745 test_loss: 0.10137752294540406
epoch: 116 training_loss 0.11942053381353616 test_loss: 0.12731190919876098
epoch: 117 training_loss 0.11350535159930587 test_loss: 0.12408267259597779
epoch: 118 training_loss 0.11144276842474937 test_loss: 0.11166290044784546
epoch: 119 training_loss 0.11615064281970262 test_loss: 0.09984028339385986
epoch: 120 training_loss 0.1189951016753912 test_loss: 0.11565189361572266
epoch: 121 training_loss 0.1170214781910181 test_loss: 0.11110514402389526
epoch: 122 training_loss 0.11011634530499577 test_loss: 0.12892708778381348
epoch: 123 training_loss 0.11052800208330155 test_loss: 0.10787999629974365
epoch: 124 training_loss 0.10738679951056838 test_loss: 0.11769360303878784
epoch: 125 training_loss 0.11810506906360388 test_loss: 0.11295714378356933
epoch: 126 training_loss 0.11772088730707765 test_loss: 0.10704580545425416
epoch: 127 training_loss 0.1060906557366252 test_loss: 0.11433316469192505
epoch: 128 training_loss 0.11790901783853769 test_loss: 0.1161351203918457
epoch: 129 training_loss 0.11204716373234987 test_loss: 0.10129131078720092
epoch: 130 training_loss 0.12038991887122392 test_loss: 0.11699857711791992
epoch: 131 training_loss 0.11990933205932379 test_loss: 0.11662180423736572
epoch: 132 training_loss 0.11156635574996471 test_loss: 0.11818587779998779
epoch: 133 training_loss 0.11535938661545515 test_loss: 0.09744601249694824
epoch: 134 training_loss 0.1149739871174097 test_loss: 0.11603808403015137
epoch: 135 training_loss 0.11489322911947966 test_loss: 0.10713415145874024
epoch: 136 training_loss 0.11004020635038614 test_loss: 0.13760398626327514
epoch: 137 training_loss 0.10590827200561762 test_loss: 0.1138684630393982
epoch: 138 training_loss 0.11605544511228799 test_loss: 0.12990729808807372
epoch: 139 training_loss 0.11842827431857586 test_loss: 0.12794363498687744
epoch: 140 training_loss 0.10941567478701472 test_loss: 0.11793342828750611
epoch: 141 training_loss 0.11432748436927795 test_loss: 0.12167268991470337
epoch: 142 training_loss 0.11471029907464982 test_loss: 0.11071943044662476
epoch: 143 training_loss 0.11109021414071321 test_loss: 0.11857025623321533
epoch: 144 training_loss 0.11674642495810986 test_loss: 0.12121256589889526
epoch: 145 training_loss 0.11575830597430467 test_loss: 0.12533910274505616
epoch: 146 training_loss 0.10608743270859122 test_loss: 0.11407216787338256
epoch: 147 training_loss 0.11792785525321961 test_loss: 0.11065019369125366
epoch: 148 training_loss 0.11729868158698081 test_loss: 0.11008176803588868
epoch: 149 training_loss 0.11188444592058659 test_loss: 0.13561813831329345
epoch: 0 training_loss 0.2652276346087456 test_loss: 0.2171947956085205
epoch: 1 training_loss 0.1871980708092451 test_loss: 0.15830774307250978
epoch: 2 training_loss 0.17052853833884002 test_loss: 0.19211553335189818
epoch: 3 training_loss 0.16968968659639358 test_loss: 0.1748402953147888
epoch: 4 training_loss 0.15275243896991014 test_loss: 0.14789836406707763
epoch: 5 training_loss 0.15768822852522135 test_loss: 0.14717093706130982
epoch: 6 training_loss 0.1451210440322757 test_loss: 0.14424488544464112
epoch: 7 training_loss 0.15270047906786202 test_loss: 0.1551659107208252
epoch: 8 training_loss 0.14045654967427254 test_loss: 0.12952433824539183
epoch: 9 training_loss 0.14321787122637034 test_loss: 0.12521760463714598
epoch: 10 training_loss 0.1325009487941861 test_loss: 0.15066550970077514
epoch: 11 training_loss 0.1310547449067235 test_loss: 0.1445665955543518
epoch: 12 training_loss 0.13348055291920902 test_loss: 0.1487409234046936
epoch: 13 training_loss 0.1376265177130699 test_loss: 0.1431326150894165
epoch: 14 training_loss 0.1269432630762458 test_loss: 0.14118808507919312
epoch: 15 training_loss 0.13393088087439536 test_loss: 0.1477250337600708
epoch: 16 training_loss 0.12693532764911652 test_loss: 0.13859472274780274
epoch: 17 training_loss 0.12724828209728004 test_loss: 0.1404491424560547
epoch: 18 training_loss 0.13286487985402345 test_loss: 0.1558797240257263
epoch: 19 training_loss 0.1360149809718132 test_loss: 0.13386045694351195
epoch: 20 training_loss 0.12388605969026685 test_loss: 0.12833683490753173
epoch: 21 training_loss 0.1226779779046774 test_loss: 0.11820573806762695
epoch: 22 training_loss 0.12230940978974104 test_loss: 0.13456844091415404
epoch: 23 training_loss 0.12242901533842086 test_loss: 0.12766729593276976
epoch: 24 training_loss 0.12383200004696845 test_loss: 0.1337297558784485
epoch: 25 training_loss 0.12018647970631718 test_loss: 0.11917417049407959
epoch: 26 training_loss 0.12485127676278353 test_loss: 0.13611166477203368
epoch: 27 training_loss 0.12679130006581546 test_loss: 0.12924542427062988
epoch: 28 training_loss 0.1345522141084075 test_loss: 0.14217582941055298
epoch: 29 training_loss 0.1253039376810193 test_loss: 0.14142218828201295
epoch: 30 training_loss 0.13082666397094728 test_loss: 0.12905899286270142
epoch: 31 training_loss 0.12224253207445145 test_loss: 0.13484454154968262
epoch: 32 training_loss 0.12242996007204056 test_loss: 0.15860408544540405
epoch: 33 training_loss 0.11360943781211973 test_loss: 0.1425703287124634
epoch: 34 training_loss 0.1141457991115749 test_loss: 0.12726123332977296
epoch: 35 training_loss 0.12435808841139079 test_loss: 0.12600200176239013
epoch: 36 training_loss 0.1154083782248199 test_loss: 0.14673149585723877
epoch: 37 training_loss 0.11757435441017151 test_loss: 0.12710244655609132
epoch: 38 training_loss 0.13180889189243317 test_loss: 0.1350970983505249
epoch: 39 training_loss 0.11857465885579586 test_loss: 0.14636316299438476
epoch: 40 training_loss 0.11806782487779856 test_loss: 0.13323349952697755
epoch: 41 training_loss 0.12527247630059718 test_loss: 0.1437330961227417
epoch: 42 training_loss 0.12115060187876224 test_loss: 0.12024832963943481
epoch: 43 training_loss 0.11596635933965445 test_loss: 0.11769009828567505
epoch: 44 training_loss 0.12280708591453732 test_loss: 0.1333785891532898
epoch: 45 training_loss 0.12409221868962049 test_loss: 0.1158257007598877
epoch: 46 training_loss 0.11718221435323357 test_loss: 0.1267501473426819
epoch: 47 training_loss 0.11536389425396919 test_loss: 0.12736887931823732
epoch: 48 training_loss 0.1123123299703002 test_loss: 0.1347745895385742
epoch: 49 training_loss 0.1247720666974783 test_loss: 0.1258683681488037
epoch: 50 training_loss 0.12569837845861911 test_loss: 0.12191416025161743
epoch: 51 training_loss 0.12029570762068033 test_loss: 0.11545413732528687
epoch: 52 training_loss 0.1259331527352333 test_loss: 0.13800599575042724
epoch: 53 training_loss 0.12667447492480277 test_loss: 0.12013627290725708
epoch: 54 training_loss 0.11647195342928171 test_loss: 0.13039422035217285
epoch: 55 training_loss 0.12150264326483011 test_loss: 0.13221440315246583
epoch: 56 training_loss 0.11233939357101917 test_loss: 0.13646734952926637
epoch: 57 training_loss 0.11656712392345071 test_loss: 0.1123235583305359
epoch: 58 training_loss 0.1179668184183538 test_loss: 0.1485777735710144
epoch: 59 training_loss 0.12707990158349275 test_loss: 0.13003391027450562
epoch: 60 training_loss 0.11167044218629599 test_loss: 0.1345156192779541
epoch: 61 training_loss 0.11042128838598728 test_loss: 0.12231227159500122
epoch: 62 training_loss 0.10747747028246522 test_loss: 0.11016514301300048
epoch: 63 training_loss 0.12070844385772944 test_loss: 0.11864159107208253
epoch: 64 training_loss 0.1069571766629815 test_loss: 0.11756534576416015
epoch: 65 training_loss 0.11324367694556713 test_loss: 0.12441787719726563
epoch: 66 training_loss 0.12253455441445112 test_loss: 0.11410388946533204
epoch: 67 training_loss 0.11985612716525793 test_loss: 0.13055167198181153
epoch: 68 training_loss 0.12345027003437281 test_loss: 0.1269379138946533
epoch: 69 training_loss 0.1157493656873703 test_loss: 0.1458137273788452
epoch: 70 training_loss 0.1135529045201838 test_loss: 0.11348035335540771
epoch: 71 training_loss 0.11232856696471571 test_loss: 0.14647735357284547
epoch: 72 training_loss 0.11241865854710341 test_loss: 0.1290177583694458
epoch: 73 training_loss 0.11313296934589744 test_loss: 0.15052626132965088
epoch: 74 training_loss 0.11174284979701042 test_loss: 0.13412872552871705
epoch: 75 training_loss 0.11605045635253192 test_loss: 0.14527640342712403
epoch: 76 training_loss 0.11451889242976904 test_loss: 0.12668946981430054
epoch: 77 training_loss 0.11663136918097734 test_loss: 0.12155152559280395
epoch: 78 training_loss 0.10999270502477884 test_loss: 0.11944082975387574
epoch: 79 training_loss 0.11356349833309651 test_loss: 0.14213063716888427
epoch: 80 training_loss 0.1094647116586566 test_loss: 0.12230799198150635
epoch: 81 training_loss 0.1138848065584898 test_loss: 0.14139463901519775
epoch: 82 training_loss 0.12407093957066535 test_loss: 0.1247178554534912
epoch: 83 training_loss 0.11401890663430095 test_loss: 0.12140039205551148
epoch: 84 training_loss 0.11962336532771588 test_loss: 0.11819227933883666
epoch: 85 training_loss 0.11678906995803118 test_loss: 0.10537108182907104
epoch: 86 training_loss 0.11179529912769795 test_loss: 0.12974901199340821
epoch: 87 training_loss 0.11677687004208565 test_loss: 0.14181814193725586
epoch: 88 training_loss 0.112383156940341 test_loss: 0.1313697099685669
epoch: 89 training_loss 0.1190650049969554 test_loss: 0.11910070180892944
epoch: 90 training_loss 0.12064513932913541 test_loss: 0.13061872720718384
epoch: 91 training_loss 0.11875515088438987 test_loss: 0.1110646367073059
epoch: 92 training_loss 0.11744481567293405 test_loss: 0.11809875965118408
epoch: 93 training_loss 0.11068325959146023 test_loss: 0.12293146848678589
epoch: 94 training_loss 0.11029303634539246 test_loss: 0.13680477142333985
epoch: 95 training_loss 0.122416417542845 test_loss: 0.10503875017166138
epoch: 96 training_loss 0.12047400690615177 test_loss: 0.13050336837768556
epoch: 97 training_loss 0.11732161037623882 test_loss: 0.12878066301345825
epoch: 98 training_loss 0.11763983583077789 test_loss: 0.12564212083816528
epoch: 99 training_loss 0.10086290733888745 test_loss: 0.11633890867233276
epoch: 100 training_loss 0.12743875585496425 test_loss: 0.1302361845970154
epoch: 101 training_loss 0.10931350015103818 test_loss: 0.12761040925979614
epoch: 102 training_loss 0.11085455201566219 test_loss: 0.1464290976524353
epoch: 103 training_loss 0.12349626053124667 test_loss: 0.1265757441520691
epoch: 104 training_loss 0.11549203135073186 test_loss: 0.12997021675109863
epoch: 105 training_loss 0.10868792917579412 test_loss: 0.11812754869461059
epoch: 106 training_loss 0.11520179534330964 test_loss: 0.15656054019927979
epoch: 107 training_loss 0.11200859602540732 test_loss: 0.13241788148880004
epoch: 108 training_loss 0.11234554681926966 test_loss: 0.14450684785842896
epoch: 109 training_loss 0.11587751541286707 test_loss: 0.12403936386108398
epoch: 110 training_loss 0.10492979785427452 test_loss: 0.1415343165397644
epoch: 111 training_loss 0.10693800231441855 test_loss: 0.12666828632354737
epoch: 112 training_loss 0.10914082646369934 test_loss: 0.13045588731765748
epoch: 113 training_loss 0.10490664929151534 test_loss: 0.118213951587677
epoch: 114 training_loss 0.110153219550848 test_loss: 0.11784161329269409
epoch: 115 training_loss 0.11979689119383692 test_loss: 0.11381814479827881
epoch: 116 training_loss 0.11618238411843777 test_loss: 0.1424974799156189
epoch: 117 training_loss 0.10794413682073355 test_loss: 0.11668450832366943
epoch: 118 training_loss 0.11156943457201124 test_loss: 0.14444879293441773
epoch: 119 training_loss 0.1210376139357686 test_loss: 0.11796718835830688
epoch: 120 training_loss 0.1156077367812395 test_loss: 0.142436683177948
epoch: 121 training_loss 0.11203856956213713 test_loss: 0.14372422695159912
epoch: 122 training_loss 0.1240654107183218 test_loss: 0.12282487154006957
epoch: 123 training_loss 0.1110801201313734 test_loss: 0.11898961067199706
epoch: 124 training_loss 0.11766886297613383 test_loss: 0.12924503087997435
epoch: 125 training_loss 0.11169392559677363 test_loss: 0.11042757034301758
epoch: 126 training_loss 0.10725404437631368 test_loss: 0.13517975807189941
epoch: 127 training_loss 0.11475903380662203 test_loss: 0.13174935579299926
epoch: 128 training_loss 0.11601350491866469 test_loss: 0.12199403047561645
epoch: 129 training_loss 0.11564019951969386 test_loss: 0.11545279026031494
epoch: 130 training_loss 0.10678368452936411 test_loss: 0.11678564548492432
epoch: 131 training_loss 0.11478541549295188 test_loss: 0.13192371129989625
epoch: 132 training_loss 0.11290739823132753 test_loss: 0.1445408821105957
epoch: 133 training_loss 0.11954548642039299 test_loss: 0.10900962352752686
epoch: 134 training_loss 0.11210815111175179 test_loss: 0.11692736148834229
epoch: 135 training_loss 0.1098291738703847 test_loss: 0.14517818689346312
epoch: 136 training_loss 0.1189807390794158 test_loss: 0.12035351991653442
epoch: 137 training_loss 0.10701835960149765 test_loss: 0.12910760641098024
epoch: 138 training_loss 0.11307784531265497 test_loss: 0.12112464904785156
epoch: 139 training_loss 0.12296493079513311 test_loss: 0.11636154651641846
epoch: 140 training_loss 0.11673223797231913 test_loss: 0.1139917254447937
epoch: 141 training_loss 0.11274928763508797 test_loss: 0.14091553688049316
epoch: 142 training_loss 0.1125853031873703 test_loss: 0.11704555749893189
epoch: 143 training_loss 0.11269452776759863 test_loss: 0.12586305141448975
epoch: 144 training_loss 0.11549522206187249 test_loss: 0.11758569478988648
epoch: 145 training_loss 0.11706258788704872 test_loss: 0.12012592554092408
epoch: 146 training_loss 0.11531598333269358 test_loss: 0.13989830017089844
epoch: 147 training_loss 0.11472300443798304 test_loss: 0.14105355739593506
epoch: 148 training_loss 0.10475288800895215 test_loss: 0.14314261674880982
epoch: 149 training_loss 0.11506673470139503 test_loss: 0.10917161703109741
epoch: 0 training_loss 0.27541712366044524 test_loss: 0.2099665880203247
epoch: 1 training_loss 0.19394448027014732 test_loss: 0.16625142097473145
epoch: 2 training_loss 0.16378132987767458 test_loss: 0.14415611028671266
epoch: 3 training_loss 0.1658249718695879 test_loss: 0.1567897915840149
epoch: 4 training_loss 0.1513754663616419 test_loss: 0.16999459266662598
epoch: 5 training_loss 0.14627327919006347 test_loss: 0.18896769285202025
epoch: 6 training_loss 0.14847623649984598 test_loss: 0.1616509199142456
epoch: 7 training_loss 0.1424615179747343 test_loss: 0.12676446437835692
epoch: 8 training_loss 0.13961656473577022 test_loss: 0.1459120512008667
epoch: 9 training_loss 0.13577861243858932 test_loss: 0.1432763695716858
epoch: 10 training_loss 0.14031466618180274 test_loss: 0.1398207426071167
epoch: 11 training_loss 0.14533371292054653 test_loss: 0.14235754013061525
epoch: 12 training_loss 0.12848346445709466 test_loss: 0.13779643774032593
epoch: 13 training_loss 0.14461547177284956 test_loss: 0.12312942743301392
epoch: 14 training_loss 0.1275274884328246 test_loss: 0.11715073585510254
epoch: 15 training_loss 0.13118029691278935 test_loss: 0.13951435089111328
epoch: 16 training_loss 0.13128487326204777 test_loss: 0.13008402585983275
epoch: 17 training_loss 0.12027956377714873 test_loss: 0.12437410354614258
epoch: 18 training_loss 0.13136124651879072 test_loss: 0.12170149087905884
epoch: 19 training_loss 0.13229873776435852 test_loss: 0.11883918046951295
epoch: 20 training_loss 0.12687002629041672 test_loss: 0.12768361568450928
epoch: 21 training_loss 0.13300147717818617 test_loss: 0.14832214117050171
epoch: 22 training_loss 0.11622677531093359 test_loss: 0.11813917160034179
epoch: 23 training_loss 0.1185207348689437 test_loss: 0.1401537537574768
epoch: 24 training_loss 0.12735043477267027 test_loss: 0.11751327514648438
epoch: 25 training_loss 0.1223714710958302 test_loss: 0.1318935513496399
epoch: 26 training_loss 0.12626839369535447 test_loss: 0.1211856484413147
epoch: 27 training_loss 0.12052380554378032 test_loss: 0.11581243276596069
epoch: 28 training_loss 0.11955164916813374 test_loss: 0.1430330753326416
epoch: 29 training_loss 0.12327976498752832 test_loss: 0.13275840282440185
epoch: 30 training_loss 0.1306820045784116 test_loss: 0.12311245203018188
epoch: 31 training_loss 0.1175633305311203 test_loss: 0.1268961787223816
epoch: 32 training_loss 0.11630966503173112 test_loss: 0.13086647987365724
epoch: 33 training_loss 0.11720954250544309 test_loss: 0.14294742345809935
epoch: 34 training_loss 0.11960693094879389 test_loss: 0.1196818232536316
epoch: 35 training_loss 0.12577803891152142 test_loss: 0.11666656732559204
epoch: 36 training_loss 0.11762850433588028 test_loss: 0.12508472204208373
epoch: 37 training_loss 0.11872310768812895 test_loss: 0.13222991228103637
epoch: 38 training_loss 0.12839640431106092 test_loss: 0.1373790979385376
epoch: 39 training_loss 0.11703573394566774 test_loss: 0.12632566690444946
epoch: 40 training_loss 0.12355364456772805 test_loss: 0.12602945566177368
epoch: 41 training_loss 0.1287751435302198 test_loss: 0.14875080585479736
epoch: 42 training_loss 0.12415731064975262 test_loss: 0.1350459337234497
epoch: 43 training_loss 0.12157074850052595 test_loss: 0.12962765693664552
epoch: 44 training_loss 0.11709580667316914 test_loss: 0.13886651992797852
epoch: 45 training_loss 0.1213277456909418 test_loss: 0.13067883253097534
epoch: 46 training_loss 0.12173615466803313 test_loss: 0.12253665924072266
epoch: 47 training_loss 0.12225461628288031 test_loss: 0.1280922532081604
epoch: 48 training_loss 0.12552369764074683 test_loss: 0.09950008988380432
epoch: 49 training_loss 0.11876852672547102 test_loss: 0.12539750337600708
epoch: 50 training_loss 0.1316465376317501 test_loss: 0.12082226276397705
epoch: 51 training_loss 0.12250135406851768 test_loss: 0.11859183311462403
epoch: 52 training_loss 0.12384148232638836 test_loss: 0.12230896949768066
epoch: 53 training_loss 0.1135674401372671 test_loss: 0.13121938705444336
epoch: 54 training_loss 0.11638538166880608 test_loss: 0.14656147956848145
epoch: 55 training_loss 0.11574895586818457 test_loss: 0.11231716871261596
epoch: 56 training_loss 0.11962527096271515 test_loss: 0.10135847330093384
epoch: 57 training_loss 0.11960854511708022 test_loss: 0.11402784585952759
epoch: 58 training_loss 0.11200423035770654 test_loss: 0.11196564435958863
epoch: 59 training_loss 0.11805037066340446 test_loss: 0.13412861824035643
epoch: 60 training_loss 0.11218274487182497 test_loss: 0.11050128936767578
epoch: 61 training_loss 0.11084888188168407 test_loss: 0.13021799325942993
epoch: 62 training_loss 0.11266575470566749 test_loss: 0.12899774312973022
epoch: 63 training_loss 0.1165725664049387 test_loss: 0.11857143640518189
epoch: 64 training_loss 0.11484513603150845 test_loss: 0.12389823198318481
epoch: 65 training_loss 0.11585569716989993 test_loss: 0.12947431802749634
epoch: 66 training_loss 0.11789504066109657 test_loss: 0.1136825442314148
epoch: 67 training_loss 0.11404042160138488 test_loss: 0.11913765668869018
epoch: 68 training_loss 0.1149581727012992 test_loss: 0.12337272167205811
epoch: 69 training_loss 0.11588517002761364 test_loss: 0.12453300952911377
epoch: 70 training_loss 0.11903554860502481 test_loss: 0.11216082572937011
epoch: 71 training_loss 0.11387830216437578 test_loss: 0.12638252973556519
epoch: 72 training_loss 0.11796288918703794 test_loss: 0.13384172916412354
epoch: 73 training_loss 0.11737937645986676 test_loss: 0.10124192237854004
epoch: 74 training_loss 0.12034694388508797 test_loss: 0.11729000806808472
epoch: 75 training_loss 0.11297446068376303 test_loss: 0.11788204908370972
epoch: 76 training_loss 0.11555793385952712 test_loss: 0.1182555913925171
epoch: 77 training_loss 0.10655326630920171 test_loss: 0.12133215665817261
epoch: 78 training_loss 0.11780132111161948 test_loss: 0.1157884955406189
epoch: 79 training_loss 0.1206894899904728 test_loss: 0.13587939739227295
epoch: 80 training_loss 0.1211559546738863 test_loss: 0.1164980411529541
epoch: 81 training_loss 0.11715268881991506 test_loss: 0.11754482984542847
epoch: 82 training_loss 0.11575725411996246 test_loss: 0.13238489627838135
epoch: 83 training_loss 0.11076696019619703 test_loss: 0.12046314477920532
epoch: 84 training_loss 0.12639040902256965 test_loss: 0.11422944068908691
epoch: 85 training_loss 0.12160333164036274 test_loss: 0.11544513702392578
epoch: 86 training_loss 0.11097097739577294 test_loss: 0.10715556144714355
epoch: 87 training_loss 0.11406723327934742 test_loss: 0.13084765672683715
epoch: 88 training_loss 0.11805185582488775 test_loss: 0.11472828388214111
epoch: 89 training_loss 0.1121665594354272 test_loss: 0.12089998722076416
epoch: 90 training_loss 0.11447812020778655 test_loss: 0.15226777791976928
epoch: 91 training_loss 0.11301683686673641 test_loss: 0.13993197679519653
epoch: 92 training_loss 0.12074349213391543 test_loss: 0.10663294792175293
epoch: 93 training_loss 0.11062592405825854 test_loss: 0.12314469814300537
epoch: 94 training_loss 0.10955190306529403 test_loss: 0.11639980077743531
epoch: 95 training_loss 0.11463334262371064 test_loss: 0.12254842519760131
epoch: 96 training_loss 0.11441537175327539 test_loss: 0.12874505519866944
epoch: 97 training_loss 0.12354106437414884 test_loss: 0.12157801389694214
epoch: 98 training_loss 0.11720010675489903 test_loss: 0.11175717115402221
epoch: 99 training_loss 0.11296711347997189 test_loss: 0.12392188310623169
epoch: 100 training_loss 0.11962422147393227 test_loss: 0.11367212533950806
epoch: 101 training_loss 0.11634120170027018 test_loss: 0.1435442328453064
epoch: 102 training_loss 0.1058594754897058 test_loss: 0.12708780765533448
epoch: 103 training_loss 0.11348925326019525 test_loss: 0.11196212768554688
epoch: 104 training_loss 0.1239059415459633 test_loss: 0.1089428186416626
epoch: 105 training_loss 0.12489449698477983 test_loss: 0.13057183027267455
epoch: 106 training_loss 0.1280272848904133 test_loss: 0.1137574553489685
epoch: 107 training_loss 0.11369171915575862 test_loss: 0.09968618154525757
epoch: 108 training_loss 0.12355684598907829 test_loss: 0.1374130964279175
epoch: 109 training_loss 0.126066563911736 test_loss: 0.12365666627883912
epoch: 110 training_loss 0.11342680409550666 test_loss: 0.11729415655136108
epoch: 111 training_loss 0.124789136685431 test_loss: 0.12061649560928345
epoch: 112 training_loss 0.1125509094633162 test_loss: 0.12467832565307617
epoch: 113 training_loss 0.10881201896816492 test_loss: 0.1290225625038147
epoch: 114 training_loss 0.11664642088115215 test_loss: 0.13785176277160643
epoch: 115 training_loss 0.11018958788365125 test_loss: 0.12346808910369873
epoch: 116 training_loss 0.1151217707619071 test_loss: 0.12281373739242554
epoch: 117 training_loss 0.11634090177714825 test_loss: 0.12545651197433472
epoch: 118 training_loss 0.1114966742694378 test_loss: 0.11014739274978638
epoch: 119 training_loss 0.11376697808504105 test_loss: 0.12194020748138427
epoch: 120 training_loss 0.1212124452739954 test_loss: 0.10895938873291015
epoch: 121 training_loss 0.11484360609203577 test_loss: 0.116252863407135
epoch: 122 training_loss 0.11522237669676542 test_loss: 0.10593782663345337
epoch: 123 training_loss 0.12864395271986723 test_loss: 0.13017730712890624
epoch: 124 training_loss 0.11845310050994158 test_loss: 0.1057200312614441
epoch: 125 training_loss 0.11216106379404664 test_loss: 0.13256131410598754
epoch: 126 training_loss 0.11675448209047318 test_loss: 0.1266951560974121
epoch: 127 training_loss 0.11530512504279614 test_loss: 0.12883005142211915
epoch: 128 training_loss 0.11895560618489981 test_loss: 0.13920055627822875
epoch: 129 training_loss 0.11831350335851312 test_loss: 0.12221676111221313
epoch: 130 training_loss 0.11289655461907387 test_loss: 0.12270703315734863
epoch: 131 training_loss 0.11438103552907705 test_loss: 0.1234437108039856
epoch: 132 training_loss 0.11338751524686813 test_loss: 0.11521744728088379
epoch: 133 training_loss 0.11630435973405838 test_loss: 0.12734543085098265
epoch: 134 training_loss 0.12063319487497211 test_loss: 0.12078218460083008
epoch: 135 training_loss 0.10925915360450744 test_loss: 0.12195819616317749
epoch: 136 training_loss 0.11689631782472133 test_loss: 0.1310031771659851
epoch: 137 training_loss 0.11485489685088396 test_loss: 0.11954045295715332
epoch: 138 training_loss 0.1144158991985023 test_loss: 0.11695046424865722
epoch: 139 training_loss 0.1173836431838572 test_loss: 0.12220524549484253
epoch: 140 training_loss 0.1175807112082839 test_loss: 0.11658384799957275
epoch: 141 training_loss 0.1208770740032196 test_loss: 0.10839788913726807
epoch: 142 training_loss 0.11841965552419424 test_loss: 0.10409890413284302
epoch: 143 training_loss 0.11803229831159115 test_loss: 0.10339467525482178
epoch: 144 training_loss 0.11434095900505781 test_loss: 0.12320687770843505
epoch: 145 training_loss 0.11635633926838636 test_loss: 0.14182502031326294
epoch: 146 training_loss 0.11623706541955471 test_loss: 0.1233772873878479
epoch: 147 training_loss 0.12390497721731662 test_loss: 0.11610314846038819
epoch: 148 training_loss 0.11270747937262059 test_loss: 0.10218611955642701
epoch: 149 training_loss 0.10968758180737495 test_loss: 0.122661554813385
episode: 0 training return: -1531.2533804007417
episode: 1 training return: -1564.006097277557
episode: 2 training return: -1679.568491461568
episode: 3 training return: -1493.0659998830743
epoch: 1 test_true_pfm: -61.423120286978225 sim_pfm: -1038.5278588832823
episode: 4 training return: -1510.1345243549722
episode: 5 training return: -1143.35545812219
episode: 6 training return: -1270.9743243627943
episode: 7 training return: -1418.8133104686576
epoch: 2 test_true_pfm: 378.23537986287766 sim_pfm: -906.9429437795403
episode: 8 training return: -1210.4473711419014
episode: 9 training return: -1514.597653034998
episode: 10 training return: -1046.5337106286531
episode: 11 training return: -1025.2617937677019
epoch: 3 test_true_pfm: -36.14464069688089 sim_pfm: -968.5478236489774
episode: 12 training return: -1014.6671493645737
episode: 13 training return: -1023.4007145631452
episode: 14 training return: -1082.3280952115563
episode: 15 training return: -1036.5827380434785
epoch: 4 test_true_pfm: 354.9573161001841 sim_pfm: -902.4547339342438
episode: 16 training return: -1024.6261003659672
episode: 17 training return: -1058.0742534243166
episode: 18 training return: -1046.9854370011874
episode: 19 training return: -1589.8477476095802
epoch: 5 test_true_pfm: -36.59808799959987 sim_pfm: -1626.180548062819
episode: 20 training return: -1282.8540409434347
episode: 21 training return: -1468.3598978460473
episode: 22 training return: -982.5811587494894
episode: 23 training return: -983.3676767349217
epoch: 6 test_true_pfm: -12.894801938028673 sim_pfm: -985.5272230032898
episode: 24 training return: -1062.1242441918916
episode: 25 training return: -1205.6668968347797
episode: 26 training return: -1070.830295943549
episode: 27 training return: -1044.0003367924255
epoch: 7 test_true_pfm: 115.84004887522978 sim_pfm: -983.5644760871047
episode: 28 training return: -990.1804536959514
episode: 29 training return: -1015.8566423927442
episode: 30 training return: -1036.0241700963788
episode: 31 training return: -984.1358861234393
epoch: 8 test_true_pfm: -139.19128037555768 sim_pfm: -1095.200964886747
episode: 32 training return: -990.9311897153632
episode: 33 training return: -992.2858141731856
episode: 34 training return: -1095.1264466236446
episode: 35 training return: -983.2700865692747
epoch: 9 test_true_pfm: -63.293618740078394 sim_pfm: -943.2123703743223
episode: 36 training return: -988.1371562213584
episode: 37 training return: -995.2014517893035
episode: 38 training return: -1041.6424451819703
episode: 39 training return: -1080.4843653555897
epoch: 10 test_true_pfm: 6.624500136622236 sim_pfm: -964.9221223461994
episode: 40 training return: -1099.0486498954526
episode: 41 training return: -1163.7671202915526
episode: 42 training return: -1030.3424940574316
episode: 43 training return: -1102.8608725996173
epoch: 11 test_true_pfm: -164.1737055155832 sim_pfm: -1095.0784196807563
episode: 44 training return: -1056.664452041386
episode: 45 training return: -1272.8217000092961
episode: 46 training return: -1477.3703857768332
episode: 47 training return: -1602.5252778022254
epoch: 12 test_true_pfm: -183.9957072384108 sim_pfm: -1639.2014001831092
episode: 48 training return: -1326.3102740670608
episode: 49 training return: -1381.580907547114
episode: 50 training return: -1200.0245139589051
episode: 51 training return: -2098.3297380048803
epoch: 13 test_true_pfm: -80.4123327176621 sim_pfm: -994.8797379985612
episode: 52 training return: -1017.5981805671645
episode: 53 training return: -1372.9204229629004
episode: 54 training return: -1313.3460599040466
episode: 55 training return: -1263.8268488750982
epoch: 14 test_true_pfm: -23.64408650942833 sim_pfm: -1343.9645743048445
episode: 56 training return: -1610.7254082632496
episode: 57 training return: -1051.7087607120766
episode: 58 training return: -1057.1072478748883
episode: 59 training return: -1042.2540396998759
epoch: 15 test_true_pfm: -69.43214350009781 sim_pfm: -1101.15547097565
episode: 60 training return: -1190.2384472427543
episode: 61 training return: -1667.2533535206594
episode: 62 training return: -1170.6659050641015
episode: 63 training return: -1146.876136360975
epoch: 16 test_true_pfm: 287.3249933848626 sim_pfm: -969.8640978300482
episode: 64 training return: -1072.0611093535103
episode: 65 training return: -1059.6903475405966
episode: 66 training return: -1171.2579456829878
episode: 67 training return: -1109.7834269945213
epoch: 17 test_true_pfm: -85.13398431323155 sim_pfm: -960.6114119549408
episode: 68 training return: -1042.67736621592
episode: 69 training return: -1077.821028144959
episode: 70 training return: -1040.3864886620206
episode: 71 training return: -1058.058037235608
epoch: 18 test_true_pfm: -63.66910565672737 sim_pfm: -956.0439491178731
episode: 72 training return: -1042.3663269048366
episode: 73 training return: -1047.2469918058546
episode: 74 training return: -1044.8081580219337
episode: 75 training return: -1040.3399568951563
epoch: 19 test_true_pfm: -48.52981693444764 sim_pfm: -943.4112283687854
episode: 76 training return: -1033.822044984292
episode: 77 training return: -1024.1502829146636
episode: 78 training return: -1020.6921530639897
episode: 79 training return: -1034.8280824734397
epoch: 20 test_true_pfm: 452.0525867355832 sim_pfm: -940.1300035618141
episode: 80 training return: -1027.7412798172124
episode: 81 training return: -1033.6900825146956
episode: 82 training return: -1028.0736230381708
episode: 83 training return: -1026.214700174905
epoch: 21 test_true_pfm: 389.4789991539971 sim_pfm: -952.0064290525355
episode: 84 training return: -1025.406856692074
episode: 85 training return: -1020.5599810881117
episode: 86 training return: -1033.2448813985698
episode: 87 training return: -1031.6340454049418
epoch: 22 test_true_pfm: 393.2333324643266 sim_pfm: -901.0387079856628
episode: 88 training return: -1052.8543302395308
episode: 89 training return: -1050.2101453346204
episode: 90 training return: -1030.6746888035461
episode: 91 training return: -1020.4554922323741
epoch: 23 test_true_pfm: -8.315064018057392 sim_pfm: -954.0186652569058
episode: 92 training return: -1023.6393241746052
episode: 93 training return: -1026.7076149319778
episode: 94 training return: -1025.9015737389486
episode: 95 training return: -1020.7725700886865
epoch: 24 test_true_pfm: 378.9628235937662 sim_pfm: -956.8222251538156
episode: 96 training return: -1028.8717183917115
episode: 97 training return: -1019.7766023330627
episode: 98 training return: -1031.1052922591555
episode: 99 training return: -1023.1807668608303
epoch: 25 test_true_pfm: 260.6656862554403 sim_pfm: -961.0309653005023
episode: 100 training return: -978.0423369713833
episode: 101 training return: -1022.8755583048188
episode: 102 training return: -1029.5040141360682
episode: 103 training return: -1024.1782410156643
epoch: 26 test_true_pfm: 280.97103641742854 sim_pfm: -964.8367719858712
episode: 104 training return: -1044.2589912699564
episode: 105 training return: -1015.257949940285
episode: 106 training return: -1019.6049968752087
episode: 107 training return: -1019.2783594814803
epoch: 27 test_true_pfm: 92.43586085360322 sim_pfm: -958.5482259628498
episode: 108 training return: -1030.7431273085635
episode: 109 training return: -1019.7539185346885
episode: 110 training return: -1022.6302136284379
episode: 111 training return: -1026.8742350970515
epoch: 28 test_true_pfm: 345.9968808387486 sim_pfm: -903.2298494280313
episode: 112 training return: -1039.3080591557023
episode: 113 training return: -1005.5096179172109
episode: 114 training return: -973.8468139057467
episode: 115 training return: -1024.93804409851
epoch: 29 test_true_pfm: 128.89204020520015 sim_pfm: -958.9404004575659
episode: 116 training return: -1009.317835542313
episode: 117 training return: -1023.0910021816485
episode: 118 training return: -1034.4073596017938
episode: 119 training return: -1007.4327384102575
epoch: 30 test_true_pfm: 52.52920482352348 sim_pfm: -958.4573383872779
episode: 120 training return: -1026.611362514884
episode: 121 training return: -1029.7474275750253
episode: 122 training return: -999.8693621151541
episode: 123 training return: -1026.5676331392863
epoch: 31 test_true_pfm: 350.6247078945555 sim_pfm: -962.1377949973029
episode: 124 training return: -1013.742372228816
episode: 125 training return: -1023.58887945991
episode: 126 training return: -1020.2447069629827
episode: 127 training return: -1022.3333415138826
epoch: 32 test_true_pfm: 26.49098166600864 sim_pfm: -957.1171815785187
episode: 128 training return: -1025.5255109415962
episode: 129 training return: -1021.6717145991222
episode: 130 training return: -1011.4293864743648
episode: 131 training return: -1018.2361821209355
epoch: 33 test_true_pfm: 33.15641089192321 sim_pfm: -958.7436194991536
episode: 132 training return: -1023.1680077844644
episode: 133 training return: -1023.8875384605001
episode: 134 training return: -1017.4602374982613
episode: 135 training return: -1024.4165370470894
epoch: 34 test_true_pfm: 9.646674928791775 sim_pfm: -957.6439247566259
episode: 136 training return: -1020.4303877908063
episode: 137 training return: -989.1386394984322
episode: 138 training return: -989.8217156086167
episode: 139 training return: -979.7023418500868
epoch: 35 test_true_pfm: 18.105289992434138 sim_pfm: -959.9412359867251
episode: 140 training return: -1014.4458856295771
episode: 141 training return: -1022.798900050068
episode: 142 training return: -1016.9501848217885
episode: 143 training return: -1020.539988339068
epoch: 36 test_true_pfm: 18.344598024567542 sim_pfm: -958.1407754548259
episode: 144 training return: -1011.0499331264353
episode: 145 training return: -1007.5747310470848
episode: 146 training return: -1023.7657373524333
episode: 147 training return: -993.149749198548
epoch: 37 test_true_pfm: 14.563706788674034 sim_pfm: -958.5772089281821
episode: 148 training return: -970.5401312538941
episode: 149 training return: -1017.9117280747539
episode: 150 training return: -993.0618039821654
episode: 151 training return: -1012.1932834656769
epoch: 38 test_true_pfm: 16.797868882406462 sim_pfm: -958.3501095916686
episode: 152 training return: -1020.2155034932124
episode: 153 training return: -973.6625730263504
episode: 154 training return: -1011.205924490492
episode: 155 training return: -1010.0545802498872
epoch: 39 test_true_pfm: 16.789872772412025 sim_pfm: -957.1691428959647
episode: 156 training return: -1020.9480919968477
episode: 157 training return: -1013.6974326209908
episode: 158 training return: -1003.5201410406603
episode: 159 training return: -976.4360207095702
epoch: 40 test_true_pfm: 6.4129998073229535 sim_pfm: -958.5600472942547
episode: 160 training return: -1025.9395031014171
episode: 161 training return: -978.295056889647
episode: 162 training return: -1026.6228205938307
episode: 163 training return: -1028.467005490727
epoch: 41 test_true_pfm: 22.67221793432204 sim_pfm: -957.3111338763753
episode: 164 training return: -1017.9586970117102
episode: 165 training return: -1002.30511131641
episode: 166 training return: -995.2215926689229
episode: 167 training return: -1000.7966268507045
epoch: 42 test_true_pfm: -0.4860471142738068 sim_pfm: -957.5497857069992
episode: 168 training return: -1016.188305112278
episode: 169 training return: -1023.2507206513411
episode: 170 training return: -1014.5396625160132
episode: 171 training return: -1014.5397050866444
epoch: 43 test_true_pfm: 11.576281569260352 sim_pfm: -958.845694972104
episode: 172 training return: -1030.6434780397867
episode: 173 training return: -1022.9173744058648
episode: 174 training return: -1011.944496968145
episode: 175 training return: -991.351153314486
epoch: 44 test_true_pfm: -4.978815088267303 sim_pfm: -957.100917207169
episode: 176 training return: -1017.0700961782396
episode: 177 training return: -1026.3151069572284
episode: 178 training return: -1021.6964782182652
episode: 179 training return: -1024.7929430771696
epoch: 45 test_true_pfm: 23.576682628999137 sim_pfm: -957.5564404233188
episode: 180 training return: -1020.138433209302
episode: 181 training return: -994.0926669234362
episode: 182 training return: -1020.484680648855
episode: 183 training return: -1020.3598265926444
epoch: 46 test_true_pfm: 32.84560623631288 sim_pfm: -957.0737683534168
episode: 184 training return: -1013.8070350434947
episode: 185 training return: -1013.6028949125026
episode: 186 training return: -1021.2552024885798
episode: 187 training return: -1021.4344478277944
epoch: 47 test_true_pfm: 53.217968167574526 sim_pfm: -956.7245474171685
episode: 188 training return: -976.8207293031505
episode: 189 training return: -1020.8855806651272
episode: 190 training return: -1024.6362720304287
episode: 191 training return: -1011.2776939858634
epoch: 48 test_true_pfm: 15.348326440190988 sim_pfm: -957.8100096163968
episode: 192 training return: -1017.719241322446
episode: 193 training return: -1010.7505547091619
episode: 194 training return: -986.7563775106047
episode: 195 training return: -979.0337363310972
epoch: 49 test_true_pfm: 16.079449981764892 sim_pfm: -956.836328280322
episode: 196 training return: -1014.0444899586632
episode: 197 training return: -1015.7871780424051
episode: 198 training return: -1005.9457297953601
episode: 199 training return: -1012.4845767773635
epoch: 50 test_true_pfm: 26.040956370636845 sim_pfm: -957.2866064371246
episode: 200 training return: -982.1117274331193
episode: 201 training return: -1023.6918932538586
episode: 202 training return: -1012.444477070585
episode: 203 training return: -950.9903528164213
epoch: 51 test_true_pfm: 13.458675125767286 sim_pfm: -957.3060852442696
episode: 204 training return: -1010.309503103605
episode: 205 training return: -1019.3754086881535
episode: 206 training return: -1001.0812092287935
episode: 207 training return: -977.7706777755046
epoch: 52 test_true_pfm: 92.20972558409598 sim_pfm: -956.1483147414295
episode: 208 training return: -992.3338635510829
episode: 209 training return: -972.4622417482339
episode: 210 training return: -1021.0808587961923
episode: 211 training return: -1004.0219223560526
epoch: 53 test_true_pfm: 3.346749190120795 sim_pfm: -957.4977246975149
episode: 212 training return: -992.8001295457522
episode: 213 training return: -1010.2623257337966
episode: 214 training return: -974.0561595340647
episode: 215 training return: -968.9011518697736
epoch: 54 test_true_pfm: 1.7531382314550257 sim_pfm: -957.2397259665271
episode: 216 training return: -1013.1789359993288
episode: 217 training return: -1022.8725254875891
episode: 218 training return: -973.2060707292121
episode: 219 training return: -976.1507287903553
epoch: 55 test_true_pfm: 12.035369213409515 sim_pfm: -957.9828003587733
episode: 220 training return: -995.5123588081974
episode: 221 training return: -1020.450614317067
episode: 222 training return: -1027.5139975710515
episode: 223 training return: -975.0662849261613
epoch: 56 test_true_pfm: 17.84631227916079 sim_pfm: -957.2608813691335
episode: 224 training return: -1027.828948762507
episode: 225 training return: -974.6898137140311
episode: 226 training return: -988.9507773409608
episode: 227 training return: -993.4722034972867
epoch: 57 test_true_pfm: 5.549074407806419 sim_pfm: -957.5882761186425
episode: 228 training return: -1028.0260414884583
episode: 229 training return: -983.1376385529727
episode: 230 training return: -1005.3828576974987
episode: 231 training return: -1024.5045269706245
epoch: 58 test_true_pfm: 37.02639847676775 sim_pfm: -956.8795329243397
episode: 232 training return: -1018.1020735579273
episode: 233 training return: -1006.3885114180304
episode: 234 training return: -989.6715829789368
episode: 235 training return: -1012.9635314153777
epoch: 59 test_true_pfm: 26.20473780594904 sim_pfm: -957.0241020160225
episode: 236 training return: -987.6470783752246
episode: 237 training return: -1024.9367457046362
episode: 238 training return: -969.0692747853163
episode: 239 training return: -963.5302236333114
epoch: 60 test_true_pfm: 12.83100926346975 sim_pfm: -957.5887793050615
episode: 240 training return: -1018.1410851822051
episode: 241 training return: -976.903809220458
episode: 242 training return: -1019.2593424788746
episode: 243 training return: -992.9126519088142
epoch: 61 test_true_pfm: 23.20858054921507 sim_pfm: -956.7642946482027
episode: 244 training return: -986.212660852406
episode: 245 training return: -1020.5084981961329
episode: 246 training return: -1014.1538280468257
episode: 247 training return: -979.0151868718875
epoch: 62 test_true_pfm: 14.511956698485726 sim_pfm: -957.90808212929
episode: 248 training return: -992.0082228980605
episode: 249 training return: -1028.4475011004597
episode: 250 training return: -976.5280408832017
episode: 251 training return: -1000.9523003134337
epoch: 63 test_true_pfm: 293.06844272588177 sim_pfm: -955.3178638214391
episode: 252 training return: -979.3357844290326
episode: 253 training return: -1016.3506401352954
episode: 254 training return: -1014.4488572012344
episode: 255 training return: -1020.3019442423724
epoch: 64 test_true_pfm: 25.389375539388624 sim_pfm: -957.9283669721545
episode: 256 training return: -1019.2248329732415
episode: 257 training return: -1012.2633531646488
episode: 258 training return: -1011.5122359650671
episode: 259 training return: -995.2774638048899
epoch: 65 test_true_pfm: 28.505498447492187 sim_pfm: -957.3260735345735
episode: 260 training return: -997.0834041417837
episode: 261 training return: -974.4891723122826
episode: 262 training return: -1012.6884912433954
episode: 263 training return: -1007.2796972270568
epoch: 66 test_true_pfm: 9.816571984214274 sim_pfm: -956.9812679689634
episode: 264 training return: -997.4781404821366
episode: 265 training return: -996.2486378363233
episode: 266 training return: -1029.8106616264388
episode: 267 training return: -1015.8162884830714
epoch: 67 test_true_pfm: 7.06693441875403 sim_pfm: -957.0337693757497
episode: 268 training return: -969.7991104560302
episode: 269 training return: -953.4521765197148
episode: 270 training return: -1011.7659790315853
episode: 271 training return: -1019.7692217222071
epoch: 68 test_true_pfm: 11.122280234678414 sim_pfm: -957.9093830474044
episode: 272 training return: -1014.8200880033747
episode: 273 training return: -1012.113398928875
episode: 274 training return: -1014.2706776222903
episode: 275 training return: -1025.3833069885804
epoch: 69 test_true_pfm: 26.595892409012762 sim_pfm: -957.4106985448161
episode: 276 training return: -1011.6441252882994
episode: 277 training return: -1021.7643388178876
episode: 278 training return: -1016.262266317482
episode: 279 training return: -961.2522222984901
epoch: 70 test_true_pfm: -8.421296641856216 sim_pfm: -956.8293296361576
episode: 280 training return: -1017.0205338339049
episode: 281 training return: -976.6602181380069
episode: 282 training return: -983.5193853318267
episode: 283 training return: -1020.7310378847054
epoch: 71 test_true_pfm: 21.199860423886506 sim_pfm: -957.0495225949222
episode: 284 training return: -991.0293563336946
episode: 285 training return: -1005.7039257888692
episode: 286 training return: -1021.0629482913445
episode: 287 training return: -1013.7590731768418
epoch: 72 test_true_pfm: 35.808965140128 sim_pfm: -955.6566083672187
episode: 288 training return: -976.4768670811419
episode: 289 training return: -1016.6500694228804
episode: 290 training return: -1006.6653819602135
episode: 291 training return: -1015.1925816419313
epoch: 73 test_true_pfm: 33.5311726751361 sim_pfm: -956.5950962730752
episode: 292 training return: -1008.1909087628612
episode: 293 training return: -1014.3213601493459
episode: 294 training return: -1022.5375327550493
episode: 295 training return: -1014.1173706418734
epoch: 74 test_true_pfm: -8.663529184304977 sim_pfm: -956.4203967624711
episode: 296 training return: -1022.4019773921256
episode: 297 training return: -1014.5386849421325
episode: 298 training return: -1021.8903556370599
episode: 299 training return: -1024.954342264217
epoch: 75 test_true_pfm: 273.5055211616605 sim_pfm: -971.2626678984943
episode: 300 training return: -1021.6570489082192
episode: 301 training return: -985.1081173678863
episode: 302 training return: -982.4802913481084
episode: 303 training return: -987.1111989288501
epoch: 76 test_true_pfm: 5.1833535563015785 sim_pfm: -957.2372120425904
episode: 304 training return: -1024.9629455687514
episode: 305 training return: -1017.370348777016
episode: 306 training return: -983.3411967758302
episode: 307 training return: -973.5304726710132
epoch: 77 test_true_pfm: -19.026485855247703 sim_pfm: -957.3060497816601
episode: 308 training return: -1021.645319453551
episode: 309 training return: -1013.2176269577624
episode: 310 training return: -1017.3189213799726
episode: 311 training return: -1013.8307770477398
epoch: 78 test_true_pfm: 12.95552214650389 sim_pfm: -957.0439079089607
episode: 312 training return: -1017.4502429437376
episode: 313 training return: -1013.8208526153346
episode: 314 training return: -970.4218449104992
episode: 315 training return: -1007.3674603534707
epoch: 79 test_true_pfm: -1.2583054715467694 sim_pfm: -957.4833487240621
episode: 316 training return: -984.3899987095558
episode: 317 training return: -976.5385600476509
episode: 318 training return: -1017.1397800022083
episode: 319 training return: -982.8607893033376
epoch: 80 test_true_pfm: 43.44452862784766 sim_pfm: -957.2507419790431
episode: 320 training return: -1023.00619724571
episode: 321 training return: -1024.5039460283476
episode: 322 training return: -977.2735284737493
episode: 323 training return: -1004.3164933195442
epoch: 81 test_true_pfm: 4.136785782695495 sim_pfm: -956.7211123029252
episode: 324 training return: -1022.7205425475029
episode: 325 training return: -1009.4475733408949
episode: 326 training return: -1030.0260238017415
episode: 327 training return: -1014.3852631407543
epoch: 82 test_true_pfm: 27.576884019278253 sim_pfm: -956.595906657947
episode: 328 training return: -1015.4059090419639
episode: 329 training return: -1015.5847633111232
episode: 330 training return: -1029.0458161097606
episode: 331 training return: -991.7266355740661
epoch: 83 test_true_pfm: 29.984481801529384 sim_pfm: -955.90636128579
episode: 332 training return: -959.0506538714392
episode: 333 training return: -1011.2316735824676
episode: 334 training return: -1008.1249520786719
episode: 335 training return: -1028.9587701205992
epoch: 84 test_true_pfm: 288.39032624729805 sim_pfm: -950.6109966927106
episode: 336 training return: -1009.6976691243322
episode: 337 training return: -1022.257371308437
episode: 338 training return: -1019.53493651236
episode: 339 training return: -1005.7059367629652
epoch: 85 test_true_pfm: 143.04492023876603 sim_pfm: -944.9951015366681
episode: 340 training return: -1014.1181476291687
episode: 341 training return: -983.1821482663344
episode: 342 training return: -981.6453675790715
episode: 343 training return: -1014.5680918462308
epoch: 86 test_true_pfm: 4.362470415589354 sim_pfm: -957.4889951742692
episode: 344 training return: -989.612836619019
episode: 345 training return: -1009.8416385878494
episode: 346 training return: -1003.6126392846076
episode: 347 training return: -1018.8467935711773
epoch: 87 test_true_pfm: -5.691313408011251 sim_pfm: -959.032422319622
episode: 348 training return: -1000.8360282677977
episode: 349 training return: -991.4934113632221
episode: 350 training return: -1019.2897244269708
episode: 351 training return: -1014.8128887556292
epoch: 88 test_true_pfm: 13.736419428533472 sim_pfm: -956.8453513019955
episode: 352 training return: -975.9307882744775
episode: 353 training return: -1018.8109428699814
episode: 354 training return: -1017.6193887778923
episode: 355 training return: -986.5510059742097
epoch: 89 test_true_pfm: -2.7344299685179183 sim_pfm: -956.7915086309422
episode: 356 training return: -977.3148591893489
episode: 357 training return: -1013.5631857536462
episode: 358 training return: -997.6118162271925
episode: 359 training return: -976.1750962803507
epoch: 90 test_true_pfm: -0.25830400780394785 sim_pfm: -957.8265080125675
episode: 360 training return: -994.801765056383
episode: 361 training return: -1003.819258055725
episode: 362 training return: -1015.6554100327637
episode: 363 training return: -1015.6117098953288
epoch: 91 test_true_pfm: -0.2742281620397648 sim_pfm: -956.7193146449195
episode: 364 training return: -979.5625568471514
episode: 365 training return: -1007.2548867401849
episode: 366 training return: -970.1934827693112
episode: 367 training return: -1024.0842836307645
epoch: 92 test_true_pfm: 16.119393516246973 sim_pfm: -956.7356345426375
episode: 368 training return: -1022.6653037177806
episode: 369 training return: -1010.5234323075407
episode: 370 training return: -963.6309918376136
episode: 371 training return: -1009.1159764026024
epoch: 93 test_true_pfm: 183.4540211761729 sim_pfm: -966.7255681178732
episode: 372 training return: -968.6447714391755
episode: 373 training return: -995.7143480129951
episode: 374 training return: -963.0093067720087
episode: 375 training return: -1015.8064533104276
epoch: 94 test_true_pfm: 174.96004805982307 sim_pfm: -954.9007873122438
episode: 376 training return: -1018.1362937937374
episode: 377 training return: -993.6668038581232
episode: 378 training return: -1021.3139519037062
episode: 379 training return: -1009.7443979614671
epoch: 95 test_true_pfm: -11.28047894287654 sim_pfm: -957.2804284669722
episode: 380 training return: -943.1493123577384
episode: 381 training return: -1014.7222789696458
episode: 382 training return: -1012.7433522637405
episode: 383 training return: -1018.3540770421657
epoch: 96 test_true_pfm: 10.893733750945506 sim_pfm: -956.930793909135
episode: 384 training return: -955.8707661534418
episode: 385 training return: -1018.9438755148761
episode: 386 training return: -967.8792935461
episode: 387 training return: -1011.6862782701119
epoch: 97 test_true_pfm: -7.15895821697902 sim_pfm: -956.6929229997758
episode: 388 training return: -1022.1694167252946
episode: 389 training return: -968.6203614982243
episode: 390 training return: -953.1424379003317
episode: 391 training return: -953.8078077963196
epoch: 98 test_true_pfm: -0.2452582210857911 sim_pfm: -956.6782800062559
episode: 392 training return: -1014.9484765233257
episode: 393 training return: -974.3308056112645
episode: 394 training return: -980.8812421534058
episode: 395 training return: -1005.0916147159212
epoch: 99 test_true_pfm: -8.384900838432348 sim_pfm: -956.6087281256168
episode: 396 training return: -961.4976827305914
episode: 397 training return: -963.7260634108997
episode: 398 training return: -962.8881932447604
episode: 399 training return: -966.4212498566935
epoch: 100 test_true_pfm: -10.887609807559258 sim_pfm: -957.293108873786
episode: 400 training return: -1017.1033196706616
episode: 401 training return: -1015.6298018478396
episode: 402 training return: -1017.2483133672425
episode: 403 training return: -980.4998219021844
epoch: 101 test_true_pfm: -4.48467072167938 sim_pfm: -957.192255167693
episode: 404 training return: -977.5902236027486
episode: 405 training return: -956.2607208972892
episode: 406 training return: -1030.0047960361453
episode: 407 training return: -1027.9015788349586
epoch: 102 test_true_pfm: -14.036742114117066 sim_pfm: -956.6980830096041
episode: 408 training return: -980.3423377598579
episode: 409 training return: -1018.9240416295169
episode: 410 training return: -995.5156567837853
episode: 411 training return: -1016.623037373026
epoch: 103 test_true_pfm: 9.350123374826715 sim_pfm: -956.699451073453
episode: 412 training return: -969.0082171104855
episode: 413 training return: -1008.7519061788154
episode: 414 training return: -986.943339210981
episode: 415 training return: -979.938406138333
epoch: 104 test_true_pfm: -2.3567957617716844 sim_pfm: -957.7288367560617
episode: 416 training return: -1028.408919693122
episode: 417 training return: -966.85931624752
episode: 418 training return: -950.6011054584474
episode: 419 training return: -980.9505267256983
epoch: 105 test_true_pfm: -0.5959221599628566 sim_pfm: -957.9643480271637
episode: 420 training return: -1008.1708465885803
episode: 421 training return: -1007.8620192156257
episode: 422 training return: -979.2861267283793
episode: 423 training return: -1028.1695300239662
epoch: 106 test_true_pfm: 9.07174540508173 sim_pfm: -956.8162031254184
episode: 424 training return: -987.7992315780167
episode: 425 training return: -971.1833327115008
episode: 426 training return: -1037.8485867535094
episode: 427 training return: -1013.1337551160375
epoch: 107 test_true_pfm: -1.5665888550788474 sim_pfm: -957.1563799269483
episode: 428 training return: -966.6628556270846
episode: 429 training return: -979.1885066875299
episode: 430 training return: -1019.2957167360344
episode: 431 training return: -1023.2184945673679
epoch: 108 test_true_pfm: 3.53321541642666 sim_pfm: -957.09776669275
episode: 432 training return: -979.8844934957627
episode: 433 training return: -973.2820941099181
episode: 434 training return: -979.7320287561375
episode: 435 training return: -1011.0125921584388
epoch: 109 test_true_pfm: 28.952414981435165 sim_pfm: -957.0146079039495
episode: 436 training return: -1023.6757764034055
episode: 437 training return: -983.1499224231068
episode: 438 training return: -959.7744023219323
episode: 439 training return: -985.8697012003173
epoch: 110 test_true_pfm: -1.1573880888834547 sim_pfm: -957.4797832277201
episode: 440 training return: -1014.8369811292796
episode: 441 training return: -992.9028105566331
episode: 442 training return: -1019.9990851184193
episode: 443 training return: -983.2423502665108
epoch: 111 test_true_pfm: 5.072566756449361 sim_pfm: -957.4352460987303
episode: 444 training return: -1006.0080142177395
episode: 445 training return: -952.4804095164219
episode: 446 training return: -962.2962126021796
episode: 447 training return: -971.4745005361599
epoch: 112 test_true_pfm: 1.305065814347988 sim_pfm: -956.6176975122472
episode: 448 training return: -1016.3627383272054
episode: 449 training return: -999.0608180601131
episode: 450 training return: -974.1997767434355
episode: 451 training return: -1011.3459516259531
epoch: 113 test_true_pfm: -10.945003879802323 sim_pfm: -958.2024139221445
episode: 452 training return: -965.4474811573717
episode: 453 training return: -963.3145704486378
episode: 454 training return: -978.5987304002554
episode: 455 training return: -1009.6317457332292
epoch: 114 test_true_pfm: -18.751905711719726 sim_pfm: -958.775863898236
episode: 456 training return: -1016.774658866691
episode: 457 training return: -1019.2027007218812
episode: 458 training return: -966.1271433386543
episode: 459 training return: -969.4687120874418
epoch: 115 test_true_pfm: 15.625832235524959 sim_pfm: -956.3536481788805
episode: 460 training return: -966.7425679159414
episode: 461 training return: -955.6768128303132
episode: 462 training return: -970.2208577513735
episode: 463 training return: -1011.8588980047367
epoch: 116 test_true_pfm: 138.09231019972674 sim_pfm: -942.4155279342417
episode: 464 training return: -1012.7873690498376
episode: 465 training return: -958.4494082392886
episode: 466 training return: -1021.5755030912193
episode: 467 training return: -1019.0163701046254
epoch: 117 test_true_pfm: -27.725835253032738 sim_pfm: -960.3215593873634
episode: 468 training return: -974.8227039758416
episode: 469 training return: -986.4130017533698
episode: 470 training return: -985.6414313917802
episode: 471 training return: -1004.9386460718213
epoch: 118 test_true_pfm: 169.0366052265708 sim_pfm: -946.8324522729063
episode: 472 training return: -965.8318160277735
episode: 473 training return: -955.41241820368
episode: 474 training return: -970.9834348735138
episode: 475 training return: -1020.5859846277481
epoch: 119 test_true_pfm: 1.7816827906302148 sim_pfm: -957.8860182822215
episode: 476 training return: -1021.1317651273471
episode: 477 training return: -1011.1885540632649
episode: 478 training return: -1014.5981355621311
episode: 479 training return: -1004.683688470647
epoch: 120 test_true_pfm: -11.383556538673849 sim_pfm: -957.8703805802053
episode: 480 training return: -1020.2284041412549
episode: 481 training return: -971.5845331056842
episode: 482 training return: -1012.7992398800255
episode: 483 training return: -1020.4938938622357
epoch: 121 test_true_pfm: -7.465504182610999 sim_pfm: -957.6176427590563
episode: 484 training return: -966.2411871200374
episode: 485 training return: -1016.1557139880758
episode: 486 training return: -1018.1542758405749
episode: 487 training return: -982.5353836887812
epoch: 122 test_true_pfm: -15.998884393119285 sim_pfm: -956.4044793194379
episode: 488 training return: -953.1396609337569
episode: 489 training return: -981.7692737485925
episode: 490 training return: -1012.2012574627006
episode: 491 training return: -963.5781220428595
epoch: 123 test_true_pfm: 1.4957382931714687 sim_pfm: -957.6071329231289
episode: 492 training return: -966.6684514978996
episode: 493 training return: -980.5953786232282
episode: 494 training return: -1026.7967512091438
episode: 495 training return: -1038.0949876106722
epoch: 124 test_true_pfm: 183.13612213850115 sim_pfm: -947.9743256501557
episode: 496 training return: -975.7200771027137
episode: 497 training return: -1012.4162697358536
episode: 498 training return: -1017.8783424583349
episode: 499 training return: -972.2552489372687
epoch: 125 test_true_pfm: 218.3400340422438 sim_pfm: -935.4675080699785
episode: 500 training return: -1016.6565229364236
episode: 501 training return: -987.3720328707687
episode: 502 training return: -1010.0348983603558
episode: 503 training return: -1015.5725181231123
epoch: 126 test_true_pfm: -3.7197155927075567 sim_pfm: -956.5805485528317
episode: 504 training return: -1013.6918189908251
episode: 505 training return: -1022.085734314984
episode: 506 training return: -1012.9712466054775
episode: 507 training return: -969.7611866826064
epoch: 127 test_true_pfm: 4.016026304730725 sim_pfm: -957.5211516170642
episode: 508 training return: -967.4961640795299
episode: 509 training return: -984.9192277938465
episode: 510 training return: -1019.0380335797978
episode: 511 training return: -964.3126708377547
epoch: 128 test_true_pfm: 46.38147081415704 sim_pfm: -959.4099909497114
episode: 512 training return: -981.5588762220393
episode: 513 training return: -957.7367765073997
episode: 514 training return: -1029.1812680355736
episode: 515 training return: -973.7626135847231
epoch: 129 test_true_pfm: 1.0030057711721687 sim_pfm: -956.6123941835896
episode: 516 training return: -964.6982575102124
episode: 517 training return: -1013.4507940839284
episode: 518 training return: -977.9863011347429
episode: 519 training return: -1015.7723375907907
epoch: 130 test_true_pfm: 1.4008773721341783 sim_pfm: -956.3609352612362
episode: 520 training return: -1015.0896469271994
episode: 521 training return: -950.0493892794884
episode: 522 training return: -1020.71612548251
episode: 523 training return: -1020.1666158022458
epoch: 131 test_true_pfm: -28.568434805965328 sim_pfm: -957.9489795130971
episode: 524 training return: -958.10219042201
episode: 525 training return: -949.7987815447312
episode: 526 training return: -957.4148188480589
episode: 527 training return: -1000.3856084896546
epoch: 132 test_true_pfm: 162.1065446468317 sim_pfm: -931.5153787375156
episode: 528 training return: -970.0735262927585
episode: 529 training return: -1013.2869891863944
episode: 530 training return: -1011.9165232436368
episode: 531 training return: -1012.6578541084314
epoch: 133 test_true_pfm: -5.013976834009541 sim_pfm: -956.360597399302
episode: 532 training return: -1021.3511491457356
episode: 533 training return: -1012.3683329730717
episode: 534 training return: -1022.7401129357996
episode: 535 training return: -991.0088802429594
epoch: 134 test_true_pfm: 118.79417705787989 sim_pfm: -956.5273603679019
episode: 536 training return: -966.048640081826
episode: 537 training return: -1015.2490311491351
episode: 538 training return: -966.3486546550805
episode: 539 training return: -963.5655028440248
epoch: 135 test_true_pfm: 184.79884494857734 sim_pfm: -938.4820870685757
episode: 540 training return: -969.290373106464
episode: 541 training return: -960.9365386858972
episode: 542 training return: -948.998883964794
episode: 543 training return: -1008.5445999380204
epoch: 136 test_true_pfm: 153.72673016384462 sim_pfm: -937.4336346511451
episode: 544 training return: -968.1453331543038
episode: 545 training return: -1015.5256359147504
episode: 546 training return: -959.0995133679808
episode: 547 training return: -1014.5689131668114
epoch: 137 test_true_pfm: 181.57631704751603 sim_pfm: -943.9684478042956
episode: 548 training return: -1017.0337103184687
episode: 549 training return: -959.1060898577499
episode: 550 training return: -986.379609529357
episode: 551 training return: -1022.9682739131422
epoch: 138 test_true_pfm: 137.2731144950559 sim_pfm: -940.501308819477
episode: 552 training return: -965.2215070399081
episode: 553 training return: -1021.0756231697272
episode: 554 training return: -1025.0886483210109
episode: 555 training return: -1020.4241829199182
epoch: 139 test_true_pfm: -7.980799468498805 sim_pfm: -958.6088749824584
episode: 556 training return: -955.4747402800285
episode: 557 training return: -1017.7068151279427
episode: 558 training return: -958.2625923786803
episode: 559 training return: -948.7507468689776
epoch: 140 test_true_pfm: 0.4033728765292115 sim_pfm: -956.1465302318139
episode: 560 training return: -973.7090695627434
episode: 561 training return: -1014.5927985284851
episode: 562 training return: -960.7643022435733
episode: 563 training return: -981.4152809523948
epoch: 141 test_true_pfm: 58.47486115037117 sim_pfm: -954.2413459242254
episode: 564 training return: -1017.7240882637585
episode: 565 training return: -956.6229277127148
episode: 566 training return: -1009.3445913668074
episode: 567 training return: -963.4712164373879
epoch: 142 test_true_pfm: 200.35754696064086 sim_pfm: -938.3132597380135
episode: 568 training return: -962.8669609225881
episode: 569 training return: -960.725442180827
episode: 570 training return: -973.0611976367418
episode: 571 training return: -970.7246577374457
epoch: 143 test_true_pfm: 108.68374316063762 sim_pfm: -946.836388527463
episode: 572 training return: -959.1498368039764
episode: 573 training return: -967.2920520401589
episode: 574 training return: -959.9038014138772
episode: 575 training return: -1016.3451019284587
epoch: 144 test_true_pfm: -4.271546220430299 sim_pfm: -956.4508131895727
episode: 576 training return: -967.6319141808781
episode: 577 training return: -991.3733105912174
episode: 578 training return: -963.8811487890322
episode: 579 training return: -963.3346861855352
epoch: 145 test_true_pfm: -2.2801971974955464 sim_pfm: -957.009827153759
episode: 580 training return: -980.2084235222684
episode: 581 training return: -982.3053400599056
episode: 582 training return: -959.7841844220573
episode: 583 training return: -960.3333804812552
epoch: 146 test_true_pfm: 147.24238301432018 sim_pfm: -948.4065389107674
episode: 584 training return: -1021.78342159431
episode: 585 training return: -961.1677533733377
episode: 586 training return: -972.2741203965203
episode: 587 training return: -970.2689580811555
epoch: 147 test_true_pfm: 149.246530621964 sim_pfm: -935.4588899835607
episode: 588 training return: -956.9211682117
episode: 589 training return: -977.7298630636112
episode: 590 training return: -1015.2070235630293
episode: 591 training return: -1009.3488080737195
epoch: 148 test_true_pfm: 143.88133616035137 sim_pfm: -955.1579452793352
episode: 592 training return: -975.6181207608744
episode: 593 training return: -964.0968382989921
episode: 594 training return: -1017.840991054851
episode: 595 training return: -968.0202638144009
epoch: 149 test_true_pfm: 177.2032549206759 sim_pfm: -939.6523114928588
episode: 596 training return: -975.9226478827183
episode: 597 training return: -973.4349078956349
episode: 598 training return: -954.5586980525437
episode: 599 training return: -1019.47937122918
epoch: 150 test_true_pfm: 226.71748879669948 sim_pfm: -942.5572474832946
