epoch: 0 training_loss 0.21443555474281312 test_loss: 0.19616440534591675
epoch: 1 training_loss 0.19756248608231544 test_loss: 0.16969070434570313
epoch: 2 training_loss 0.1827977643162012 test_loss: 0.20081474781036376
epoch: 3 training_loss 0.19936486817896365 test_loss: 0.21688382625579833
epoch: 4 training_loss 0.187185470610857 test_loss: 0.21543195247650146
epoch: 5 training_loss 0.19362892359495162 test_loss: 0.15850530862808226
epoch: 6 training_loss 0.18302621871232985 test_loss: 0.19834647178649903
epoch: 7 training_loss 0.19437634080648422 test_loss: 0.22176458835601806
epoch: 8 training_loss 0.1828568246215582 test_loss: 0.19846664667129515
epoch: 9 training_loss 0.1915593272447586 test_loss: 0.18834232091903685
epoch: 10 training_loss 0.1910945212841034 test_loss: 0.23035712242126466
epoch: 11 training_loss 0.18838697575032712 test_loss: 0.17566635608673095
epoch: 12 training_loss 0.1848930626362562 test_loss: 0.18890877962112426
epoch: 13 training_loss 0.193515864610672 test_loss: 0.20231692790985106
epoch: 14 training_loss 0.19892557755112647 test_loss: 0.16572219133377075
epoch: 15 training_loss 0.18129715360701085 test_loss: 0.1577388048171997
epoch: 16 training_loss 0.1795859932154417 test_loss: 0.20006158351898193
epoch: 17 training_loss 0.1874752275645733 test_loss: 0.19412049055099487
epoch: 18 training_loss 0.18123937919735908 test_loss: 0.1854739785194397
epoch: 19 training_loss 0.1779506880044937 test_loss: 0.17846962213516235
epoch: 20 training_loss 0.19109573394060134 test_loss: 0.1839011311531067
epoch: 21 training_loss 0.19175549305975437 test_loss: 0.2091052532196045
epoch: 22 training_loss 0.18228768207132817 test_loss: 0.1831250309944153
epoch: 23 training_loss 0.18570376358926297 test_loss: 0.15860466957092284
epoch: 24 training_loss 0.18406401596963406 test_loss: 0.22280662059783934
epoch: 25 training_loss 0.1987072940915823 test_loss: 0.16451836824417115
epoch: 26 training_loss 0.19704233944416047 test_loss: 0.19161330461502074
epoch: 27 training_loss 0.18988106615841388 test_loss: 0.2248067855834961
epoch: 28 training_loss 0.1928667351603508 test_loss: 0.2053218126296997
epoch: 29 training_loss 0.18821413159370423 test_loss: 0.19228086471557618
epoch: 30 training_loss 0.19533224426209927 test_loss: 0.1656077027320862
epoch: 31 training_loss 0.19120714351534843 test_loss: 0.173276686668396
epoch: 32 training_loss 0.1799191328883171 test_loss: 0.21337602138519288
epoch: 33 training_loss 0.18015271544456482 test_loss: 0.19216808080673217
epoch: 34 training_loss 0.18536527503281833 test_loss: 0.18008967638015747
epoch: 35 training_loss 0.18427417322993278 test_loss: 0.19720743894577025
epoch: 36 training_loss 0.19018910475075246 test_loss: 0.22420969009399414
epoch: 37 training_loss 0.18271726831793786 test_loss: 0.17784110307693482
epoch: 38 training_loss 0.1888290748745203 test_loss: 0.1998750686645508
epoch: 39 training_loss 0.19412536934018135 test_loss: 0.18741834163665771
epoch: 40 training_loss 0.17875688679516316 test_loss: 0.22676608562469483
epoch: 41 training_loss 0.18937974907457827 test_loss: 0.18929294347763062
epoch: 42 training_loss 0.18897490605711936 test_loss: 0.18839559555053711
epoch: 43 training_loss 0.18325023509562016 test_loss: 0.183530592918396
epoch: 44 training_loss 0.18665307097136974 test_loss: 0.1810448169708252
epoch: 45 training_loss 0.18751911036670207 test_loss: 0.17805771827697753
epoch: 46 training_loss 0.18703136160969736 test_loss: 0.2115417718887329
epoch: 47 training_loss 0.17471170745790005 test_loss: 0.15558961629867554
epoch: 48 training_loss 0.18247659973800182 test_loss: 0.14920382499694823
epoch: 49 training_loss 0.17608505621552467 test_loss: 0.16650092601776123
epoch: 50 training_loss 0.1767132519558072 test_loss: 0.17940771579742432
epoch: 51 training_loss 0.1898531850427389 test_loss: 0.19638386964797974
epoch: 52 training_loss 0.188854421377182 test_loss: 0.18800367116928102
epoch: 53 training_loss 0.1815926843136549 test_loss: 0.18771920204162598
epoch: 54 training_loss 0.19110165402293205 test_loss: 0.17009174823760986
epoch: 55 training_loss 0.1876894987747073 test_loss: 0.18947890996932984
epoch: 56 training_loss 0.18023413255810738 test_loss: 0.1723460555076599
epoch: 57 training_loss 0.19420579321682452 test_loss: 0.19542236328125
epoch: 58 training_loss 0.19728201664984227 test_loss: 0.23640599250793456
epoch: 59 training_loss 0.18161007307469845 test_loss: 0.18226226568222045
epoch: 60 training_loss 0.18336520597338676 test_loss: 0.17564084529876708
epoch: 61 training_loss 0.185111937597394 test_loss: 0.18379335403442382
epoch: 62 training_loss 0.18782187566161157 test_loss: 0.21711089611053466
epoch: 63 training_loss 0.18314411900937558 test_loss: 0.18397212028503418
epoch: 64 training_loss 0.18299499347805978 test_loss: 0.17366257905960084
epoch: 65 training_loss 0.18788105249404907 test_loss: 0.17483205795288087
epoch: 66 training_loss 0.1952930413931608 test_loss: 0.1863385558128357
epoch: 67 training_loss 0.1867982368916273 test_loss: 0.1827643632888794
epoch: 68 training_loss 0.18164144538342952 test_loss: 0.2114612340927124
epoch: 69 training_loss 0.18875401817262172 test_loss: 0.18818938732147217
epoch: 70 training_loss 0.1839865805953741 test_loss: 0.17090004682540894
epoch: 71 training_loss 0.19263681583106518 test_loss: 0.20228466987609864
epoch: 72 training_loss 0.18181118853390216 test_loss: 0.16253421306610108
epoch: 73 training_loss 0.19083058565855027 test_loss: 0.20723340511322022
epoch: 74 training_loss 0.17172956690192223 test_loss: 0.18470392227172852
epoch: 75 training_loss 0.18712121136486531 test_loss: 0.1811589002609253
epoch: 76 training_loss 0.1812071865051985 test_loss: 0.19643301963806153
epoch: 77 training_loss 0.18860685393214227 test_loss: 0.1871282935142517
epoch: 78 training_loss 0.1827678095549345 test_loss: 0.15822944641113282
epoch: 79 training_loss 0.18906184881925583 test_loss: 0.18546102046966553
epoch: 80 training_loss 0.1768390839546919 test_loss: 0.1764780640602112
epoch: 81 training_loss 0.17950051553547383 test_loss: 0.1879740595817566
epoch: 82 training_loss 0.18896296381950378 test_loss: 0.17646989822387696
epoch: 83 training_loss 0.1904055391997099 test_loss: 0.14441304206848143
epoch: 84 training_loss 0.17913931235671043 test_loss: 0.17632874250411987
epoch: 85 training_loss 0.18981821902096271 test_loss: 0.17879073619842528
epoch: 86 training_loss 0.18187810018658637 test_loss: 0.16593146324157715
epoch: 87 training_loss 0.19088140308856963 test_loss: 0.2044079065322876
epoch: 88 training_loss 0.17931024253368377 test_loss: 0.18548979759216308
epoch: 89 training_loss 0.18558935433626175 test_loss: 0.18330905437469483
epoch: 90 training_loss 0.18928224943578242 test_loss: 0.15244381427764891
epoch: 91 training_loss 0.17918825566768645 test_loss: 0.15282773971557617
epoch: 92 training_loss 0.1745825669169426 test_loss: 0.20661649703979493
epoch: 93 training_loss 0.17750323116779326 test_loss: 0.20348784923553467
epoch: 94 training_loss 0.19105517461895943 test_loss: 0.16919974088668824
epoch: 95 training_loss 0.1842426961660385 test_loss: 0.22376656532287598
epoch: 96 training_loss 0.17490901447832585 test_loss: 0.1845882534980774
epoch: 97 training_loss 0.19124928683042527 test_loss: 0.19129406213760375
epoch: 98 training_loss 0.18287476766854524 test_loss: 0.1962018609046936
epoch: 99 training_loss 0.18240482442080974 test_loss: 0.16891993284225465
epoch: 100 training_loss 0.18937273919582367 test_loss: 0.17753369808197023
epoch: 101 training_loss 0.18737137630581857 test_loss: 0.17658798694610595
epoch: 102 training_loss 0.1786340269446373 test_loss: 0.1950933814048767
epoch: 103 training_loss 0.1871107943356037 test_loss: 0.17035672664642335
epoch: 104 training_loss 0.18226587906479835 test_loss: 0.17082045078277588
epoch: 105 training_loss 0.1826964507997036 test_loss: 0.18244290351867676
epoch: 106 training_loss 0.18479576371610165 test_loss: 0.19967217445373536
epoch: 107 training_loss 0.17827541679143905 test_loss: 0.1978463888168335
epoch: 108 training_loss 0.18566586062312126 test_loss: 0.18406816720962524
epoch: 109 training_loss 0.19359218090772629 test_loss: 0.21974120140075684
epoch: 110 training_loss 0.18946822136640548 test_loss: 0.16915228366851806
epoch: 111 training_loss 0.18067407682538034 test_loss: 0.18562315702438353
epoch: 112 training_loss 0.19353072680532932 test_loss: 0.17625385522842407
epoch: 113 training_loss 0.17682686425745486 test_loss: 0.17790476083755494
epoch: 114 training_loss 0.17902117393910885 test_loss: 0.21171369552612304
epoch: 115 training_loss 0.1848753347247839 test_loss: 0.19353387355804444
epoch: 116 training_loss 0.18872837718576194 test_loss: 0.18118879795074463
epoch: 117 training_loss 0.1810786109417677 test_loss: 0.18442608118057252
epoch: 118 training_loss 0.18770776592195035 test_loss: 0.1919390916824341
epoch: 119 training_loss 0.19269022077322007 test_loss: 0.16135382652282715
epoch: 0 training_loss 19.773155689239502 test_loss: 13.235324096679687
epoch: 1 training_loss 10.38863049030304 test_loss: 8.532917022705078
epoch: 2 training_loss 7.38607382774353 test_loss: 6.510719299316406
epoch: 3 training_loss 6.132440528869629 test_loss: 5.701506042480469
epoch: 4 training_loss 5.392343106269837 test_loss: 5.013308334350586
epoch: 5 training_loss 4.8901823616027835 test_loss: 4.484473800659179
epoch: 6 training_loss 4.139214136600494 test_loss: 4.051242828369141
epoch: 7 training_loss 3.7262509846687317 test_loss: 3.407672119140625
epoch: 8 training_loss 3.4678360629081726 test_loss: 3.369190979003906
epoch: 9 training_loss 3.273606233596802 test_loss: 3.119925308227539
epoch: 10 training_loss 3.1194875121116636 test_loss: 3.062381935119629
epoch: 11 training_loss 2.971097693443298 test_loss: 3.0880680084228516
epoch: 12 training_loss 2.886067118644714 test_loss: 2.7662567138671874
epoch: 13 training_loss 2.7256678342819214 test_loss: 2.650956153869629
epoch: 14 training_loss 2.5786776971817016 test_loss: 2.457447814941406
epoch: 15 training_loss 2.47776158452034 test_loss: 2.430547904968262
epoch: 16 training_loss 2.4005785751342774 test_loss: 2.4313650131225586
epoch: 17 training_loss 2.329762592315674 test_loss: 2.2491315841674804
epoch: 18 training_loss 2.3013131117820738 test_loss: 2.38635311126709
epoch: 19 training_loss 2.160010526180267 test_loss: 2.1347530364990233
epoch: 20 training_loss 2.1199863243103025 test_loss: 2.129163932800293
epoch: 21 training_loss 2.054948922395706 test_loss: 2.1156538009643553
epoch: 22 training_loss 2.0171241641044615 test_loss: 2.0204397201538087
epoch: 23 training_loss 1.9871882224082946 test_loss: 1.9011045455932618
epoch: 24 training_loss 1.869666703939438 test_loss: 1.7600540161132812
epoch: 25 training_loss 1.8479421508312226 test_loss: 1.9061269760131836
epoch: 26 training_loss 1.8487054336071014 test_loss: 1.8091066360473633
epoch: 27 training_loss 1.7677756083011626 test_loss: 1.755905532836914
epoch: 28 training_loss 1.743390247821808 test_loss: 1.721170997619629
epoch: 29 training_loss 1.7589665055274963 test_loss: 1.6995252609252929
epoch: 30 training_loss 1.672056521177292 test_loss: 1.596885871887207
epoch: 31 training_loss 1.587131620645523 test_loss: 1.7381492614746095
epoch: 32 training_loss 1.5674148416519165 test_loss: 1.5800457000732422
epoch: 33 training_loss 1.5866696894168855 test_loss: 1.5652581214904786
epoch: 34 training_loss 1.555872641801834 test_loss: 1.5813247680664062
epoch: 35 training_loss 1.6884484279155731 test_loss: 1.6595495223999024
epoch: 36 training_loss 1.5783070194721223 test_loss: 1.6096872329711913
epoch: 37 training_loss 1.467825402021408 test_loss: 1.688010025024414
epoch: 38 training_loss 1.4580929791927337 test_loss: 1.3452831268310548
epoch: 39 training_loss 1.4879915773868562 test_loss: 1.347523021697998
epoch: 40 training_loss 1.442508192062378 test_loss: 1.3319205284118651
epoch: 41 training_loss 1.416160719394684 test_loss: 1.274620819091797
epoch: 42 training_loss 1.3844050443172455 test_loss: 1.2407316207885741
epoch: 43 training_loss 1.3754356694221497 test_loss: 1.3622787475585938
epoch: 44 training_loss 1.2941674590110779 test_loss: 1.2332167625427246
epoch: 45 training_loss 1.3005943977832795 test_loss: 1.446186351776123
epoch: 46 training_loss 1.2643004769086839 test_loss: 1.2066377639770507
epoch: 47 training_loss 1.311897436976433 test_loss: 1.2329809188842773
epoch: 48 training_loss 1.2856962275505066 test_loss: 1.3907007217407226
epoch: 49 training_loss 1.2513073241710664 test_loss: 1.307985782623291
epoch: 50 training_loss 1.2194223898649215 test_loss: 1.315317153930664
epoch: 51 training_loss 1.2151026636362077 test_loss: 1.264280414581299
epoch: 52 training_loss 1.190740264058113 test_loss: 1.2729568481445312
epoch: 53 training_loss 1.2152015072107316 test_loss: 1.3052473068237305
epoch: 54 training_loss 1.1987785518169403 test_loss: 1.0988827705383302
epoch: 55 training_loss 1.1211058962345124 test_loss: 1.1530156135559082
epoch: 56 training_loss 1.1510508716106416 test_loss: 1.0701858520507812
epoch: 57 training_loss 1.1239764004945756 test_loss: 1.0738535881042481
epoch: 58 training_loss 1.1360698008537293 test_loss: 1.2079816818237306
epoch: 59 training_loss 1.1879171597957612 test_loss: 1.046663188934326
epoch: 60 training_loss 1.092824214696884 test_loss: 1.0791547775268555
epoch: 61 training_loss 1.079415437579155 test_loss: 1.148286724090576
epoch: 62 training_loss 1.1027889895439147 test_loss: 1.0721936225891113
epoch: 63 training_loss 1.1410160905122757 test_loss: 1.1784927368164062
epoch: 64 training_loss 1.1322715711593627 test_loss: 1.1146440505981445
epoch: 65 training_loss 1.1187754309177398 test_loss: 1.039125347137451
epoch: 66 training_loss 1.0356767719984055 test_loss: 1.1421735763549805
epoch: 67 training_loss 1.1384970080852508 test_loss: 1.0743139266967774
epoch: 68 training_loss 1.060522770881653 test_loss: 1.0245608329772948
epoch: 69 training_loss 1.1104275894165039 test_loss: 1.126601791381836
epoch: 70 training_loss 1.0793901890516282 test_loss: 1.065365982055664
epoch: 71 training_loss 1.0668888503313065 test_loss: 1.3013712882995605
epoch: 72 training_loss 1.0364987432956696 test_loss: 1.0487398147583007
epoch: 73 training_loss 1.0365265536308288 test_loss: 1.0898163795471192
epoch: 74 training_loss 1.0125569146871567 test_loss: 1.0269420623779297
epoch: 75 training_loss 1.0470124453306198 test_loss: 1.021057415008545
epoch: 76 training_loss 0.9911654973030091 test_loss: 0.9484095573425293
epoch: 77 training_loss 1.0247147184610368 test_loss: 0.9253684997558593
epoch: 78 training_loss 1.0660343104600907 test_loss: 0.9704747200012207
epoch: 79 training_loss 0.9704244357347488 test_loss: 0.9505420684814453
epoch: 80 training_loss 1.0280352985858918 test_loss: 0.9847206115722656
epoch: 81 training_loss 0.9847562783956527 test_loss: 1.0915163993835448
epoch: 82 training_loss 0.9962967658042907 test_loss: 0.9285112380981445
epoch: 83 training_loss 0.9627903699874878 test_loss: 0.9869772911071777
epoch: 84 training_loss 0.9764663445949554 test_loss: 0.8759918212890625
epoch: 85 training_loss 0.9748309522867202 test_loss: 0.9803917884826661
epoch: 86 training_loss 0.9694111371040344 test_loss: 0.8299702644348145
epoch: 87 training_loss 0.9373914015293121 test_loss: 0.9343103408813477
epoch: 88 training_loss 0.9474764603376389 test_loss: 1.0754484176635741
epoch: 89 training_loss 0.9286138910055161 test_loss: 0.8858116149902344
epoch: 90 training_loss 0.97466577231884 test_loss: 0.9192657470703125
epoch: 91 training_loss 0.938569375872612 test_loss: 0.8931788444519043
epoch: 92 training_loss 0.9633964830636979 test_loss: 1.0957764625549316
epoch: 93 training_loss 0.9463121330738068 test_loss: 0.9839454650878906
epoch: 94 training_loss 0.9774905949831009 test_loss: 0.8853798866271972
epoch: 95 training_loss 0.9447977101802826 test_loss: 0.9677054405212402
epoch: 96 training_loss 0.9151950752735138 test_loss: 0.9681044578552246
epoch: 97 training_loss 0.884662190079689 test_loss: 0.9397146224975585
epoch: 98 training_loss 0.9063698542118073 test_loss: 0.8556331634521485
epoch: 99 training_loss 0.9218778276443481 test_loss: 0.9344312667846679
epoch: 100 training_loss 0.869698783159256 test_loss: 0.8489948272705078
epoch: 101 training_loss 0.8718373364210129 test_loss: 0.8742761611938477
epoch: 102 training_loss 0.8685825216770172 test_loss: 0.7956146717071533
epoch: 103 training_loss 0.8920978009700775 test_loss: 0.7945681571960449
epoch: 104 training_loss 0.8884975498914719 test_loss: 0.9581061363220215
epoch: 105 training_loss 0.8617227590084076 test_loss: 0.9719056129455567
epoch: 106 training_loss 0.9796205288171769 test_loss: 0.9114392280578614
epoch: 107 training_loss 0.8243625265359878 test_loss: 0.9199413299560547
epoch: 108 training_loss 0.8322037297487259 test_loss: 0.9638285636901855
epoch: 109 training_loss 0.86930992603302 test_loss: 0.8923539161682129
epoch: 110 training_loss 0.922086706161499 test_loss: 0.9609376907348632
epoch: 111 training_loss 0.913380759358406 test_loss: 0.8661443710327148
epoch: 112 training_loss 0.8860409879684448 test_loss: 0.8701954841613769
epoch: 113 training_loss 0.8348043996095658 test_loss: 0.8368074417114257
epoch: 114 training_loss 0.8945763170719147 test_loss: 1.0328804969787597
epoch: 115 training_loss 0.8911298954486847 test_loss: 1.0130838394165038
epoch: 116 training_loss 0.8998392695188522 test_loss: 0.8257290840148925
epoch: 117 training_loss 0.8718416619300843 test_loss: 0.899293327331543
epoch: 118 training_loss 0.8509005272388458 test_loss: 0.8259138107299805
epoch: 119 training_loss 0.8429625833034515 test_loss: 1.0278223037719727
138.73462997322417
episode: 0 training return: tensor(106.8446, device='cuda:0')
episode: 1 training return: tensor(105.3580, device='cuda:0')
episode: 2 training return: tensor(106.4685, device='cuda:0')
episode: 3 training return: tensor(104.9064, device='cuda:0')
