epoch: 0 training_loss 0.4272258706390858 test_loss: 0.30503923892974855 test_wrong: 0.1359375
epoch: 1 training_loss 0.2572020202875137 test_loss: 0.26053073406219485 test_wrong: 0.115625
epoch: 2 training_loss 0.20135739475488662 test_loss: 0.23676679134368897 test_wrong: 0.1046875
epoch: 3 training_loss 0.17658530615270138 test_loss: 0.19627711772918702 test_wrong: 0.0890625
epoch: 4 training_loss 0.1636682828515768 test_loss: 0.18081624507904054 test_wrong: 0.0859375
epoch: 5 training_loss 0.1537559775263071 test_loss: 0.16655969619750977 test_wrong: 0.0765625
epoch: 6 training_loss 0.14133841380476952 test_loss: 0.17287061214447022 test_wrong: 0.0671875
epoch: 7 training_loss 0.1362388638779521 test_loss: 0.17523415088653566 test_wrong: 0.06875
epoch: 8 training_loss 0.13357348173856734 test_loss: 0.15878294706344603 test_wrong: 0.06875
epoch: 9 training_loss 0.13595237467437982 test_loss: 0.14707489013671876 test_wrong: 0.0640625
epoch: 10 training_loss 0.12002267267554999 test_loss: 0.17014673948287964 test_wrong: 0.078125
epoch: 11 training_loss 0.12785785868763924 test_loss: 0.14542055130004883 test_wrong: 0.0609375
epoch: 12 training_loss 0.11271499037742615 test_loss: 0.17917196750640868 test_wrong: 0.08125
epoch: 13 training_loss 0.12025306750088931 test_loss: 0.16329684257507324 test_wrong: 0.06875
epoch: 14 training_loss 0.1209717059507966 test_loss: 0.14978812932968139 test_wrong: 0.0640625
epoch: 15 training_loss 0.11379260256886482 test_loss: 0.18174917697906495 test_wrong: 0.078125
epoch: 16 training_loss 0.10676891837269067 test_loss: 0.14304757118225098 test_wrong: 0.0578125
epoch: 17 training_loss 0.11078078787773847 test_loss: 0.13969922065734863 test_wrong: 0.06875
epoch: 18 training_loss 0.09769099928438664 test_loss: 0.14161362648010253 test_wrong: 0.059375
epoch: 19 training_loss 0.10341069824993611 test_loss: 0.15557916164398194 test_wrong: 0.0703125
epoch: 20 training_loss 0.10821931302547455 test_loss: 0.1696668028831482 test_wrong: 0.0734375
epoch: 21 training_loss 0.10168747451156378 test_loss: 0.14394975900650026 test_wrong: 0.0515625
epoch: 22 training_loss 0.0974671696126461 test_loss: 0.13455859422683716 test_wrong: 0.0546875
epoch: 23 training_loss 0.09753456603735686 test_loss: 0.16499099731445313 test_wrong: 0.06875
epoch: 24 training_loss 0.09258192721754313 test_loss: 0.17358726263046265 test_wrong: 0.0796875
epoch: 25 training_loss 0.09445220397785306 test_loss: 0.15090469121932984 test_wrong: 0.06875
epoch: 26 training_loss 0.09029743464663625 test_loss: 0.15934768915176392 test_wrong: 0.0734375
epoch: 27 training_loss 0.09319060280919075 test_loss: 0.14962166547775269 test_wrong: 0.0640625
epoch: 28 training_loss 0.0872173454426229 test_loss: 0.15580214262008668 test_wrong: 0.0640625
epoch: 29 training_loss 0.08416955437511206 test_loss: 0.1931193709373474 test_wrong: 0.0890625
epoch: 30 training_loss 0.09587831532582641 test_loss: 0.14007035493850709 test_wrong: 0.06875
epoch: 31 training_loss 0.08365423122420906 test_loss: 0.17605654001235962 test_wrong: 0.071875
epoch: 32 training_loss 0.08252586608752609 test_loss: 0.18144968748092652 test_wrong: 0.0796875
epoch: 33 training_loss 0.08828686464577913 test_loss: 0.1511852979660034 test_wrong: 0.0640625
epoch: 34 training_loss 0.08460445199161767 test_loss: 0.16646562814712523 test_wrong: 0.065625
epoch: 35 training_loss 0.08890388701111078 test_loss: 0.16569722890853883 test_wrong: 0.0765625
epoch: 36 training_loss 0.08604481603950262 test_loss: 0.16174107789993286 test_wrong: 0.071875
epoch: 37 training_loss 0.08574161127209663 test_loss: 0.15393469333648682 test_wrong: 0.0578125
epoch: 38 training_loss 0.08575549807399512 test_loss: 0.15093082189559937 test_wrong: 0.071875
epoch: 39 training_loss 0.08518690619617701 test_loss: 0.17004493474960328 test_wrong: 0.0734375
epoch: 40 training_loss 0.0799357670545578 test_loss: 0.1761545419692993 test_wrong: 0.0859375
epoch: 41 training_loss 0.08265373783186078 test_loss: 0.18485282659530639 test_wrong: 0.075
epoch: 42 training_loss 0.07606563370674849 test_loss: 0.17895458936691283 test_wrong: 0.075
epoch: 43 training_loss 0.07782628249377012 test_loss: 0.16716657876968383 test_wrong: 0.078125
epoch: 44 training_loss 0.07675092611461878 test_loss: 0.16080954074859619 test_wrong: 0.0796875
epoch: 45 training_loss 0.08093817751854658 test_loss: 0.18745253086090088 test_wrong: 0.0875
epoch: 46 training_loss 0.07811051566153765 test_loss: 0.18934913873672485 test_wrong: 0.0828125
epoch: 47 training_loss 0.07576866643503308 test_loss: 0.2094796657562256 test_wrong: 0.0890625
epoch: 48 training_loss 0.08291407169774175 test_loss: 0.16590620279312135 test_wrong: 0.0671875
epoch: 49 training_loss 0.07340896714478731 test_loss: 0.17375705242156983 test_wrong: 0.075
episode: 0 training return: tensor(2.5336e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(2.5877e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(2.5200e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(1.8656e-07, device='cuda:0', grad_fn=<AddBackward0>)
