epoch: 0 training_loss 53.86927043914795 test_loss: 38.172665405273435
epoch: 1 training_loss 33.78309829711914 test_loss: 30.20224609375
epoch: 2 training_loss 28.527053718566894 test_loss: 26.5896728515625
epoch: 3 training_loss 24.721780872344972 test_loss: 23.412939453125
epoch: 4 training_loss 22.22244577407837 test_loss: 20.964833068847657
epoch: 5 training_loss 20.17371597290039 test_loss: 19.632565307617188
epoch: 6 training_loss 18.94516342163086 test_loss: 18.774810791015625
epoch: 7 training_loss 18.070906410217287 test_loss: 18.16287841796875
epoch: 8 training_loss 17.22526661872864 test_loss: 16.984373474121092
epoch: 9 training_loss 16.554383630752564 test_loss: 16.985319519042967
epoch: 10 training_loss 16.06554651260376 test_loss: 16.415489196777344
epoch: 11 training_loss 15.528207101821899 test_loss: 15.323146057128906
epoch: 12 training_loss 15.253663139343262 test_loss: 14.766744995117188
epoch: 13 training_loss 14.688610277175904 test_loss: 14.872207641601562
epoch: 14 training_loss 14.450116815567016 test_loss: 14.588270568847657
epoch: 15 training_loss 14.357461395263671 test_loss: 14.178250122070313
epoch: 16 training_loss 13.963277883529663 test_loss: 14.054533386230469
epoch: 17 training_loss 13.782290029525758 test_loss: 13.8073974609375
epoch: 18 training_loss 13.678110122680664 test_loss: 14.250448608398438
epoch: 19 training_loss 13.432004680633545 test_loss: 13.822355651855469
epoch: 20 training_loss 13.392079343795777 test_loss: 13.375169372558593
epoch: 21 training_loss 13.12163537979126 test_loss: 13.012538146972656
epoch: 22 training_loss 12.99602879524231 test_loss: 13.118061828613282
epoch: 23 training_loss 12.822873277664184 test_loss: 13.003904724121094
epoch: 24 training_loss 12.74745054244995 test_loss: 12.592680358886719
epoch: 25 training_loss 12.64210699081421 test_loss: 12.484490966796875
epoch: 26 training_loss 12.4201433467865 test_loss: 12.597193908691406
epoch: 27 training_loss 12.413989334106445 test_loss: 12.14654083251953
epoch: 28 training_loss 12.221440963745117 test_loss: 12.053366088867188
epoch: 29 training_loss 12.239820442199708 test_loss: 11.955359649658202
episode: 0 training return: tensor(-456.9191, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(-499.6862, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(-474.9275, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 1 test_true_pfm: 2408.8542685953485
episode: 3 training return: tensor(-499.8835, device='cuda:0', grad_fn=<AddBackward0>)
episode: 4 training return: tensor(-499.9301, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(-499.9348, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 2 test_true_pfm: 2802.2131579640204
episode: 6 training return: tensor(-498.1075, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(-499.9066, device='cuda:0', grad_fn=<AddBackward0>)
episode: 8 training return: tensor(-498.2077, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 3 test_true_pfm: 2545.8052642956313
episode: 9 training return: tensor(-499.8709, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(-499.7837, device='cuda:0', grad_fn=<AddBackward0>)
