epoch: 0 training_loss 0.3527313961088657 test_loss: 0.25292904376983644
epoch: 1 training_loss 0.24032984033226967 test_loss: 0.22574334144592284
epoch: 2 training_loss 0.20440718241035938 test_loss: 0.16811685562133788
epoch: 3 training_loss 0.19333649471402167 test_loss: 0.1869440793991089
epoch: 4 training_loss 0.17442781798541546 test_loss: 0.14698238372802735
epoch: 5 training_loss 0.16156515188515186 test_loss: 0.1500503897666931
epoch: 6 training_loss 0.153873360902071 test_loss: 0.17443755865097046
epoch: 7 training_loss 0.1449947102740407 test_loss: 0.169126296043396
epoch: 8 training_loss 0.1380048103630543 test_loss: 0.13068432807922364
epoch: 9 training_loss 0.14175980370491742 test_loss: 0.11368767023086548
epoch: 10 training_loss 0.13036634158343077 test_loss: 0.14325881004333496
epoch: 11 training_loss 0.14226795680820942 test_loss: 0.1329794406890869
epoch: 12 training_loss 0.13641212340444325 test_loss: 0.14440540075302125
epoch: 13 training_loss 0.12173449303954839 test_loss: 0.12274762392044067
epoch: 14 training_loss 0.12698094613850117 test_loss: 0.11111905574798583
epoch: 15 training_loss 0.12146180447191 test_loss: 0.11385852098464966
epoch: 16 training_loss 0.12348520625382661 test_loss: 0.11825419664382934
epoch: 17 training_loss 0.12009006265550852 test_loss: 0.09966166019439697
epoch: 18 training_loss 0.12923603884875776 test_loss: 0.10901559591293335
epoch: 19 training_loss 0.13150663286447525 test_loss: 0.10798557996749877
epoch: 20 training_loss 0.11750289469957352 test_loss: 0.10286556482315064
epoch: 21 training_loss 0.11949302736669778 test_loss: 0.14323384761810304
epoch: 22 training_loss 0.11911146342754364 test_loss: 0.12954748868942262
epoch: 23 training_loss 0.1282296445220709 test_loss: 0.13768303394317627
epoch: 24 training_loss 0.11801522810012102 test_loss: 0.11201075315475464
epoch: 25 training_loss 0.12044821351766587 test_loss: 0.10551329851150512
epoch: 26 training_loss 0.11776343774050474 test_loss: 0.10250886678695678
epoch: 27 training_loss 0.11874172938987612 test_loss: 0.12752811908721923
epoch: 28 training_loss 0.12455122090876103 test_loss: 0.12216038703918457
epoch: 29 training_loss 0.11283942215144634 test_loss: 0.1215593934059143
epoch: 30 training_loss 0.12064574249088764 test_loss: 0.1218820333480835
epoch: 31 training_loss 0.11260020904242993 test_loss: 0.12795082330703736
epoch: 32 training_loss 0.11976620147004724 test_loss: 0.12445436716079712
epoch: 33 training_loss 0.11859686225652695 test_loss: 0.11090853214263915
epoch: 34 training_loss 0.1098756093159318 test_loss: 0.13228726387023926
epoch: 35 training_loss 0.11770676802843809 test_loss: 0.09484055042266845
epoch: 36 training_loss 0.11484282504767179 test_loss: 0.10910500288009643
epoch: 37 training_loss 0.11574523881077767 test_loss: 0.12719063758850097
epoch: 38 training_loss 0.10676728256046772 test_loss: 0.09775269627571107
epoch: 39 training_loss 0.12597192883491515 test_loss: 0.1311905264854431
epoch: 40 training_loss 0.1132279467023909 test_loss: 0.10450178384780884
epoch: 41 training_loss 0.12034075826406479 test_loss: 0.10992339849472046
epoch: 42 training_loss 0.11111569698899984 test_loss: 0.11824295520782471
epoch: 43 training_loss 0.12315866004675627 test_loss: 0.15662994384765624
epoch: 44 training_loss 0.11333900947123766 test_loss: 0.12662575244903565
epoch: 45 training_loss 0.1078021165728569 test_loss: 0.10886471271514893
epoch: 46 training_loss 0.11975433990359306 test_loss: 0.09597574472427368
epoch: 47 training_loss 0.11136094722896814 test_loss: 0.11143643856048584
epoch: 48 training_loss 0.1084326096251607 test_loss: 0.11406289339065552
epoch: 49 training_loss 0.11926620066165924 test_loss: 0.09023358225822449
epoch: 0 training_loss 47.90740697860718 test_loss: 27.22423400878906
epoch: 1 training_loss 21.700465335845948 test_loss: 18.53555908203125
epoch: 2 training_loss 16.51126997947693 test_loss: 15.168577575683594
epoch: 3 training_loss 13.917714595794678 test_loss: 13.128163146972657
epoch: 4 training_loss 12.114368801116944 test_loss: 11.02280502319336
epoch: 5 training_loss 10.606451787948608 test_loss: 9.97838134765625
epoch: 6 training_loss 9.615950241088868 test_loss: 9.323400115966797
epoch: 7 training_loss 8.776040472984313 test_loss: 8.520110321044921
epoch: 8 training_loss 8.044725451469422 test_loss: 7.7983451843261715
epoch: 9 training_loss 7.563604321479797 test_loss: 7.251773834228516
epoch: 10 training_loss 7.008719654083252 test_loss: 6.626239013671875
epoch: 11 training_loss 6.77729594707489 test_loss: 6.616936492919922
epoch: 12 training_loss 6.341642184257507 test_loss: 6.360016632080078
epoch: 13 training_loss 6.0546795272827145 test_loss: 5.568196868896484
epoch: 14 training_loss 5.909555506706238 test_loss: 5.791236114501953
epoch: 15 training_loss 5.684283018112183 test_loss: 5.966484451293946
epoch: 16 training_loss 5.442228126525879 test_loss: 5.245715713500976
epoch: 17 training_loss 5.286623692512512 test_loss: 5.291480636596679
epoch: 18 training_loss 5.064123389720916 test_loss: 5.056448745727539
epoch: 19 training_loss 5.074127089977265 test_loss: 5.122642898559571
epoch: 20 training_loss 4.971686103343964 test_loss: 5.129660797119141
epoch: 21 training_loss 4.7426091265678405 test_loss: 4.471678161621094
epoch: 22 training_loss 4.67917587518692 test_loss: 4.9679309844970705
epoch: 23 training_loss 4.538064918518066 test_loss: 4.396543884277344
epoch: 24 training_loss 4.518495302200318 test_loss: 4.623411178588867
epoch: 25 training_loss 4.4095820760726925 test_loss: 4.3418937683105465
epoch: 26 training_loss 4.381287484169007 test_loss: 4.358493423461914
epoch: 27 training_loss 4.266903734207153 test_loss: 4.080576705932617
epoch: 28 training_loss 4.119623816013336 test_loss: 4.319168472290039
epoch: 29 training_loss 4.076959376335144 test_loss: 3.8892311096191405
epoch: 30 training_loss 3.9943392419815065 test_loss: 4.060551452636719
epoch: 31 training_loss 3.929586577415466 test_loss: 3.9719310760498048
epoch: 32 training_loss 3.8705852603912354 test_loss: 3.943877410888672
epoch: 33 training_loss 3.951509747505188 test_loss: 3.6593894958496094
epoch: 34 training_loss 3.751569755077362 test_loss: 3.584752655029297
epoch: 35 training_loss 3.775890691280365 test_loss: 3.61706657409668
epoch: 36 training_loss 3.742919023036957 test_loss: 3.613652801513672
epoch: 37 training_loss 3.6057755184173583 test_loss: 3.8496932983398438
epoch: 38 training_loss 3.503906421661377 test_loss: 3.5233131408691407
epoch: 39 training_loss 3.526462676525116 test_loss: 3.608676528930664
epoch: 40 training_loss 3.480081148147583 test_loss: 3.601482391357422
epoch: 41 training_loss 3.403789072036743 test_loss: 3.3555522918701173
epoch: 42 training_loss 3.3524238753318785 test_loss: 3.206608200073242
epoch: 43 training_loss 3.3730220127105714 test_loss: 3.3700321197509764
epoch: 44 training_loss 3.3067838048934934 test_loss: 3.2982547760009764
epoch: 45 training_loss 3.298020317554474 test_loss: 3.2929008483886717
epoch: 46 training_loss 3.1752641248703 test_loss: 3.324155807495117
epoch: 47 training_loss 3.160875554084778 test_loss: 3.27225341796875
epoch: 48 training_loss 3.137062952518463 test_loss: 3.058491516113281
epoch: 49 training_loss 3.071159996986389 test_loss: 2.8241909027099608
episode: 0 training return: tensor(-999.8804, device='cuda:0')
episode: 1 training return: tensor(-996.2942, device='cuda:0')
episode: 2 training return: tensor(-999.2093, device='cuda:0')
epoch: 1 test_true_pfm: 2419.089932211848
episode: 3 training return: tensor(-973.1857, device='cuda:0')
episode: 4 training return: tensor(-997.6235, device='cuda:0')
episode: 5 training return: tensor(-998.4356, device='cuda:0')
epoch: 2 test_true_pfm: 3183.3552989469135
episode: 6 training return: tensor(-913.5780, device='cuda:0')
episode: 7 training return: tensor(-989.4485, device='cuda:0')
episode: 8 training return: tensor(-996.8310, device='cuda:0')
epoch: 3 test_true_pfm: 3555.355188057973
episode: 9 training return: tensor(-999.7021, device='cuda:0')
episode: 10 training return: tensor(-992.8950, device='cuda:0')
episode: 11 training return: tensor(-997.5545, device='cuda:0')
epoch: 4 test_true_pfm: 2873.0884732001537
episode: 12 training return: tensor(-997.3792, device='cuda:0')
episode: 13 training return: tensor(-999.8692, device='cuda:0')
episode: 14 training return: tensor(-957.7866, device='cuda:0')
epoch: 5 test_true_pfm: 2347.630354846768
episode: 15 training return: tensor(-995.6335, device='cuda:0')
episode: 16 training return: tensor(-999.7140, device='cuda:0')
episode: 17 training return: tensor(-972.2834, device='cuda:0')
epoch: 6 test_true_pfm: 3216.093000419662
episode: 18 training return: tensor(-992.5648, device='cuda:0')
episode: 19 training return: tensor(-996.4945, device='cuda:0')
episode: 20 training return: tensor(-977.8702, device='cuda:0')
epoch: 7 test_true_pfm: 3388.850064184033
episode: 21 training return: tensor(-998.6735, device='cuda:0')
episode: 22 training return: tensor(-991.7838, device='cuda:0')
episode: 23 training return: tensor(-997.0964, device='cuda:0')
epoch: 8 test_true_pfm: 2507.6791378957237
episode: 24 training return: tensor(-992.6461, device='cuda:0')
episode: 25 training return: tensor(-989.0402, device='cuda:0')
episode: 26 training return: tensor(-968.5472, device='cuda:0')
epoch: 9 test_true_pfm: 412.52721964637846
episode: 27 training return: tensor(-999.5629, device='cuda:0')
episode: 28 training return: tensor(-993.7883, device='cuda:0')
episode: 29 training return: tensor(-999.5838, device='cuda:0')
epoch: 10 test_true_pfm: 3081.5304776563266
episode: 30 training return: tensor(-997.5168, device='cuda:0')
episode: 31 training return: tensor(-998.6956, device='cuda:0')
episode: 32 training return: tensor(-919.7896, device='cuda:0')
epoch: 11 test_true_pfm: 1380.5764297277995
episode: 33 training return: tensor(-989.6196, device='cuda:0')
episode: 34 training return: tensor(-999.9836, device='cuda:0')
episode: 35 training return: tensor(-999.6224, device='cuda:0')
epoch: 12 test_true_pfm: 2314.587599039056
episode: 36 training return: tensor(-935.3484, device='cuda:0')
episode: 37 training return: tensor(-999.8633, device='cuda:0')
episode: 38 training return: tensor(-937.2584, device='cuda:0')
epoch: 13 test_true_pfm: 1904.2538252556508
episode: 39 training return: tensor(-996.2709, device='cuda:0')
episode: 40 training return: tensor(-995.3906, device='cuda:0')
episode: 41 training return: tensor(-992.9953, device='cuda:0')
epoch: 14 test_true_pfm: 2041.9495880972474
episode: 42 training return: tensor(-999.5807, device='cuda:0')
episode: 43 training return: tensor(-993.1258, device='cuda:0')
episode: 44 training return: tensor(-929.1262, device='cuda:0')
epoch: 15 test_true_pfm: 2124.4033246321433
episode: 45 training return: tensor(-995.2419, device='cuda:0')
episode: 46 training return: tensor(-999.3223, device='cuda:0')
