epoch: 0 training_loss 0.36642074808478353 test_loss: 0.25909647941589353
epoch: 1 training_loss 0.22939372196793556 test_loss: 0.19440507888793945
epoch: 2 training_loss 0.1950168811529875 test_loss: 0.17317438125610352
epoch: 3 training_loss 0.17872655227780343 test_loss: 0.16169203519821168
epoch: 4 training_loss 0.18880037002265454 test_loss: 0.19587504863739014
epoch: 5 training_loss 0.15189001236110924 test_loss: 0.14470664262771607
epoch: 6 training_loss 0.1474027270078659 test_loss: 0.15173707008361817
epoch: 7 training_loss 0.14299055762588977 test_loss: 0.14175779819488527
epoch: 8 training_loss 0.1503444681689143 test_loss: 0.14066632986068725
epoch: 9 training_loss 0.14267724733799697 test_loss: 0.12917447090148926
epoch: 10 training_loss 0.13756270796060563 test_loss: 0.14181629419326783
epoch: 11 training_loss 0.12644193459302186 test_loss: 0.12473211288452149
epoch: 12 training_loss 0.14059751130640508 test_loss: 0.14443626403808593
epoch: 13 training_loss 0.12439715653657914 test_loss: 0.1409367322921753
epoch: 14 training_loss 0.12485983230173588 test_loss: 0.15150339603424073
epoch: 15 training_loss 0.13187620431184768 test_loss: 0.14222298860549926
epoch: 16 training_loss 0.12647486355155707 test_loss: 0.1396968960762024
epoch: 17 training_loss 0.12621900357306004 test_loss: 0.14002896547317506
epoch: 18 training_loss 0.12343978494405747 test_loss: 0.14673467874526977
epoch: 19 training_loss 0.11876472361385822 test_loss: 0.12579461336135864
epoch: 20 training_loss 0.12309037420898676 test_loss: 0.1182945966720581
epoch: 21 training_loss 0.11989537734538316 test_loss: 0.1332848310470581
epoch: 22 training_loss 0.12387116108089685 test_loss: 0.12449679374694825
epoch: 23 training_loss 0.12015428896993399 test_loss: 0.12738026380538942
epoch: 24 training_loss 0.1215619383752346 test_loss: 0.13825913667678832
epoch: 25 training_loss 0.12506778854876757 test_loss: 0.10857603549957276
epoch: 26 training_loss 0.11577984388917685 test_loss: 0.12475272417068481
epoch: 27 training_loss 0.1222317399829626 test_loss: 0.1065286636352539
epoch: 28 training_loss 0.11714006450027227 test_loss: 0.1024215579032898
epoch: 29 training_loss 0.11774295445531607 test_loss: 0.10876110792160035
epoch: 30 training_loss 0.11817363407462836 test_loss: 0.14329575300216674
epoch: 31 training_loss 0.12378307186067104 test_loss: 0.11102327108383178
epoch: 32 training_loss 0.11828979896381497 test_loss: 0.11983258724212646
epoch: 33 training_loss 0.1148057622089982 test_loss: 0.0990341305732727
epoch: 34 training_loss 0.11258456468582154 test_loss: 0.1329362154006958
epoch: 35 training_loss 0.12148447901010513 test_loss: 0.12256258726119995
epoch: 36 training_loss 0.11495334275066853 test_loss: 0.12293791770935059
epoch: 37 training_loss 0.11866510480642319 test_loss: 0.11888794898986817
epoch: 38 training_loss 0.11032789070159196 test_loss: 0.09528126120567322
epoch: 39 training_loss 0.11113784395158291 test_loss: 0.10172343254089355
epoch: 40 training_loss 0.11788804709911346 test_loss: 0.099493008852005
epoch: 41 training_loss 0.11482830379158258 test_loss: 0.12464464902877807
epoch: 42 training_loss 0.11582130003720521 test_loss: 0.11776585578918457
epoch: 43 training_loss 0.12009332291781902 test_loss: 0.12702640295028686
epoch: 44 training_loss 0.11776978943496942 test_loss: 0.10747441053390502
epoch: 45 training_loss 0.11150402050465345 test_loss: 0.1046730637550354
epoch: 46 training_loss 0.1068443576991558 test_loss: 0.1010482907295227
epoch: 47 training_loss 0.1129650497995317 test_loss: 0.10977553129196167
epoch: 48 training_loss 0.10578414004296065 test_loss: 0.11355072259902954
epoch: 49 training_loss 0.11394108690321446 test_loss: 0.11322453022003173
epoch: 50 training_loss 0.11976377159357071 test_loss: 0.09736812710762024
epoch: 51 training_loss 0.11131839122623205 test_loss: 0.10437091588973998
epoch: 52 training_loss 0.11483325764536857 test_loss: 0.13499119281768798
epoch: 53 training_loss 0.11329464379698038 test_loss: 0.10484069585800171
epoch: 54 training_loss 0.10936724156141281 test_loss: 0.11207618713378906
epoch: 55 training_loss 0.10452583812177181 test_loss: 0.09577795267105102
epoch: 56 training_loss 0.11427611213177442 test_loss: 0.09816779494285584
epoch: 57 training_loss 0.11250252813100815 test_loss: 0.12936617136001588
epoch: 58 training_loss 0.12013148158788681 test_loss: 0.12167465686798096
epoch: 59 training_loss 0.11348929507657886 test_loss: 0.11908857822418213
epoch: 60 training_loss 0.10989781275391579 test_loss: 0.10439257621765137
epoch: 61 training_loss 0.11579794205725193 test_loss: 0.10752958059310913
epoch: 62 training_loss 0.11088013090193272 test_loss: 0.12379827499389648
epoch: 63 training_loss 0.1125356799736619 test_loss: 0.10473090410232544
epoch: 64 training_loss 0.11218245400115848 test_loss: 0.1281239628791809
epoch: 65 training_loss 0.10104406550526619 test_loss: 0.11204419136047364
epoch: 66 training_loss 0.10835470475256442 test_loss: 0.10725709199905395
epoch: 67 training_loss 0.11207273218780756 test_loss: 0.10412245988845825
epoch: 68 training_loss 0.12317801836878062 test_loss: 0.10133295059204102
epoch: 69 training_loss 0.11263670865446329 test_loss: 0.08948172330856323
epoch: 70 training_loss 0.10950382936745882 test_loss: 0.11239036321640014
epoch: 71 training_loss 0.11109271267428994 test_loss: 0.12740434408187867
epoch: 72 training_loss 0.10575635705143213 test_loss: 0.13642611503601074
epoch: 73 training_loss 0.10546152040362358 test_loss: 0.12380635738372803
epoch: 74 training_loss 0.11049891766160727 test_loss: 0.10564185380935669
epoch: 75 training_loss 0.11338641662150621 test_loss: 0.09361153841018677
epoch: 76 training_loss 0.11726715806871653 test_loss: 0.10065826177597045
epoch: 77 training_loss 0.10197947081178427 test_loss: 0.10917124748229981
epoch: 78 training_loss 0.11606379333883524 test_loss: 0.1115793228149414
epoch: 79 training_loss 0.10519651291891932 test_loss: 0.10352611541748047
epoch: 0 training_loss 46.526232681274415 test_loss: 25.448454284667967
epoch: 1 training_loss 20.408465480804445 test_loss: 17.598870849609376
epoch: 2 training_loss 15.487905340194702 test_loss: 13.91742401123047
epoch: 3 training_loss 13.209528160095214 test_loss: 11.867242431640625
epoch: 4 training_loss 11.176541748046875 test_loss: 10.197458648681641
epoch: 5 training_loss 9.983458576202393 test_loss: 9.279743194580078
epoch: 6 training_loss 9.017415661811828 test_loss: 8.819379425048828
epoch: 7 training_loss 8.42128276348114 test_loss: 7.888124084472656
epoch: 8 training_loss 7.82298095703125 test_loss: 7.75252685546875
epoch: 9 training_loss 7.286172165870666 test_loss: 7.049974822998047
epoch: 10 training_loss 6.731408796310425 test_loss: 6.777486419677734
epoch: 11 training_loss 6.39884379863739 test_loss: 6.5571342468261715
epoch: 12 training_loss 6.259481124877929 test_loss: 6.276884841918945
epoch: 13 training_loss 5.855252461433411 test_loss: 5.733847427368164
epoch: 14 training_loss 5.6325657939910885 test_loss: 5.423342895507813
epoch: 15 training_loss 5.504256505966186 test_loss: 5.424605941772461
epoch: 16 training_loss 5.2273542380332945 test_loss: 4.946695327758789
epoch: 17 training_loss 5.150362882614136 test_loss: 5.1415046691894535
epoch: 18 training_loss 5.000877633094787 test_loss: 4.729228591918945
epoch: 19 training_loss 4.784867279529571 test_loss: 4.332981109619141
epoch: 20 training_loss 4.721083133220673 test_loss: 4.635647201538086
epoch: 21 training_loss 4.631119003295899 test_loss: 4.665398025512696
epoch: 22 training_loss 4.438603665828705 test_loss: 4.490016174316406
epoch: 23 training_loss 4.364649300575256 test_loss: 4.300963592529297
epoch: 24 training_loss 4.281998164653778 test_loss: 4.209743881225586
epoch: 25 training_loss 4.239007387161255 test_loss: 4.110314559936524
epoch: 26 training_loss 4.144165303707123 test_loss: 3.8410575866699217
epoch: 27 training_loss 4.045386803150177 test_loss: 3.7393444061279295
epoch: 28 training_loss 4.012858216762543 test_loss: 4.040102767944336
epoch: 29 training_loss 3.812198438644409 test_loss: 3.7757617950439455
epoch: 30 training_loss 3.801327474117279 test_loss: 4.074447250366211
epoch: 31 training_loss 3.7057845830917358 test_loss: 3.501483154296875
epoch: 32 training_loss 3.710136978626251 test_loss: 3.6241744995117187
epoch: 33 training_loss 3.6465143036842345 test_loss: 3.731085205078125
epoch: 34 training_loss 3.4672527289390564 test_loss: 3.5751880645751952
epoch: 35 training_loss 3.5734226298332215 test_loss: 3.4582523345947265
epoch: 36 training_loss 3.5318692350387573 test_loss: 3.6147857666015626
epoch: 37 training_loss 3.384351258277893 test_loss: 3.713679885864258
epoch: 38 training_loss 3.3335423254966736 test_loss: 3.302402114868164
epoch: 39 training_loss 3.3797727632522583 test_loss: 3.3955917358398438
epoch: 40 training_loss 3.390071792602539 test_loss: 3.262283706665039
epoch: 41 training_loss 3.327751324176788 test_loss: 3.39920654296875
epoch: 42 training_loss 3.2015138173103335 test_loss: 3.122760772705078
epoch: 43 training_loss 3.2444481372833254 test_loss: 3.155793380737305
epoch: 44 training_loss 3.1551714038848875 test_loss: 3.0682445526123048
epoch: 45 training_loss 3.095456051826477 test_loss: 3.068851661682129
epoch: 46 training_loss 3.1390422916412355 test_loss: 3.161338233947754
epoch: 47 training_loss 3.0814110159873964 test_loss: 3.0658185958862303
epoch: 48 training_loss 2.98454802274704 test_loss: 2.9484113693237304
epoch: 49 training_loss 3.043086812496185 test_loss: 2.8769432067871095
epoch: 50 training_loss 2.973708689212799 test_loss: 2.9376073837280274
epoch: 51 training_loss 2.811376292705536 test_loss: 2.826478385925293
epoch: 52 training_loss 2.87367146730423 test_loss: 2.9956686019897463
epoch: 53 training_loss 2.944916751384735 test_loss: 2.695969009399414
epoch: 54 training_loss 2.8257970333099367 test_loss: 2.863165092468262
epoch: 55 training_loss 2.8186158442497256 test_loss: 2.7520578384399412
epoch: 56 training_loss 2.8961667442321777 test_loss: 2.756029510498047
epoch: 57 training_loss 2.72553897857666 test_loss: 2.7152019500732423
epoch: 58 training_loss 2.775622537136078 test_loss: 2.787892723083496
epoch: 59 training_loss 2.7315602684020996 test_loss: 2.785944938659668
epoch: 60 training_loss 2.7332398986816404 test_loss: 2.634599494934082
epoch: 61 training_loss 2.6831966614723206 test_loss: 2.8780593872070312
epoch: 62 training_loss 2.6922004771232606 test_loss: 2.7266759872436523
epoch: 63 training_loss 2.650859920978546 test_loss: 2.684982681274414
epoch: 64 training_loss 2.650355200767517 test_loss: 2.613094520568848
epoch: 65 training_loss 2.6050942492485047 test_loss: 2.533197593688965
epoch: 66 training_loss 2.574101483821869 test_loss: 2.5271699905395506
epoch: 67 training_loss 2.55962477684021 test_loss: 2.5603870391845702
epoch: 68 training_loss 2.5465431571006776 test_loss: 2.4526836395263674
epoch: 69 training_loss 2.5034748589992524 test_loss: 2.4735279083251953
epoch: 70 training_loss 2.509058692455292 test_loss: 2.5403608322143554
epoch: 71 training_loss 2.514312769174576 test_loss: 2.4526174545288084
epoch: 72 training_loss 2.4701903355121613 test_loss: 2.4709835052490234
epoch: 73 training_loss 2.4037687253952025 test_loss: 2.474788475036621
epoch: 74 training_loss 2.4640166401863097 test_loss: 2.446294975280762
epoch: 75 training_loss 2.4669875037670135 test_loss: 2.3400482177734374
epoch: 76 training_loss 2.4338179314136505 test_loss: 2.440051460266113
epoch: 77 training_loss 2.375364533662796 test_loss: 2.4259462356567383
epoch: 78 training_loss 2.3798347413539886 test_loss: 2.4250701904296874
epoch: 79 training_loss 2.346799863576889 test_loss: 2.4054746627807617
5839.482465334755
episode: 0 training return: tensor(-650.1243, device='cuda:0')
episode: 1 training return: tensor(-650.3644, device='cuda:0')
episode: 2 training return: tensor(-999.9918, device='cuda:0')
episode: 3 training return: tensor(-667.0170, device='cuda:0')
epoch: 1 test_true_pfm: 8760.930293827909
episode: 4 training return: tensor(-969.4671, device='cuda:0')
episode: 5 training return: tensor(-884.4455, device='cuda:0')
episode: 6 training return: tensor(-615.5614, device='cuda:0')
episode: 7 training return: tensor(-676.6204, device='cuda:0')
epoch: 2 test_true_pfm: 8826.820025298453
episode: 8 training return: tensor(-565.1550, device='cuda:0')
episode: 9 training return: tensor(-999.8879, device='cuda:0')
episode: 10 training return: tensor(-639.7028, device='cuda:0')
episode: 11 training return: tensor(-914.0075, device='cuda:0')
epoch: 3 test_true_pfm: 8314.16029123627
episode: 12 training return: tensor(-899.7796, device='cuda:0')
episode: 13 training return: tensor(-559.9816, device='cuda:0')
episode: 14 training return: tensor(-565.6889, device='cuda:0')
episode: 15 training return: tensor(-526.2262, device='cuda:0')
epoch: 4 test_true_pfm: 6170.158988569743
episode: 16 training return: tensor(-635.1541, device='cuda:0')
episode: 17 training return: tensor(-758.8673, device='cuda:0')
episode: 18 training return: tensor(-431.0066, device='cuda:0')
episode: 19 training return: tensor(-583.0390, device='cuda:0')
epoch: 5 test_true_pfm: 5497.74922261477
episode: 20 training return: tensor(-688.4333, device='cuda:0')
episode: 21 training return: tensor(-861.1406, device='cuda:0')
episode: 22 training return: tensor(-715.5449, device='cuda:0')
episode: 23 training return: tensor(-981.7066, device='cuda:0')
epoch: 6 test_true_pfm: 3575.5687060353644
episode: 24 training return: tensor(-829.8038, device='cuda:0')
episode: 25 training return: tensor(-564.1254, device='cuda:0')
episode: 26 training return: tensor(-772.9544, device='cuda:0')
episode: 27 training return: tensor(-918.4034, device='cuda:0')
epoch: 7 test_true_pfm: 7967.492891356014
episode: 28 training return: tensor(-943.7031, device='cuda:0')
episode: 29 training return: tensor(-940.4497, device='cuda:0')
episode: 30 training return: tensor(-999.7594, device='cuda:0')
episode: 31 training return: tensor(-999.9346, device='cuda:0')
epoch: 8 test_true_pfm: 6074.399038718931
episode: 32 training return: tensor(-657.6288, device='cuda:0')
episode: 33 training return: tensor(-999.9764, device='cuda:0')
episode: 34 training return: tensor(-476.0793, device='cuda:0')
episode: 35 training return: tensor(-994.3868, device='cuda:0')
epoch: 9 test_true_pfm: 4569.565535887353
episode: 36 training return: tensor(-999.9632, device='cuda:0')
episode: 37 training return: tensor(-294.2010, device='cuda:0')
episode: 38 training return: tensor(-935.3158, device='cuda:0')
episode: 39 training return: tensor(-925.8033, device='cuda:0')
epoch: 10 test_true_pfm: 2246.0303396124114
episode: 40 training return: tensor(-312.4865, device='cuda:0')
episode: 41 training return: tensor(-688.0231, device='cuda:0')
episode: 42 training return: tensor(-377.3686, device='cuda:0')
episode: 43 training return: tensor(-993.2969, device='cuda:0')
epoch: 11 test_true_pfm: 7610.799052567142
episode: 44 training return: tensor(-737.0360, device='cuda:0')
episode: 45 training return: tensor(-520.4452, device='cuda:0')
episode: 46 training return: tensor(-686.3616, device='cuda:0')
episode: 47 training return: tensor(-809.1640, device='cuda:0')
epoch: 12 test_true_pfm: 6067.860347619986
episode: 48 training return: tensor(-656.5930, device='cuda:0')
episode: 49 training return: tensor(-921.2368, device='cuda:0')
episode: 50 training return: tensor(-976.8411, device='cuda:0')
episode: 51 training return: tensor(-998.1185, device='cuda:0')
epoch: 13 test_true_pfm: 4855.620966245453
episode: 52 training return: tensor(-999.9993, device='cuda:0')
episode: 53 training return: tensor(-278.0823, device='cuda:0')
episode: 54 training return: tensor(-727.9097, device='cuda:0')
episode: 55 training return: tensor(-804.0188, device='cuda:0')
epoch: 14 test_true_pfm: 4647.563184207012
episode: 56 training return: tensor(-563.1721, device='cuda:0')
episode: 57 training return: tensor(-864.5024, device='cuda:0')
episode: 58 training return: tensor(-495.8710, device='cuda:0')
episode: 59 training return: tensor(-792.2659, device='cuda:0')
epoch: 15 test_true_pfm: 6293.234530099485
episode: 60 training return: tensor(-372.5697, device='cuda:0')
episode: 61 training return: tensor(-999.2747, device='cuda:0')
episode: 62 training return: tensor(-693.2878, device='cuda:0')
episode: 63 training return: tensor(-714.1727, device='cuda:0')
epoch: 16 test_true_pfm: 4707.141845568749
episode: 64 training return: tensor(-629.9661, device='cuda:0')
episode: 65 training return: tensor(-848.6056, device='cuda:0')
episode: 66 training return: tensor(-499.9399, device='cuda:0')
episode: 67 training return: tensor(-690.5640, device='cuda:0')
epoch: 17 test_true_pfm: 6908.524270901808
episode: 68 training return: tensor(-754.7272, device='cuda:0')
episode: 69 training return: tensor(-999.4062, device='cuda:0')
episode: 70 training return: tensor(-432.2395, device='cuda:0')
episode: 71 training return: tensor(-893.6024, device='cuda:0')
epoch: 18 test_true_pfm: 5410.995139813133
episode: 72 training return: tensor(-999.9567, device='cuda:0')
episode: 73 training return: tensor(-703.5233, device='cuda:0')
episode: 74 training return: tensor(-789.1450, device='cuda:0')
episode: 75 training return: tensor(-74.5816, device='cuda:0')
epoch: 19 test_true_pfm: 3769.419292182773
episode: 76 training return: tensor(-721.2365, device='cuda:0')
episode: 77 training return: tensor(-747.9697, device='cuda:0')
episode: 78 training return: tensor(-509.1497, device='cuda:0')
episode: 79 training return: tensor(-541.8792, device='cuda:0')
epoch: 20 test_true_pfm: 6051.56152576386
episode: 80 training return: tensor(-159.6693, device='cuda:0')
episode: 81 training return: tensor(-856.8642, device='cuda:0')
episode: 82 training return: tensor(-983.3629, device='cuda:0')
episode: 83 training return: tensor(-540.0244, device='cuda:0')
epoch: 21 test_true_pfm: 9376.557070623496
episode: 84 training return: tensor(-267.9417, device='cuda:0')
episode: 85 training return: tensor(-927.5967, device='cuda:0')
episode: 86 training return: tensor(-816.1268, device='cuda:0')
episode: 87 training return: tensor(-999.6476, device='cuda:0')
epoch: 22 test_true_pfm: 6852.2995689463705
episode: 88 training return: tensor(-937.4996, device='cuda:0')
episode: 89 training return: tensor(-486.9481, device='cuda:0')
episode: 90 training return: tensor(-798.2640, device='cuda:0')
episode: 91 training return: tensor(-999.9465, device='cuda:0')
epoch: 23 test_true_pfm: 7195.17225432895
episode: 92 training return: tensor(-918.8152, device='cuda:0')
episode: 93 training return: tensor(-682.7279, device='cuda:0')
episode: 94 training return: tensor(-589.3200, device='cuda:0')
episode: 95 training return: tensor(-789.1755, device='cuda:0')
epoch: 24 test_true_pfm: 4817.179349659924
episode: 96 training return: tensor(-695.8022, device='cuda:0')
episode: 97 training return: tensor(-315.1922, device='cuda:0')
episode: 98 training return: tensor(-777.8264, device='cuda:0')
episode: 99 training return: tensor(-718.2959, device='cuda:0')
epoch: 25 test_true_pfm: 7206.647609442979
episode: 100 training return: tensor(-862.0410, device='cuda:0')
episode: 101 training return: tensor(-398.7844, device='cuda:0')
episode: 102 training return: tensor(-671.4745, device='cuda:0')
episode: 103 training return: tensor(-998.8281, device='cuda:0')
epoch: 26 test_true_pfm: 8565.565301749457
episode: 104 training return: tensor(-413.7812, device='cuda:0')
episode: 105 training return: tensor(-667.6824, device='cuda:0')
episode: 106 training return: tensor(-969.9989, device='cuda:0')
episode: 107 training return: tensor(-593.0084, device='cuda:0')
epoch: 27 test_true_pfm: 6497.481902153067
episode: 108 training return: tensor(-953.2350, device='cuda:0')
episode: 109 training return: tensor(-693.4588, device='cuda:0')
episode: 110 training return: tensor(-802.3448, device='cuda:0')
episode: 111 training return: tensor(-938.5108, device='cuda:0')
epoch: 28 test_true_pfm: 6761.324495985758
episode: 112 training return: tensor(-711.3940, device='cuda:0')
episode: 113 training return: tensor(-494.1897, device='cuda:0')
episode: 114 training return: tensor(-998.1217, device='cuda:0')
episode: 115 training return: tensor(-808.2758, device='cuda:0')
epoch: 29 test_true_pfm: 4843.518806950817
episode: 116 training return: tensor(-569.7963, device='cuda:0')
episode: 117 training return: tensor(-970.9102, device='cuda:0')
episode: 118 training return: tensor(-511.1547, device='cuda:0')
episode: 119 training return: tensor(-751.2607, device='cuda:0')
epoch: 30 test_true_pfm: 3839.569979441496
episode: 120 training return: tensor(-588.8032, device='cuda:0')
episode: 121 training return: tensor(-581.3889, device='cuda:0')
episode: 122 training return: tensor(-456.5219, device='cuda:0')
episode: 123 training return: tensor(-607.5065, device='cuda:0')
epoch: 31 test_true_pfm: 6990.745660398083
episode: 124 training return: tensor(-985.3262, device='cuda:0')
episode: 125 training return: tensor(-851.0077, device='cuda:0')
episode: 126 training return: tensor(-999.9002, device='cuda:0')
episode: 127 training return: tensor(-702.0167, device='cuda:0')
epoch: 32 test_true_pfm: 8476.685990886328
episode: 128 training return: tensor(-936.4537, device='cuda:0')
episode: 129 training return: tensor(-221.4023, device='cuda:0')
episode: 130 training return: tensor(-408.8870, device='cuda:0')
episode: 131 training return: tensor(-843.9418, device='cuda:0')
epoch: 33 test_true_pfm: 4388.355099573294
episode: 132 training return: tensor(-451.5114, device='cuda:0')
episode: 133 training return: tensor(-910.3334, device='cuda:0')
episode: 134 training return: tensor(-525.7950, device='cuda:0')
episode: 135 training return: tensor(-999.9669, device='cuda:0')
epoch: 34 test_true_pfm: 5267.815700879386
episode: 136 training return: tensor(-378.1194, device='cuda:0')
episode: 137 training return: tensor(-320.8477, device='cuda:0')
episode: 138 training return: tensor(-941.7114, device='cuda:0')
episode: 139 training return: tensor(-999.9932, device='cuda:0')
epoch: 35 test_true_pfm: 3967.1940560611433
episode: 140 training return: tensor(-914.3892, device='cuda:0')
episode: 141 training return: tensor(-600.4628, device='cuda:0')
episode: 142 training return: tensor(-40.2047, device='cuda:0')
episode: 143 training return: tensor(-999.9514, device='cuda:0')
epoch: 36 test_true_pfm: 7830.744979756986
episode: 144 training return: tensor(-905.7549, device='cuda:0')
episode: 145 training return: tensor(-969.9185, device='cuda:0')
episode: 146 training return: tensor(-571.0418, device='cuda:0')
episode: 147 training return: tensor(-999.9398, device='cuda:0')
epoch: 37 test_true_pfm: 2907.7394310312397
episode: 148 training return: tensor(-221.3526, device='cuda:0')
episode: 149 training return: tensor(-213.2796, device='cuda:0')
episode: 150 training return: tensor(-462.3999, device='cuda:0')
episode: 151 training return: tensor(-922.2717, device='cuda:0')
epoch: 38 test_true_pfm: 3414.769739643547
episode: 152 training return: tensor(-940.2590, device='cuda:0')
episode: 153 training return: tensor(-786.5621, device='cuda:0')
episode: 154 training return: tensor(-762.6554, device='cuda:0')
episode: 155 training return: tensor(-982.0505, device='cuda:0')
epoch: 39 test_true_pfm: 4022.2196856217633
episode: 156 training return: tensor(-979.0848, device='cuda:0')
episode: 157 training return: tensor(-754.4633, device='cuda:0')
episode: 158 training return: tensor(-999.9783, device='cuda:0')
episode: 159 training return: tensor(-262.9889, device='cuda:0')
epoch: 40 test_true_pfm: 6269.829525424652
episode: 160 training return: tensor(-725.4932, device='cuda:0')
episode: 161 training return: tensor(-999.9842, device='cuda:0')
episode: 162 training return: tensor(-551.8047, device='cuda:0')
episode: 163 training return: tensor(-661.8373, device='cuda:0')
epoch: 41 test_true_pfm: 6094.477419210521
episode: 164 training return: tensor(-445.4913, device='cuda:0')
episode: 165 training return: tensor(-395.3370, device='cuda:0')
episode: 166 training return: tensor(-130.1842, device='cuda:0')
episode: 167 training return: tensor(-996.7107, device='cuda:0')
epoch: 42 test_true_pfm: 1706.4582071453035
episode: 168 training return: tensor(-898.6205, device='cuda:0')
episode: 169 training return: tensor(-976.7302, device='cuda:0')
episode: 170 training return: tensor(-378.2862, device='cuda:0')
episode: 171 training return: tensor(-840.2856, device='cuda:0')
epoch: 43 test_true_pfm: 5887.840022201082
episode: 172 training return: tensor(-867.0167, device='cuda:0')
episode: 173 training return: tensor(-903.4633, device='cuda:0')
episode: 174 training return: tensor(-674.3835, device='cuda:0')
episode: 175 training return: tensor(-270.2395, device='cuda:0')
epoch: 44 test_true_pfm: 7222.642978048346
episode: 176 training return: tensor(-681.0057, device='cuda:0')
episode: 177 training return: tensor(-999.9640, device='cuda:0')
episode: 178 training return: tensor(-999.9871, device='cuda:0')
episode: 179 training return: tensor(-820.7362, device='cuda:0')
epoch: 45 test_true_pfm: 8472.026325114013
episode: 180 training return: tensor(-856.4319, device='cuda:0')
episode: 181 training return: tensor(-690.0560, device='cuda:0')
episode: 182 training return: tensor(-779.4601, device='cuda:0')
episode: 183 training return: tensor(-290.8868, device='cuda:0')
epoch: 46 test_true_pfm: 4069.8822071901964
episode: 184 training return: tensor(-604.9111, device='cuda:0')
episode: 185 training return: tensor(-999.9036, device='cuda:0')
episode: 186 training return: tensor(-415.7485, device='cuda:0')
episode: 187 training return: tensor(-770.4656, device='cuda:0')
epoch: 47 test_true_pfm: 6920.432321517604
episode: 188 training return: tensor(-645.4683, device='cuda:0')
episode: 189 training return: tensor(-393.1210, device='cuda:0')
episode: 190 training return: tensor(-440.4715, device='cuda:0')
episode: 191 training return: tensor(-355.9371, device='cuda:0')
epoch: 48 test_true_pfm: 1282.1383531639615
episode: 192 training return: tensor(-997.2073, device='cuda:0')
episode: 193 training return: tensor(-363.6803, device='cuda:0')
episode: 194 training return: tensor(-281.6657, device='cuda:0')
episode: 195 training return: tensor(-715.9050, device='cuda:0')
epoch: 49 test_true_pfm: 5504.096607098986
episode: 196 training return: tensor(-999.8622, device='cuda:0')
episode: 197 training return: tensor(-337.0538, device='cuda:0')
episode: 198 training return: tensor(-742.3050, device='cuda:0')
episode: 199 training return: tensor(-406.9590, device='cuda:0')
epoch: 50 test_true_pfm: 3116.5920792312304
episode: 200 training return: tensor(-553.7710, device='cuda:0')
episode: 201 training return: tensor(-842.9861, device='cuda:0')
episode: 202 training return: tensor(-924.6747, device='cuda:0')
episode: 203 training return: tensor(-468.2254, device='cuda:0')
epoch: 51 test_true_pfm: 6543.124981406876
episode: 204 training return: tensor(-688.6418, device='cuda:0')
episode: 205 training return: tensor(-998.9575, device='cuda:0')
episode: 206 training return: tensor(-999.8790, device='cuda:0')
episode: 207 training return: tensor(-550.4036, device='cuda:0')
epoch: 52 test_true_pfm: 4361.288110582531
episode: 208 training return: tensor(-873.8979, device='cuda:0')
episode: 209 training return: tensor(-999.9983, device='cuda:0')
episode: 210 training return: tensor(-607.3685, device='cuda:0')
episode: 211 training return: tensor(-706.9752, device='cuda:0')
epoch: 53 test_true_pfm: 6862.246751189897
episode: 212 training return: tensor(-433.3506, device='cuda:0')
episode: 213 training return: tensor(-999.7089, device='cuda:0')
episode: 214 training return: tensor(-903.0460, device='cuda:0')
episode: 215 training return: tensor(-286.8294, device='cuda:0')
epoch: 54 test_true_pfm: 7346.340968161774
episode: 216 training return: tensor(-996.3024, device='cuda:0')
episode: 217 training return: tensor(-253.5072, device='cuda:0')
episode: 218 training return: tensor(-999.9494, device='cuda:0')
episode: 219 training return: tensor(-937.2810, device='cuda:0')
epoch: 55 test_true_pfm: 4893.582914700589
episode: 220 training return: tensor(-212.0291, device='cuda:0')
episode: 221 training return: tensor(-914.3183, device='cuda:0')
episode: 222 training return: tensor(-880.8058, device='cuda:0')
episode: 223 training return: tensor(-937.6021, device='cuda:0')
epoch: 56 test_true_pfm: 6163.826061905232
episode: 224 training return: tensor(-334.9400, device='cuda:0')
episode: 225 training return: tensor(-717.5885, device='cuda:0')
episode: 226 training return: tensor(-372.8741, device='cuda:0')
episode: 227 training return: tensor(-376.1024, device='cuda:0')
epoch: 57 test_true_pfm: 2671.8359465898006
episode: 228 training return: tensor(-931.8867, device='cuda:0')
episode: 229 training return: tensor(-406.5378, device='cuda:0')
episode: 230 training return: tensor(-999.9991, device='cuda:0')
episode: 231 training return: tensor(-45.4626, device='cuda:0')
epoch: 58 test_true_pfm: 7425.573199696392
episode: 232 training return: tensor(-152.6531, device='cuda:0')
episode: 233 training return: tensor(-691.4177, device='cuda:0')
episode: 234 training return: tensor(-402.1140, device='cuda:0')
episode: 235 training return: tensor(-789.9272, device='cuda:0')
epoch: 59 test_true_pfm: 5378.609404399406
episode: 236 training return: tensor(-436.1766, device='cuda:0')
episode: 237 training return: tensor(-171.3304, device='cuda:0')
episode: 238 training return: tensor(-537.6478, device='cuda:0')
episode: 239 training return: tensor(-365.9774, device='cuda:0')
epoch: 60 test_true_pfm: 6812.346751570528
episode: 240 training return: tensor(-976.2773, device='cuda:0')
episode: 241 training return: tensor(-869.3803, device='cuda:0')
episode: 242 training return: tensor(-630.7469, device='cuda:0')
episode: 243 training return: tensor(-729.0996, device='cuda:0')
epoch: 61 test_true_pfm: 4218.267331414054
episode: 244 training return: tensor(-992.5478, device='cuda:0')
episode: 245 training return: tensor(-999.9452, device='cuda:0')
episode: 246 training return: tensor(-737.2264, device='cuda:0')
episode: 247 training return: tensor(-302.5749, device='cuda:0')
epoch: 62 test_true_pfm: 9772.198984045095
episode: 248 training return: tensor(-702.9124, device='cuda:0')
episode: 249 training return: tensor(70.8595, device='cuda:0')
episode: 250 training return: tensor(-981.4394, device='cuda:0')
episode: 251 training return: tensor(-999.9516, device='cuda:0')
epoch: 63 test_true_pfm: 6320.71402432914
episode: 252 training return: tensor(-231.9506, device='cuda:0')
episode: 253 training return: tensor(-992.7172, device='cuda:0')
episode: 254 training return: tensor(-244.0414, device='cuda:0')
episode: 255 training return: tensor(-902.4877, device='cuda:0')
epoch: 64 test_true_pfm: 9868.18399307084
episode: 256 training return: tensor(-201.8105, device='cuda:0')
episode: 257 training return: tensor(-156.9967, device='cuda:0')
episode: 258 training return: tensor(-154.9446, device='cuda:0')
episode: 259 training return: tensor(-106.4894, device='cuda:0')
epoch: 65 test_true_pfm: 9434.738655608546
episode: 260 training return: tensor(-627.6968, device='cuda:0')
episode: 261 training return: tensor(-513.0544, device='cuda:0')
episode: 262 training return: tensor(-335.5892, device='cuda:0')
episode: 263 training return: tensor(79.5523, device='cuda:0')
epoch: 66 test_true_pfm: 5411.979511424047
episode: 264 training return: tensor(-713.2993, device='cuda:0')
episode: 265 training return: tensor(-777.4938, device='cuda:0')
episode: 266 training return: tensor(-562.7486, device='cuda:0')
episode: 267 training return: tensor(-523.1502, device='cuda:0')
epoch: 67 test_true_pfm: 9022.620982977629
episode: 268 training return: tensor(-640.2200, device='cuda:0')
episode: 269 training return: tensor(-205.0476, device='cuda:0')
episode: 270 training return: tensor(-716.1051, device='cuda:0')
episode: 271 training return: tensor(-189.9967, device='cuda:0')
epoch: 68 test_true_pfm: 8798.6656711144
episode: 272 training return: tensor(-253.9742, device='cuda:0')
episode: 273 training return: tensor(-999.9238, device='cuda:0')
episode: 274 training return: tensor(-650.3300, device='cuda:0')
episode: 275 training return: tensor(-437.6300, device='cuda:0')
epoch: 69 test_true_pfm: 9197.442653665252
episode: 276 training return: tensor(-454.3629, device='cuda:0')
episode: 277 training return: tensor(-605.7872, device='cuda:0')
episode: 278 training return: tensor(-405.2299, device='cuda:0')
episode: 279 training return: tensor(-428.9940, device='cuda:0')
epoch: 70 test_true_pfm: 5373.299273332547
episode: 280 training return: tensor(-232.9486, device='cuda:0')
episode: 281 training return: tensor(-418.4440, device='cuda:0')
episode: 282 training return: tensor(-652.7937, device='cuda:0')
episode: 283 training return: tensor(-773.5371, device='cuda:0')
epoch: 71 test_true_pfm: 9466.14870451284
episode: 284 training return: tensor(-744.2693, device='cuda:0')
episode: 285 training return: tensor(-736.8489, device='cuda:0')
episode: 286 training return: tensor(-352.7501, device='cuda:0')
episode: 287 training return: tensor(-385.3997, device='cuda:0')
epoch: 72 test_true_pfm: 1473.934320634379
episode: 288 training return: tensor(-338.3811, device='cuda:0')
episode: 289 training return: tensor(-213.9352, device='cuda:0')
episode: 290 training return: tensor(-733.9424, device='cuda:0')
episode: 291 training return: tensor(-208.3423, device='cuda:0')
epoch: 73 test_true_pfm: 5997.747561998657
episode: 292 training return: tensor(-752.9901, device='cuda:0')
episode: 293 training return: tensor(-963.3868, device='cuda:0')
episode: 294 training return: tensor(-524.6829, device='cuda:0')
episode: 295 training return: tensor(-999.4193, device='cuda:0')
epoch: 74 test_true_pfm: 9739.651467442023
episode: 296 training return: tensor(-312.6590, device='cuda:0')
episode: 297 training return: tensor(61.0761, device='cuda:0')
episode: 298 training return: tensor(-358.3078, device='cuda:0')
episode: 299 training return: tensor(-274.9266, device='cuda:0')
epoch: 75 test_true_pfm: 6152.466413081821
episode: 300 training return: tensor(-998.5172, device='cuda:0')
episode: 301 training return: tensor(-181.6342, device='cuda:0')
episode: 302 training return: tensor(-125.9742, device='cuda:0')
episode: 303 training return: tensor(-553.4468, device='cuda:0')
epoch: 76 test_true_pfm: 9029.616971512665
episode: 304 training return: tensor(-94.0708, device='cuda:0')
episode: 305 training return: tensor(-86.4474, device='cuda:0')
episode: 306 training return: tensor(-241.7419, device='cuda:0')
episode: 307 training return: tensor(-110.0311, device='cuda:0')
epoch: 77 test_true_pfm: 2712.7589356152503
episode: 308 training return: tensor(-998.3363, device='cuda:0')
episode: 309 training return: tensor(-353.7662, device='cuda:0')
episode: 310 training return: tensor(-910.3708, device='cuda:0')
episode: 311 training return: tensor(-28.5415, device='cuda:0')
epoch: 78 test_true_pfm: 5971.466994804633
episode: 312 training return: tensor(-873.5014, device='cuda:0')
episode: 313 training return: tensor(-263.4069, device='cuda:0')
episode: 314 training return: tensor(-193.6839, device='cuda:0')
episode: 315 training return: tensor(-766.5182, device='cuda:0')
epoch: 79 test_true_pfm: 5519.061671389353
episode: 316 training return: tensor(-591.9618, device='cuda:0')
episode: 317 training return: tensor(10.7626, device='cuda:0')
episode: 318 training return: tensor(-837.8840, device='cuda:0')
episode: 319 training return: tensor(-348.1550, device='cuda:0')
epoch: 80 test_true_pfm: 6832.193247585477
episode: 320 training return: tensor(-426.9234, device='cuda:0')
episode: 321 training return: tensor(-786.8484, device='cuda:0')
episode: 322 training return: tensor(86.7301, device='cuda:0')
episode: 323 training return: tensor(-558.9801, device='cuda:0')
epoch: 81 test_true_pfm: 3971.7327029653716
episode: 324 training return: tensor(-647.2062, device='cuda:0')
episode: 325 training return: tensor(-616.4898, device='cuda:0')
episode: 326 training return: tensor(-705.6273, device='cuda:0')
episode: 327 training return: tensor(-203.1475, device='cuda:0')
epoch: 82 test_true_pfm: 4690.614327500837
episode: 328 training return: tensor(-556.8203, device='cuda:0')
episode: 329 training return: tensor(-686.6476, device='cuda:0')
episode: 330 training return: tensor(-904.6831, device='cuda:0')
episode: 331 training return: tensor(-624.5139, device='cuda:0')
epoch: 83 test_true_pfm: 9073.55140025379
episode: 332 training return: tensor(-595.6165, device='cuda:0')
episode: 333 training return: tensor(-761.8124, device='cuda:0')
episode: 334 training return: tensor(-977.3632, device='cuda:0')
episode: 335 training return: tensor(-104.3680, device='cuda:0')
epoch: 84 test_true_pfm: 3325.1302170740823
episode: 336 training return: tensor(-408.6660, device='cuda:0')
episode: 337 training return: tensor(-84.9251, device='cuda:0')
episode: 338 training return: tensor(40.4488, device='cuda:0')
episode: 339 training return: tensor(-352.2474, device='cuda:0')
epoch: 85 test_true_pfm: 9167.378908028943
episode: 340 training return: tensor(-92.5767, device='cuda:0')
episode: 341 training return: tensor(-828.0495, device='cuda:0')
episode: 342 training return: tensor(-206.5966, device='cuda:0')
episode: 343 training return: tensor(-581.4750, device='cuda:0')
epoch: 86 test_true_pfm: 5567.434368470523
episode: 344 training return: tensor(-999.5659, device='cuda:0')
episode: 345 training return: tensor(-243.1970, device='cuda:0')
episode: 346 training return: tensor(-968.5015, device='cuda:0')
episode: 347 training return: tensor(-641.9394, device='cuda:0')
epoch: 87 test_true_pfm: 4846.999489275152
episode: 348 training return: tensor(-168.1084, device='cuda:0')
episode: 349 training return: tensor(-162.1171, device='cuda:0')
episode: 350 training return: tensor(-618.2540, device='cuda:0')
episode: 351 training return: tensor(32.3090, device='cuda:0')
epoch: 88 test_true_pfm: 8615.18462866838
episode: 352 training return: tensor(-743.6699, device='cuda:0')
episode: 353 training return: tensor(-528.6449, device='cuda:0')
episode: 354 training return: tensor(-189.4311, device='cuda:0')
episode: 355 training return: tensor(-774.8742, device='cuda:0')
epoch: 89 test_true_pfm: 7041.70164313554
episode: 356 training return: tensor(-91.2986, device='cuda:0')
episode: 357 training return: tensor(-180.2823, device='cuda:0')
episode: 358 training return: tensor(-219.6174, device='cuda:0')
episode: 359 training return: tensor(-537.2859, device='cuda:0')
epoch: 90 test_true_pfm: 6226.318771208619
episode: 360 training return: tensor(-183.1657, device='cuda:0')
episode: 361 training return: tensor(-386.7392, device='cuda:0')
episode: 362 training return: tensor(30.8076, device='cuda:0')
episode: 363 training return: tensor(-957.1313, device='cuda:0')
epoch: 91 test_true_pfm: 7798.851770892316
episode: 364 training return: tensor(-182.2691, device='cuda:0')
episode: 365 training return: tensor(-194.3896, device='cuda:0')
episode: 366 training return: tensor(16.3445, device='cuda:0')
episode: 367 training return: tensor(-331.2289, device='cuda:0')
epoch: 92 test_true_pfm: 6977.809110783214
episode: 368 training return: tensor(-999.9994, device='cuda:0')
episode: 369 training return: tensor(-220.9347, device='cuda:0')
episode: 370 training return: tensor(-772.0843, device='cuda:0')
episode: 371 training return: tensor(-633.9077, device='cuda:0')
epoch: 93 test_true_pfm: 7848.9718736973855
episode: 372 training return: tensor(-411.3126, device='cuda:0')
episode: 373 training return: tensor(-160.9796, device='cuda:0')
episode: 374 training return: tensor(-126.1073, device='cuda:0')
episode: 375 training return: tensor(-1.2435, device='cuda:0')
epoch: 94 test_true_pfm: 4602.312534176006
episode: 376 training return: tensor(-352.7605, device='cuda:0')
episode: 377 training return: tensor(-955.9608, device='cuda:0')
episode: 378 training return: tensor(-906.0607, device='cuda:0')
episode: 379 training return: tensor(-311.1578, device='cuda:0')
epoch: 95 test_true_pfm: 6810.584855374512
episode: 380 training return: tensor(-915.2272, device='cuda:0')
episode: 381 training return: tensor(-606.0950, device='cuda:0')
episode: 382 training return: tensor(-231.7157, device='cuda:0')
episode: 383 training return: tensor(-318.2812, device='cuda:0')
epoch: 96 test_true_pfm: 7296.733238958227
episode: 384 training return: tensor(-165.3237, device='cuda:0')
episode: 385 training return: tensor(-571.4910, device='cuda:0')
episode: 386 training return: tensor(-828.3399, device='cuda:0')
episode: 387 training return: tensor(-537.4047, device='cuda:0')
epoch: 97 test_true_pfm: 7537.924059510345
episode: 388 training return: tensor(-965.8533, device='cuda:0')
episode: 389 training return: tensor(-73.3970, device='cuda:0')
episode: 390 training return: tensor(-787.4310, device='cuda:0')
episode: 391 training return: tensor(-623.0521, device='cuda:0')
epoch: 98 test_true_pfm: 6655.933316847934
episode: 392 training return: tensor(-720.9379, device='cuda:0')
episode: 393 training return: tensor(-509.0694, device='cuda:0')
episode: 394 training return: tensor(-463.1382, device='cuda:0')
episode: 395 training return: tensor(-401.6076, device='cuda:0')
epoch: 99 test_true_pfm: 8294.992712572233
episode: 396 training return: tensor(-372.8760, device='cuda:0')
episode: 397 training return: tensor(-453.3069, device='cuda:0')
episode: 398 training return: tensor(-482.6811, device='cuda:0')
episode: 399 training return: tensor(-307.7633, device='cuda:0')
epoch: 100 test_true_pfm: 8112.481512495028
