['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.61800833794754 test_loss: -0.8557570457458497
epoch: 1 training_loss -1.807454298734665 test_loss: -1.5425546646118165
epoch: 2 training_loss -2.5946744549274445 test_loss: -3.3379493713378907
epoch: 3 training_loss -3.732255184650421 test_loss: -4.076070022583008
epoch: 4 training_loss -4.276615707874298 test_loss: -4.503575134277344
epoch: 5 training_loss -4.662180285453797 test_loss: -4.772164916992187
epoch: 6 training_loss -4.9665079593658445 test_loss: -5.040801239013672
epoch: 7 training_loss -5.20496817111969 test_loss: -5.350669097900391
epoch: 8 training_loss -5.473732795715332 test_loss: -5.6400505065917965
epoch: 9 training_loss -5.681757287979126 test_loss: -5.8736015319824215
epoch: 10 training_loss -5.83011489868164 test_loss: -5.80511474609375
epoch: 11 training_loss -5.97041042804718 test_loss: -6.011821365356445
epoch: 12 training_loss -6.114707517623901 test_loss: -6.247681045532227
epoch: 13 training_loss -6.243427619934082 test_loss: -6.2551124572753904
epoch: 14 training_loss -6.340964570045471 test_loss: -6.380403137207031
epoch: 15 training_loss -6.431624431610107 test_loss: -6.4614723205566404
epoch: 16 training_loss -6.492551150321961 test_loss: -6.6127372741699215
epoch: 17 training_loss -6.5559148025512695 test_loss: -6.522137451171875
epoch: 18 training_loss -6.635460300445557 test_loss: -6.758618927001953
epoch: 19 training_loss -6.677062525749206 test_loss: -6.771247863769531
epoch: 20 training_loss -6.764754328727722 test_loss: -6.797763824462891
epoch: 21 training_loss -6.799386930465698 test_loss: -6.807950592041015
epoch: 22 training_loss -6.87394980430603 test_loss: -6.951799774169922
epoch: 23 training_loss -6.915516204833985 test_loss: -6.991848754882812
epoch: 24 training_loss -6.922459964752197 test_loss: -6.96130142211914
epoch: 25 training_loss -6.9890551233291625 test_loss: -7.034889984130859
epoch: 26 training_loss -7.013639898300171 test_loss: -6.9407814025878904
epoch: 27 training_loss -7.0844479751586915 test_loss: -7.096508026123047
epoch: 28 training_loss -7.121052980422974 test_loss: -7.140109252929688
epoch: 29 training_loss -7.123759341239929 test_loss: -7.170863342285156
epoch: 30 training_loss -7.1376924896240235 test_loss: -7.244739532470703
epoch: 31 training_loss -7.180444116592407 test_loss: -7.158516693115234
epoch: 32 training_loss -7.1719161224365235 test_loss: -7.3399711608886715
epoch: 33 training_loss -7.22792191028595 test_loss: -7.336579132080078
epoch: 34 training_loss -7.292160358428955 test_loss: -7.283297729492188
epoch: 35 training_loss -7.319704151153564 test_loss: -7.411958312988281
epoch: 36 training_loss -7.286434283256531 test_loss: -7.383466339111328
epoch: 37 training_loss -7.3490527153015135 test_loss: -7.399871063232422
epoch: 38 training_loss -7.373480777740479 test_loss: -7.408937072753906
epoch: 39 training_loss -7.403907322883606 test_loss: -7.4027656555175785
epoch: 40 training_loss -7.389718718528748 test_loss: -7.342736053466797
epoch: 41 training_loss -7.383067083358765 test_loss: -7.481271362304687
epoch: 42 training_loss -7.411833415031433 test_loss: -7.4195068359375
epoch: 43 training_loss -7.465579586029053 test_loss: -7.447271728515625
epoch: 44 training_loss -7.492692198753357 test_loss: -7.398870849609375
epoch: 45 training_loss -7.50998206615448 test_loss: -7.554165649414062
epoch: 46 training_loss -7.507354440689087 test_loss: -7.4172721862792965
epoch: 47 training_loss -7.547215418815613 test_loss: -7.464511108398438
epoch: 48 training_loss -7.5383778047561645 test_loss: -7.396933746337891
epoch: 49 training_loss -7.536877579689026 test_loss: -7.49822769165039
epoch: 50 training_loss -7.568733830451965 test_loss: -7.653156280517578
epoch: 51 training_loss -7.5654050159454345 test_loss: -7.667813873291015
epoch: 52 training_loss -7.59468421459198 test_loss: -7.6340690612792965
epoch: 53 training_loss -7.637836818695068 test_loss: -7.6188911437988285
epoch: 54 training_loss -7.624325308799744 test_loss: -7.6368255615234375
epoch: 55 training_loss -7.611441721916199 test_loss: -7.638896179199219
epoch: 56 training_loss -7.675616698265076 test_loss: -7.6975761413574215
epoch: 57 training_loss -7.667989273071289 test_loss: -7.649464416503906
epoch: 58 training_loss -7.6468348932266235 test_loss: -7.7058837890625
epoch: 59 training_loss -7.698564715385437 test_loss: -7.634068298339844
epoch: 60 training_loss -7.67467963218689 test_loss: -7.78948974609375
epoch: 61 training_loss -7.713511519432068 test_loss: -7.6809814453125
epoch: 62 training_loss -7.73486129283905 test_loss: -7.741220855712891
epoch: 63 training_loss -7.684040946960449 test_loss: -7.698235321044922
epoch: 64 training_loss -7.73477457523346 test_loss: -7.697659301757812
epoch: 65 training_loss -7.709504942893982 test_loss: -7.767568969726563
epoch: 66 training_loss -7.733986659049988 test_loss: -7.775825500488281
epoch: 67 training_loss -7.77854133605957 test_loss: -7.783679962158203
epoch: 68 training_loss -7.776516494750976 test_loss: -7.85947265625
epoch: 69 training_loss -7.817791428565979 test_loss: -7.859928894042969
epoch: 70 training_loss -7.774476113319397 test_loss: -7.786248779296875
epoch: 71 training_loss -7.814332127571106 test_loss: -7.833085632324218
epoch: 72 training_loss -7.789511351585388 test_loss: -7.832712554931641
epoch: 73 training_loss -7.769610810279846 test_loss: -7.729055023193359
epoch: 74 training_loss -7.849066014289856 test_loss: -7.90538330078125
epoch: 75 training_loss -7.8466410779953 test_loss: -7.828955841064453
epoch: 76 training_loss -7.801103868484497 test_loss: -7.727028656005859
epoch: 77 training_loss -7.829646587371826 test_loss: -7.7782440185546875
epoch: 78 training_loss -7.872530059814453 test_loss: -7.817617034912109
epoch: 79 training_loss -7.868300752639771 test_loss: -7.881056213378907
epoch: 80 training_loss -7.818513469696045 test_loss: -7.927417755126953
epoch: 81 training_loss -7.866310033798218 test_loss: -7.869011688232422
epoch: 82 training_loss -7.869026713371277 test_loss: -7.924828338623047
epoch: 83 training_loss -7.857953205108642 test_loss: -7.844707489013672
epoch: 84 training_loss -7.859176540374756 test_loss: -7.797716522216797
epoch: 85 training_loss -7.900291652679443 test_loss: -7.79443588256836
epoch: 86 training_loss -7.886176843643188 test_loss: -7.999032592773437
epoch: 87 training_loss -7.881712698936463 test_loss: -7.933015441894531
epoch: 88 training_loss -7.927202115058899 test_loss: -7.86658935546875
epoch: 89 training_loss -7.947533206939697 test_loss: -7.982631683349609
epoch: 90 training_loss -7.928691782951355 test_loss: -7.918668365478515
epoch: 91 training_loss -7.9473922204971315 test_loss: -7.999305725097656
epoch: 92 training_loss -7.908565735816955 test_loss: -8.009049224853516
epoch: 93 training_loss -7.944795126914978 test_loss: -7.872043609619141
epoch: 94 training_loss -7.931048827171326 test_loss: -7.934998321533203
epoch: 95 training_loss -7.939647607803344 test_loss: -8.030565643310547
epoch: 96 training_loss -7.981832690238953 test_loss: -7.970152282714844
epoch: 97 training_loss -7.96278823852539 test_loss: -7.997821044921875
epoch: 98 training_loss -7.974760718345642 test_loss: -8.033851623535156
epoch: 99 training_loss -8.00801501750946 test_loss: -8.004225921630859
epoch: 100 training_loss -8.003154730796814 test_loss: -8.019612884521484
epoch: 101 training_loss -7.988865909576416 test_loss: -7.980740356445312
epoch: 102 training_loss -7.98790135383606 test_loss: -7.92755355834961
epoch: 103 training_loss -8.009370393753052 test_loss: -7.8833152770996096
epoch: 104 training_loss -7.993391051292419 test_loss: -8.064117431640625
epoch: 105 training_loss -7.998798561096192 test_loss: -7.995587921142578
epoch: 106 training_loss -8.005260171890258 test_loss: -7.965934753417969
epoch: 107 training_loss -8.023516645431519 test_loss: -8.002806854248046
epoch: 108 training_loss -8.027560539245606 test_loss: -8.067398071289062
epoch: 109 training_loss -8.013865766525269 test_loss: -8.043618774414062
epoch: 110 training_loss -8.033767466545106 test_loss: -8.025856018066406
epoch: 111 training_loss -8.04491853237152 test_loss: -8.085832977294922
epoch: 112 training_loss -8.050588541030884 test_loss: -7.992885589599609
epoch: 113 training_loss -8.023927221298218 test_loss: -8.036528778076171
epoch: 114 training_loss -8.043041167259217 test_loss: -8.062668609619141
epoch: 115 training_loss -8.092828841209412 test_loss: -8.032394409179688
epoch: 116 training_loss -8.051298513412476 test_loss: -8.02907485961914
epoch: 117 training_loss -8.080667929649353 test_loss: -7.995028686523438
epoch: 118 training_loss -8.021233620643615 test_loss: -7.953787231445313
epoch: 119 training_loss -8.092849535942078 test_loss: -8.14017333984375
epoch: 120 training_loss -8.057373490333557 test_loss: -8.04662628173828
epoch: 121 training_loss -8.10352303981781 test_loss: -8.112602233886719
epoch: 122 training_loss -8.07558524608612 test_loss: -8.050413513183594
epoch: 123 training_loss -8.101858839988708 test_loss: -8.021349334716797
epoch: 124 training_loss -8.093112626075744 test_loss: -8.145547485351562
epoch: 125 training_loss -8.12719982624054 test_loss: -8.052359771728515
epoch: 126 training_loss -8.14219512462616 test_loss: -8.103349304199218
epoch: 127 training_loss -8.115163812637329 test_loss: -8.185767364501952
epoch: 128 training_loss -8.088420848846436 test_loss: -7.982952117919922
epoch: 129 training_loss -8.12752537727356 test_loss: -8.139340209960938
epoch: 130 training_loss -8.164545664787292 test_loss: -8.116876220703125
epoch: 131 training_loss -8.112955532073975 test_loss: -8.026715087890626
epoch: 132 training_loss -8.114229030609131 test_loss: -8.104064178466796
epoch: 133 training_loss -8.144277572631836 test_loss: -8.183628845214844
epoch: 134 training_loss -8.155963439941406 test_loss: -8.091016387939453
epoch: 135 training_loss -8.155878429412843 test_loss: -8.103464508056641
epoch: 136 training_loss -8.150883388519286 test_loss: -8.056625366210938
epoch: 137 training_loss -8.132216749191285 test_loss: -8.191674041748048
epoch: 138 training_loss -8.153259863853455 test_loss: -8.104261779785157
epoch: 139 training_loss -8.160560841560363 test_loss: -8.145932006835938
epoch: 140 training_loss -8.138620085716248 test_loss: -8.086494445800781
epoch: 141 training_loss -8.176058211326598 test_loss: -8.12953872680664
epoch: 142 training_loss -8.182174019813537 test_loss: -8.167588806152343
epoch: 143 training_loss -8.16246217727661 test_loss: -8.139900970458985
epoch: 144 training_loss -8.148213562965394 test_loss: -8.119316101074219
epoch: 145 training_loss -8.176478209495544 test_loss: -8.205025482177735
epoch: 146 training_loss -8.103239555358886 test_loss: -8.197840881347656
epoch: 147 training_loss -8.192453861236572 test_loss: -8.131275939941407
epoch: 148 training_loss -8.181744723320007 test_loss: -8.190756225585938
epoch: 149 training_loss -8.174698395729065 test_loss: -8.163748168945313
2775.656661681436
episode: 0 training return: tensor(-5.0492e+17, device='cuda:0')
episode: 1 training return: tensor(-1.9595e+18, device='cuda:0')
episode: 2 training return: tensor(-1.2100e+17, device='cuda:0')
episode: 3 training return: tensor(-1.3213e+17, device='cuda:0')
epoch: 1 test_true_pfm: -52.99246487190023
episode: 4 training return: tensor(-1.4197e+17, device='cuda:0')
episode: 5 training return: tensor(-8.9923e+18, device='cuda:0')
episode: 6 training return: tensor(-2.5255e+09, device='cuda:0')
episode: 7 training return: tensor(-6.3054e+16, device='cuda:0')
epoch: 2 test_true_pfm: -22.0377018670336
episode: 8 training return: tensor(-188592.2500, device='cuda:0')
episode: 9 training return: tensor(-2.6392e+17, device='cuda:0')
episode: 10 training return: tensor(-1.5411e+15, device='cuda:0')
episode: 11 training return: tensor(-2.6852e+15, device='cuda:0')
epoch: 3 test_true_pfm: -22.47889699102708
episode: 12 training return: tensor(-2.8649e+15, device='cuda:0')
episode: 13 training return: tensor(-1.6312e+15, device='cuda:0')
episode: 14 training return: tensor(-2.7011e+15, device='cuda:0')
episode: 15 training return: tensor(-3.6259e+15, device='cuda:0')
epoch: 4 test_true_pfm: -22.249500146865405
episode: 16 training return: tensor(-2.7465e+15, device='cuda:0')
episode: 17 training return: tensor(-3.1027e+15, device='cuda:0')
episode: 18 training return: tensor(-3.3770e+15, device='cuda:0')
episode: 19 training return: tensor(-2.9508e+15, device='cuda:0')
epoch: 5 test_true_pfm: -21.90944057845994
episode: 20 training return: tensor(-3.3620e+15, device='cuda:0')
episode: 21 training return: tensor(-3.2489e+15, device='cuda:0')
episode: 22 training return: tensor(-3.0262e+15, device='cuda:0')
episode: 23 training return: tensor(-3.3629e+15, device='cuda:0')
epoch: 6 test_true_pfm: -21.703277873070377
episode: 24 training return: tensor(-2.7232e+15, device='cuda:0')
episode: 25 training return: tensor(-2.6378e+15, device='cuda:0')
episode: 26 training return: tensor(-2.7095e+15, device='cuda:0')
episode: 27 training return: tensor(-3.1110e+15, device='cuda:0')
epoch: 7 test_true_pfm: -22.342284198920265
episode: 28 training return: tensor(-3.2153e+15, device='cuda:0')
episode: 29 training return: tensor(-3.3814e+15, device='cuda:0')
episode: 30 training return: tensor(-2.8205e+15, device='cuda:0')
episode: 31 training return: tensor(-2.9507e+15, device='cuda:0')
epoch: 8 test_true_pfm: -22.366823416957498
episode: 32 training return: tensor(-3.2511e+15, device='cuda:0')
episode: 33 training return: tensor(-1.4191e+15, device='cuda:0')
episode: 34 training return: tensor(-3.6704e+15, device='cuda:0')
episode: 35 training return: tensor(-1.0262e+15, device='cuda:0')
epoch: 9 test_true_pfm: -22.444673323052985
episode: 36 training return: tensor(-3.2104e+15, device='cuda:0')
episode: 37 training return: tensor(-2.7404e+15, device='cuda:0')
episode: 38 training return: tensor(-3.1816e+15, device='cuda:0')
episode: 39 training return: tensor(-2.9485e+15, device='cuda:0')
epoch: 10 test_true_pfm: -22.218108260222767
episode: 40 training return: tensor(-3.2897e+15, device='cuda:0')
episode: 41 training return: tensor(-2.7237e+15, device='cuda:0')
episode: 42 training return: tensor(-2.8011e+15, device='cuda:0')
episode: 43 training return: tensor(-2.5789e+15, device='cuda:0')
epoch: 11 test_true_pfm: -22.110149996454282
episode: 44 training return: tensor(-3.4440e+15, device='cuda:0')
episode: 45 training return: tensor(-3.4499e+15, device='cuda:0')
episode: 46 training return: tensor(-2.9389e+15, device='cuda:0')
episode: 47 training return: tensor(-1.5466e+17, device='cuda:0')
epoch: 12 test_true_pfm: 40.097757248651895
episode: 48 training return: tensor(-1.8654e+17, device='cuda:0')
episode: 49 training return: tensor(-3.4372e+15, device='cuda:0')
episode: 50 training return: tensor(-3.2625e+15, device='cuda:0')
episode: 51 training return: tensor(-2.9411e+15, device='cuda:0')
epoch: 13 test_true_pfm: -22.34592924371997
episode: 52 training return: tensor(-8.4175e+14, device='cuda:0')
episode: 53 training return: tensor(-2.8430e+15, device='cuda:0')
episode: 54 training return: tensor(-3.4146e+15, device='cuda:0')
episode: 55 training return: tensor(-2.7247e+15, device='cuda:0')
epoch: 14 test_true_pfm: -22.019467979420735
episode: 56 training return: tensor(-3.0378e+15, device='cuda:0')
episode: 57 training return: tensor(-2.6688e+15, device='cuda:0')
episode: 58 training return: tensor(-3.0816e+15, device='cuda:0')
episode: 59 training return: tensor(-3.1743e+15, device='cuda:0')
epoch: 15 test_true_pfm: -22.270672710216235
episode: 60 training return: tensor(-3.0516e+15, device='cuda:0')
episode: 61 training return: tensor(-3.3126e+15, device='cuda:0')
episode: 62 training return: tensor(-3.0468e+15, device='cuda:0')
episode: 63 training return: tensor(-3.2532e+15, device='cuda:0')
epoch: 16 test_true_pfm: -53.3138064151508
episode: 64 training return: tensor(-6.4856e+14, device='cuda:0')
episode: 65 training return: tensor(-2.8387e+15, device='cuda:0')
episode: 66 training return: tensor(-3.3816e+15, device='cuda:0')
episode: 67 training return: tensor(-1.6145e+17, device='cuda:0')
epoch: 17 test_true_pfm: -21.933336195101038
episode: 68 training return: tensor(-2.6968e+15, device='cuda:0')
episode: 69 training return: tensor(-1.4941e+17, device='cuda:0')
episode: 70 training return: tensor(-2.4858e+15, device='cuda:0')
episode: 71 training return: tensor(-6.5034e+19, device='cuda:0')
epoch: 18 test_true_pfm: 22.438656693000905
episode: 72 training return: tensor(-6.5060e+19, device='cuda:0')
episode: 73 training return: tensor(-2.7035e+15, device='cuda:0')
episode: 74 training return: tensor(-3.1914e+15, device='cuda:0')
episode: 75 training return: tensor(-2.8171e+15, device='cuda:0')
epoch: 19 test_true_pfm: -8.34331764127527
episode: 76 training return: tensor(-2.7867e+15, device='cuda:0')
episode: 77 training return: tensor(-5.0528e+14, device='cuda:0')
episode: 78 training return: tensor(-3.1677e+15, device='cuda:0')
episode: 79 training return: tensor(-3.4262e+15, device='cuda:0')
epoch: 20 test_true_pfm: -19.6443720525268
episode: 80 training return: tensor(-4.3751e+14, device='cuda:0')
episode: 81 training return: tensor(-3.3491e+15, device='cuda:0')
episode: 82 training return: tensor(-3.0502e+15, device='cuda:0')
episode: 83 training return: tensor(-2.7144e+15, device='cuda:0')
epoch: 21 test_true_pfm: -22.08833325908179
episode: 84 training return: tensor(-2.8690e+15, device='cuda:0')
episode: 85 training return: tensor(-3.1491e+15, device='cuda:0')
episode: 86 training return: tensor(-6.1389e+14, device='cuda:0')
episode: 87 training return: tensor(-3.6319e+16, device='cuda:0')
epoch: 22 test_true_pfm: -22.12119781605173
episode: 88 training return: tensor(-2.6319e+15, device='cuda:0')
episode: 89 training return: tensor(-3.1925e+15, device='cuda:0')
episode: 90 training return: tensor(-2.6742e+15, device='cuda:0')
episode: 91 training return: tensor(-2.3080e+16, device='cuda:0')
epoch: 23 test_true_pfm: 14.19954038829592
episode: 92 training return: tensor(-4.5645e+16, device='cuda:0')
episode: 93 training return: tensor(-4.7977e+16, device='cuda:0')
episode: 94 training return: tensor(-3.3121e+15, device='cuda:0')
episode: 95 training return: tensor(-5.0558e+15, device='cuda:0')
epoch: 24 test_true_pfm: -22.45850761281396
episode: 96 training return: tensor(-3.0118e+15, device='cuda:0')
episode: 97 training return: tensor(-2.9736e+15, device='cuda:0')
episode: 98 training return: tensor(-1.2307e+15, device='cuda:0')
episode: 99 training return: tensor(-2.5994e+15, device='cuda:0')
epoch: 25 test_true_pfm: -22.354243540440837
episode: 100 training return: tensor(-3.0834e+15, device='cuda:0')
episode: 101 training return: tensor(-2.7481e+15, device='cuda:0')
episode: 102 training return: tensor(-2.9327e+15, device='cuda:0')
episode: 103 training return: tensor(-2.7300e+15, device='cuda:0')
epoch: 26 test_true_pfm: -31.346750839286
episode: 104 training return: tensor(-3.5907e+15, device='cuda:0')
episode: 105 training return: tensor(-2.3700e+17, device='cuda:0')
episode: 106 training return: tensor(-1.8995e+15, device='cuda:0')
episode: 107 training return: tensor(-1.1356e+15, device='cuda:0')
epoch: 27 test_true_pfm: 18.00620551711111
episode: 108 training return: tensor(-8.9488e+16, device='cuda:0')
episode: 109 training return: tensor(-4.8999e+16, device='cuda:0')
episode: 110 training return: tensor(-9.3399e+15, device='cuda:0')
episode: 111 training return: tensor(-9.2084e+19, device='cuda:0')
epoch: 28 test_true_pfm: -61.03516781942634
episode: 112 training return: tensor(-6.9775e+14, device='cuda:0')
episode: 113 training return: tensor(-3.0399e+15, device='cuda:0')
episode: 114 training return: tensor(-3.0881e+15, device='cuda:0')
episode: 115 training return: tensor(-2.5681e+15, device='cuda:0')
epoch: 29 test_true_pfm: -22.547441236030735
episode: 116 training return: tensor(-2.2085e+15, device='cuda:0')
episode: 117 training return: tensor(-2.7218e+15, device='cuda:0')
episode: 118 training return: tensor(-3.1644e+15, device='cuda:0')
episode: 119 training return: tensor(-3.3040e+15, device='cuda:0')
epoch: 30 test_true_pfm: -13.36597332454707
episode: 120 training return: tensor(-2.5984e+15, device='cuda:0')
episode: 121 training return: tensor(-3.2165e+15, device='cuda:0')
episode: 122 training return: tensor(-3.8826e+15, device='cuda:0')
episode: 123 training return: tensor(-3.9956e+15, device='cuda:0')
epoch: 31 test_true_pfm: -31.098819409435418
episode: 124 training return: tensor(-3.5092e+15, device='cuda:0')
episode: 125 training return: tensor(-2.8281e+15, device='cuda:0')
episode: 126 training return: tensor(-3.1965e+15, device='cuda:0')
episode: 127 training return: tensor(-3.4694e+15, device='cuda:0')
epoch: 32 test_true_pfm: -22.351073649736843
episode: 128 training return: tensor(-2.8330e+15, device='cuda:0')
episode: 129 training return: tensor(-2.6266e+15, device='cuda:0')
episode: 130 training return: tensor(-3.8729e+16, device='cuda:0')
episode: 131 training return: tensor(-1.7652e+17, device='cuda:0')
epoch: 33 test_true_pfm: -23.01893393874811
episode: 132 training return: tensor(-3.2533e+15, device='cuda:0')
episode: 133 training return: tensor(-2.7789e+15, device='cuda:0')
episode: 134 training return: tensor(-2.8162e+15, device='cuda:0')
episode: 135 training return: tensor(-2.6558e+15, device='cuda:0')
epoch: 34 test_true_pfm: -21.688350184740653
episode: 136 training return: tensor(-3.4370e+15, device='cuda:0')
episode: 137 training return: tensor(-3.4523e+15, device='cuda:0')
episode: 138 training return: tensor(-3.4685e+15, device='cuda:0')
episode: 139 training return: tensor(-2.9243e+15, device='cuda:0')
epoch: 35 test_true_pfm: -12.281325823201877
episode: 140 training return: tensor(-1.8935e+15, device='cuda:0')
episode: 141 training return: tensor(-2.6441e+15, device='cuda:0')
episode: 142 training return: tensor(-3.2478e+15, device='cuda:0')
episode: 143 training return: tensor(-3.2396e+15, device='cuda:0')
epoch: 36 test_true_pfm: 22.433971417312126
episode: 144 training return: tensor(-7.9881e+16, device='cuda:0')
episode: 145 training return: tensor(-6.4543e+19, device='cuda:0')
episode: 146 training return: tensor(-2.6122e+15, device='cuda:0')
episode: 147 training return: tensor(-1.3337e+15, device='cuda:0')
epoch: 37 test_true_pfm: -22.05107735226449
episode: 148 training return: tensor(-3.3464e+15, device='cuda:0')
episode: 149 training return: tensor(-2.9481e+15, device='cuda:0')
episode: 150 training return: tensor(-3.0448e+15, device='cuda:0')
episode: 151 training return: tensor(-2.8743e+15, device='cuda:0')
epoch: 38 test_true_pfm: -21.954256609602805
episode: 152 training return: tensor(-2.8437e+15, device='cuda:0')
episode: 153 training return: tensor(-2.7449e+15, device='cuda:0')
episode: 154 training return: tensor(-3.4312e+15, device='cuda:0')
episode: 155 training return: tensor(-3.0964e+15, device='cuda:0')
epoch: 39 test_true_pfm: -22.578653743667246
episode: 156 training return: tensor(-3.1517e+15, device='cuda:0')
episode: 157 training return: tensor(-2.7519e+15, device='cuda:0')
episode: 158 training return: tensor(-2.9091e+15, device='cuda:0')
episode: 159 training return: tensor(-1.9561e+17, device='cuda:0')
epoch: 40 test_true_pfm: 40.11296532444927
episode: 160 training return: tensor(-2.0981e+17, device='cuda:0')
episode: 161 training return: tensor(-1.9534e+17, device='cuda:0')
episode: 162 training return: tensor(-3.1871e+15, device='cuda:0')
episode: 163 training return: tensor(-3.1857e+15, device='cuda:0')
epoch: 41 test_true_pfm: -21.859998429168698
episode: 164 training return: tensor(-2.8586e+15, device='cuda:0')
episode: 165 training return: tensor(-2.7029e+15, device='cuda:0')
episode: 166 training return: tensor(-3.0610e+15, device='cuda:0')
episode: 167 training return: tensor(-3.4036e+15, device='cuda:0')
epoch: 42 test_true_pfm: 14.752629901357428
episode: 168 training return: tensor(-6.8303e+19, device='cuda:0')
episode: 169 training return: tensor(-6.8402e+19, device='cuda:0')
episode: 170 training return: tensor(-6.8189e+19, device='cuda:0')
episode: 171 training return: tensor(-2.7185e+15, device='cuda:0')
epoch: 43 test_true_pfm: -22.056679073907336
episode: 172 training return: tensor(-2.8038e+15, device='cuda:0')
episode: 173 training return: tensor(-2.8261e+15, device='cuda:0')
episode: 174 training return: tensor(-3.5673e+15, device='cuda:0')
episode: 175 training return: tensor(-2.8881e+15, device='cuda:0')
epoch: 44 test_true_pfm: -22.301755745933235
episode: 176 training return: tensor(-3.2372e+15, device='cuda:0')
episode: 177 training return: tensor(-2.7049e+15, device='cuda:0')
episode: 178 training return: tensor(-7.9381e+14, device='cuda:0')
episode: 179 training return: tensor(-6.6438e+14, device='cuda:0')
epoch: 45 test_true_pfm: -22.4134042147996
episode: 180 training return: tensor(-2.9621e+15, device='cuda:0')
episode: 181 training return: tensor(-3.2628e+15, device='cuda:0')
episode: 182 training return: tensor(-2.6483e+15, device='cuda:0')
episode: 183 training return: tensor(-3.4395e+15, device='cuda:0')
epoch: 46 test_true_pfm: -22.37980881138684
episode: 184 training return: tensor(-2.8091e+15, device='cuda:0')
episode: 185 training return: tensor(-2.7061e+15, device='cuda:0')
episode: 186 training return: tensor(-3.2145e+15, device='cuda:0')
episode: 187 training return: tensor(-2.8394e+15, device='cuda:0')
epoch: 47 test_true_pfm: -22.16935351659534
episode: 188 training return: tensor(-3.1937e+15, device='cuda:0')
episode: 189 training return: tensor(-4.6595e+14, device='cuda:0')
episode: 190 training return: tensor(-3.0241e+15, device='cuda:0')
episode: 191 training return: tensor(-2.7106e+15, device='cuda:0')
epoch: 48 test_true_pfm: -22.03405271458409
episode: 192 training return: tensor(-3.4898e+15, device='cuda:0')
episode: 193 training return: tensor(-5.5608e+19, device='cuda:0')
episode: 194 training return: tensor(-6.1206e+19, device='cuda:0')
episode: 195 training return: tensor(-6.5926e+19, device='cuda:0')
epoch: 49 test_true_pfm: -21.9829496408836
episode: 196 training return: tensor(-3.2625e+15, device='cuda:0')
episode: 197 training return: tensor(-2.7243e+15, device='cuda:0')
episode: 198 training return: tensor(-2.6468e+15, device='cuda:0')
episode: 199 training return: tensor(-2.8038e+15, device='cuda:0')
epoch: 50 test_true_pfm: -21.807453653141405
episode: 200 training return: tensor(-3.4072e+15, device='cuda:0')
episode: 201 training return: tensor(-2.9711e+15, device='cuda:0')
episode: 202 training return: tensor(-3.0816e+15, device='cuda:0')
episode: 203 training return: tensor(-2.8057e+15, device='cuda:0')
epoch: 51 test_true_pfm: -56.50969372325087
episode: 204 training return: tensor(-1.0417e+15, device='cuda:0')
episode: 205 training return: tensor(-1.8126e+15, device='cuda:0')
episode: 206 training return: tensor(-9.6638e+15, device='cuda:0')
episode: 207 training return: tensor(-7.1302e+15, device='cuda:0')
epoch: 52 test_true_pfm: -21.808927991896798
episode: 208 training return: tensor(-1.4393e+16, device='cuda:0')
episode: 209 training return: tensor(-3.5299e+18, device='cuda:0')
episode: 210 training return: tensor(-2.6950e+15, device='cuda:0')
episode: 211 training return: tensor(-2.8553e+15, device='cuda:0')
epoch: 53 test_true_pfm: 4.211430944913151
episode: 212 training return: tensor(-1.4077e+16, device='cuda:0')
episode: 213 training return: tensor(-2.1970e+16, device='cuda:0')
episode: 214 training return: tensor(-2.0401e+16, device='cuda:0')
episode: 215 training return: tensor(-4.0374e+16, device='cuda:0')
epoch: 54 test_true_pfm: -16.06638847597691
episode: 216 training return: tensor(-2.5195e+15, device='cuda:0')
episode: 217 training return: tensor(-3.3789e+15, device='cuda:0')
episode: 218 training return: tensor(-2.9852e+15, device='cuda:0')
episode: 219 training return: tensor(-2.7822e+15, device='cuda:0')
epoch: 55 test_true_pfm: -22.00002952314725
episode: 220 training return: tensor(-2.8684e+15, device='cuda:0')
episode: 221 training return: tensor(-2.7637e+15, device='cuda:0')
episode: 222 training return: tensor(-2.7084e+15, device='cuda:0')
episode: 223 training return: tensor(-2.0530e+15, device='cuda:0')
epoch: 56 test_true_pfm: -22.632536631517336
episode: 224 training return: tensor(-3.3937e+15, device='cuda:0')
episode: 225 training return: tensor(-1.6474e+17, device='cuda:0')
episode: 226 training return: tensor(-8.1116e+15, device='cuda:0')
episode: 227 training return: tensor(-2.9287e+15, device='cuda:0')
epoch: 57 test_true_pfm: -22.369074147416224
episode: 228 training return: tensor(-2.9449e+15, device='cuda:0')
episode: 229 training return: tensor(-2.5578e+15, device='cuda:0')
episode: 230 training return: tensor(-2.5320e+15, device='cuda:0')
episode: 231 training return: tensor(-5.0113e+14, device='cuda:0')
epoch: 58 test_true_pfm: -91.35876862315642
episode: 232 training return: tensor(-1.0373e+15, device='cuda:0')
episode: 233 training return: tensor(-9.3510e+15, device='cuda:0')
episode: 234 training return: tensor(-2.8629e+15, device='cuda:0')
episode: 235 training return: tensor(-7.0535e+14, device='cuda:0')
epoch: 59 test_true_pfm: 6.848887345216752
episode: 236 training return: tensor(-6.6350e+15, device='cuda:0')
episode: 237 training return: tensor(-3.1925e+16, device='cuda:0')
episode: 238 training return: tensor(-4.4476e+16, device='cuda:0')
episode: 239 training return: tensor(-1.8191e+17, device='cuda:0')
epoch: 60 test_true_pfm: -104.53417434707048
episode: 240 training return: tensor(-6.6394e+18, device='cuda:0')
episode: 241 training return: tensor(-2.1155e+15, device='cuda:0')
episode: 242 training return: tensor(-7.2467e+14, device='cuda:0')
episode: 243 training return: tensor(-2.6077e+15, device='cuda:0')
epoch: 61 test_true_pfm: -3.3829532027738787
episode: 244 training return: tensor(-2.9753e+15, device='cuda:0')
episode: 245 training return: tensor(-4.8465e+18, device='cuda:0')
episode: 246 training return: tensor(-5.1210e+14, device='cuda:0')
episode: 247 training return: tensor(-2.8704e+16, device='cuda:0')
epoch: 62 test_true_pfm: -23.854050504828525
episode: 248 training return: tensor(-3.9412e+17, device='cuda:0')
episode: 249 training return: tensor(-6.8516e+14, device='cuda:0')
episode: 250 training return: tensor(-6.7821e+14, device='cuda:0')
episode: 251 training return: tensor(-7.9018e+14, device='cuda:0')
epoch: 63 test_true_pfm: -7.542585865864261
episode: 252 training return: tensor(-7.3041e+14, device='cuda:0')
episode: 253 training return: tensor(-2.9262e+15, device='cuda:0')
episode: 254 training return: tensor(-2.7026e+15, device='cuda:0')
episode: 255 training return: tensor(-2.7945e+15, device='cuda:0')
epoch: 64 test_true_pfm: -33.7312128050859
episode: 256 training return: tensor(-2.2199e+15, device='cuda:0')
episode: 257 training return: tensor(-2.6707e+15, device='cuda:0')
episode: 258 training return: tensor(-2.8070e+15, device='cuda:0')
episode: 259 training return: tensor(-4.5447e+16, device='cuda:0')
epoch: 65 test_true_pfm: -163.16750421825867
episode: 260 training return: tensor(-6.7300e+19, device='cuda:0')
episode: 261 training return: tensor(-5.2068e+16, device='cuda:0')
episode: 262 training return: tensor(-6.5931e+19, device='cuda:0')
episode: 263 training return: tensor(-3.5798e+15, device='cuda:0')
epoch: 66 test_true_pfm: -21.00717025102964
episode: 264 training return: tensor(-3.2837e+15, device='cuda:0')
episode: 265 training return: tensor(-1.3836e+15, device='cuda:0')
episode: 266 training return: tensor(-5.8365e+14, device='cuda:0')
episode: 267 training return: tensor(-2.2409e+16, device='cuda:0')
epoch: 67 test_true_pfm: -21.876399532083795
episode: 268 training return: tensor(-2.7153e+15, device='cuda:0')
episode: 269 training return: tensor(-8.9882e+14, device='cuda:0')
episode: 270 training return: tensor(-4.8992e+14, device='cuda:0')
episode: 271 training return: tensor(-2.6171e+18, device='cuda:0')
epoch: 68 test_true_pfm: -37.944892472572356
episode: 272 training return: tensor(-5.1230e+14, device='cuda:0')
episode: 273 training return: tensor(-8.9579e+14, device='cuda:0')
episode: 274 training return: tensor(-3.0788e+15, device='cuda:0')
episode: 275 training return: tensor(-2.8193e+15, device='cuda:0')
epoch: 69 test_true_pfm: -29.44838794658426
episode: 276 training return: tensor(-8.8984e+14, device='cuda:0')
episode: 277 training return: tensor(-2.7177e+15, device='cuda:0')
episode: 278 training return: tensor(-3.3368e+15, device='cuda:0')
episode: 279 training return: tensor(-2.9694e+15, device='cuda:0')
epoch: 70 test_true_pfm: -21.899277049110697
episode: 280 training return: tensor(-3.0899e+15, device='cuda:0')
episode: 281 training return: tensor(-3.3567e+15, device='cuda:0')
episode: 282 training return: tensor(-3.4273e+15, device='cuda:0')
episode: 283 training return: tensor(-3.2897e+15, device='cuda:0')
epoch: 71 test_true_pfm: -8.719909504355227
episode: 284 training return: tensor(-1.2835e+15, device='cuda:0')
episode: 285 training return: tensor(-3.1671e+16, device='cuda:0')
episode: 286 training return: tensor(-2.1288e+19, device='cuda:0')
episode: 287 training return: tensor(-1.1741e+18, device='cuda:0')
epoch: 72 test_true_pfm: -12.69802034098002
episode: 288 training return: tensor(-6.6538e+14, device='cuda:0')
episode: 289 training return: tensor(-6.9039e+14, device='cuda:0')
episode: 290 training return: tensor(-2.8671e+18, device='cuda:0')
episode: 291 training return: tensor(-1.3369e+15, device='cuda:0')
epoch: 73 test_true_pfm: -68.21062145765946
episode: 292 training return: tensor(-7.6474e+14, device='cuda:0')
episode: 293 training return: tensor(-1.8596e+15, device='cuda:0')
episode: 294 training return: tensor(-2.7588e+15, device='cuda:0')
episode: 295 training return: tensor(-2.7418e+15, device='cuda:0')
epoch: 74 test_true_pfm: -21.539973259666713
episode: 296 training return: tensor(-3.2818e+15, device='cuda:0')
episode: 297 training return: tensor(-2.7198e+15, device='cuda:0')
episode: 298 training return: tensor(-3.0621e+15, device='cuda:0')
episode: 299 training return: tensor(-3.0745e+15, device='cuda:0')
epoch: 75 test_true_pfm: -22.54689881734092
episode: 300 training return: tensor(-2.5658e+15, device='cuda:0')
episode: 301 training return: tensor(-3.2227e+15, device='cuda:0')
episode: 302 training return: tensor(-3.4661e+15, device='cuda:0')
episode: 303 training return: tensor(-3.2986e+15, device='cuda:0')
epoch: 76 test_true_pfm: -21.754548433762597
episode: 304 training return: tensor(-3.1867e+15, device='cuda:0')
episode: 305 training return: tensor(-3.4167e+15, device='cuda:0')
episode: 306 training return: tensor(-3.2700e+15, device='cuda:0')
episode: 307 training return: tensor(-3.1951e+15, device='cuda:0')
epoch: 77 test_true_pfm: -22.285404382909007
episode: 308 training return: tensor(-3.0704e+15, device='cuda:0')
episode: 309 training return: tensor(-9.7006e+14, device='cuda:0')
episode: 310 training return: tensor(-3.0786e+15, device='cuda:0')
episode: 311 training return: tensor(-2.7679e+15, device='cuda:0')
epoch: 78 test_true_pfm: -21.96985224527062
episode: 312 training return: tensor(-2.9118e+15, device='cuda:0')
episode: 313 training return: tensor(-2.8045e+15, device='cuda:0')
episode: 314 training return: tensor(-9.0199e+14, device='cuda:0')
episode: 315 training return: tensor(-3.0051e+15, device='cuda:0')
epoch: 79 test_true_pfm: -21.920007122148082
episode: 316 training return: tensor(-3.1617e+15, device='cuda:0')
episode: 317 training return: tensor(-2.6379e+15, device='cuda:0')
episode: 318 training return: tensor(-3.1802e+15, device='cuda:0')
episode: 319 training return: tensor(-6.6431e+14, device='cuda:0')
epoch: 80 test_true_pfm: -29.097953289884817
episode: 320 training return: tensor(-3.9821e+15, device='cuda:0')
episode: 321 training return: tensor(-2.7664e+15, device='cuda:0')
episode: 322 training return: tensor(-2.8444e+15, device='cuda:0')
episode: 323 training return: tensor(-2.7852e+15, device='cuda:0')
epoch: 81 test_true_pfm: -22.182287042315707
episode: 324 training return: tensor(-2.8181e+15, device='cuda:0')
episode: 325 training return: tensor(-1.6429e+21, device='cuda:0')
episode: 326 training return: tensor(-6.7645e+14, device='cuda:0')
episode: 327 training return: tensor(-5.5532e+14, device='cuda:0')
epoch: 82 test_true_pfm: -54.77431697633133
episode: 328 training return: tensor(-7.5779e+14, device='cuda:0')
episode: 329 training return: tensor(-1.3588e+15, device='cuda:0')
episode: 330 training return: tensor(-7.2565e+14, device='cuda:0')
episode: 331 training return: tensor(-5.9752e+14, device='cuda:0')
epoch: 83 test_true_pfm: -24.952928885789053
episode: 332 training return: tensor(-4.6330e+15, device='cuda:0')
episode: 333 training return: tensor(-3.2260e+14, device='cuda:0')
episode: 334 training return: tensor(-2.7400e+15, device='cuda:0')
episode: 335 training return: tensor(-3.4459e+15, device='cuda:0')
epoch: 84 test_true_pfm: 236.309227934502
episode: 336 training return: tensor(-36158832., device='cuda:0')
episode: 337 training return: tensor(-2.9433e+15, device='cuda:0')
episode: 338 training return: tensor(-3.4515e+15, device='cuda:0')
episode: 339 training return: tensor(-2.7293e+15, device='cuda:0')
epoch: 85 test_true_pfm: -22.19227683727941
episode: 340 training return: tensor(-2.6873e+15, device='cuda:0')
episode: 341 training return: tensor(-3.3097e+15, device='cuda:0')
episode: 342 training return: tensor(-6.8071e+19, device='cuda:0')
episode: 343 training return: tensor(-3.3087e+15, device='cuda:0')
epoch: 86 test_true_pfm: -23.788741467382152
episode: 344 training return: tensor(-3.2050e+15, device='cuda:0')
episode: 345 training return: tensor(-3.2864e+15, device='cuda:0')
episode: 346 training return: tensor(-3.6046e+15, device='cuda:0')
episode: 347 training return: tensor(-4.9398e+14, device='cuda:0')
epoch: 87 test_true_pfm: -104.8192261953457
episode: 348 training return: tensor(-8.7490e+14, device='cuda:0')
episode: 349 training return: tensor(-1.2594e+15, device='cuda:0')
episode: 350 training return: tensor(-1.2570e+15, device='cuda:0')
episode: 351 training return: tensor(-3.6803e+15, device='cuda:0')
epoch: 88 test_true_pfm: -36.166990559941354
episode: 352 training return: tensor(-3.3310e+15, device='cuda:0')
episode: 353 training return: tensor(-5.6066e+14, device='cuda:0')
episode: 354 training return: tensor(-3.9740e+14, device='cuda:0')
episode: 355 training return: tensor(-2.9255e+15, device='cuda:0')
epoch: 89 test_true_pfm: -43.63252572960972
episode: 356 training return: tensor(-2.3978e+15, device='cuda:0')
episode: 357 training return: tensor(-2.8434e+15, device='cuda:0')
episode: 358 training return: tensor(-2.8026e+15, device='cuda:0')
episode: 359 training return: tensor(-3.2099e+15, device='cuda:0')
epoch: 90 test_true_pfm: -21.83552992830906
episode: 360 training return: tensor(-3.0999e+15, device='cuda:0')
episode: 361 training return: tensor(-1.1138e+15, device='cuda:0')
episode: 362 training return: tensor(-4.6602e+14, device='cuda:0')
episode: 363 training return: tensor(-8.0953e+14, device='cuda:0')
epoch: 91 test_true_pfm: -30.41174122080885
episode: 364 training return: tensor(-8.9846e+14, device='cuda:0')
episode: 365 training return: tensor(-8.9558e+14, device='cuda:0')
episode: 366 training return: tensor(-4.7006e+14, device='cuda:0')
episode: 367 training return: tensor(-4.8909e+14, device='cuda:0')
epoch: 92 test_true_pfm: -48.027288062108006
episode: 368 training return: tensor(-4.6547e+14, device='cuda:0')
episode: 369 training return: tensor(-4.6386e+14, device='cuda:0')
episode: 370 training return: tensor(-1.1930e+15, device='cuda:0')
episode: 371 training return: tensor(-9.3163e+19, device='cuda:0')
epoch: 93 test_true_pfm: -38.14834558054907
episode: 372 training return: tensor(-4.9863e+14, device='cuda:0')
episode: 373 training return: tensor(-9.3461e+14, device='cuda:0')
episode: 374 training return: tensor(-2.6788e+15, device='cuda:0')
episode: 375 training return: tensor(-2.9586e+15, device='cuda:0')
epoch: 94 test_true_pfm: -14.138913496119818
episode: 376 training return: tensor(-6.8855e+14, device='cuda:0')
episode: 377 training return: tensor(-4.6155e+14, device='cuda:0')
episode: 378 training return: tensor(-6.4480e+14, device='cuda:0')
episode: 379 training return: tensor(-8.5605e+14, device='cuda:0')
epoch: 95 test_true_pfm: -0.10740393805044378
episode: 380 training return: tensor(-1.1357e+15, device='cuda:0')
episode: 381 training return: tensor(-3.0177e+15, device='cuda:0')
episode: 382 training return: tensor(-1.6147e+16, device='cuda:0')
episode: 383 training return: tensor(-1.1398e+20, device='cuda:0')
epoch: 96 test_true_pfm: 28.343509275321583
episode: 384 training return: tensor(-1.0882e+15, device='cuda:0')
episode: 385 training return: tensor(-8.4391e+19, device='cuda:0')
episode: 386 training return: tensor(-6.6123e+19, device='cuda:0')
episode: 387 training return: tensor(-4.5764e+14, device='cuda:0')
epoch: 97 test_true_pfm: -22.298738088504336
episode: 388 training return: tensor(-3.2687e+15, device='cuda:0')
episode: 389 training return: tensor(-2.7883e+15, device='cuda:0')
episode: 390 training return: tensor(-8.9629e+14, device='cuda:0')
episode: 391 training return: tensor(-9.8909e+14, device='cuda:0')
epoch: 98 test_true_pfm: 2.147486019481034
episode: 392 training return: tensor(-1.4815e+15, device='cuda:0')
episode: 393 training return: tensor(-1.2168e+15, device='cuda:0')
episode: 394 training return: tensor(-3.3826e+17, device='cuda:0')
episode: 395 training return: tensor(-2.8411e+15, device='cuda:0')
epoch: 99 test_true_pfm: -22.038605505031597
episode: 396 training return: tensor(-2.6765e+15, device='cuda:0')
episode: 397 training return: tensor(-7.2979e+14, device='cuda:0')
episode: 398 training return: tensor(-2.8379e+15, device='cuda:0')
episode: 399 training return: tensor(-1.0415e+15, device='cuda:0')
epoch: 100 test_true_pfm: -61.747085286293476
episode: 400 training return: tensor(-1.6098e+15, device='cuda:0')
episode: 401 training return: tensor(-1.7766e+15, device='cuda:0')
episode: 402 training return: tensor(-5.1260e+14, device='cuda:0')
episode: 403 training return: tensor(-2.7472e+15, device='cuda:0')
epoch: 101 test_true_pfm: -6.506412988828949
episode: 404 training return: tensor(-7.4450e+14, device='cuda:0')
episode: 405 training return: tensor(-2.9511e+20, device='cuda:0')
episode: 406 training return: tensor(-1.2900e+15, device='cuda:0')
episode: 407 training return: tensor(-2.8866e+15, device='cuda:0')
epoch: 102 test_true_pfm: -51.214541036787864
episode: 408 training return: tensor(-1.6910e+15, device='cuda:0')
episode: 409 training return: tensor(-2.8406e+15, device='cuda:0')
episode: 410 training return: tensor(-3.6495e+15, device='cuda:0')
episode: 411 training return: tensor(-2.7079e+15, device='cuda:0')
epoch: 103 test_true_pfm: -21.99791710683118
episode: 412 training return: tensor(-3.1959e+15, device='cuda:0')
episode: 413 training return: tensor(-2.5363e+15, device='cuda:0')
episode: 414 training return: tensor(-2.5715e+15, device='cuda:0')
episode: 415 training return: tensor(-1.9826e+15, device='cuda:0')
epoch: 104 test_true_pfm: -20.959356917635834
episode: 416 training return: tensor(-2.6552e+15, device='cuda:0')
episode: 417 training return: tensor(-3.2422e+15, device='cuda:0')
episode: 418 training return: tensor(-2.6481e+15, device='cuda:0')
episode: 419 training return: tensor(-3.2377e+15, device='cuda:0')
epoch: 105 test_true_pfm: -22.002150920440254
episode: 420 training return: tensor(-2.7050e+15, device='cuda:0')
episode: 421 training return: tensor(-2.7993e+15, device='cuda:0')
episode: 422 training return: tensor(-1.8231e+16, device='cuda:0')
episode: 423 training return: tensor(-1.8415e+17, device='cuda:0')
epoch: 106 test_true_pfm: -20.365226380300225
episode: 424 training return: tensor(-6.1399e+20, device='cuda:0')
episode: 425 training return: tensor(-4.1725e+15, device='cuda:0')
episode: 426 training return: tensor(-7.2831e+15, device='cuda:0')
episode: 427 training return: tensor(-1.7383e+16, device='cuda:0')
epoch: 107 test_true_pfm: -21.876405817035266
episode: 428 training return: tensor(-3.0782e+15, device='cuda:0')
episode: 429 training return: tensor(-2.9388e+15, device='cuda:0')
episode: 430 training return: tensor(-3.4844e+15, device='cuda:0')
episode: 431 training return: tensor(-3.1243e+15, device='cuda:0')
epoch: 108 test_true_pfm: -21.929664152956537
episode: 432 training return: tensor(-2.5902e+15, device='cuda:0')
episode: 433 training return: tensor(-2.9318e+15, device='cuda:0')
episode: 434 training return: tensor(-3.4742e+15, device='cuda:0')
episode: 435 training return: tensor(-2.9235e+15, device='cuda:0')
epoch: 109 test_true_pfm: -22.52706989625815
episode: 436 training return: tensor(-2.8204e+15, device='cuda:0')
episode: 437 training return: tensor(-3.1477e+15, device='cuda:0')
episode: 438 training return: tensor(-4.5493e+14, device='cuda:0')
episode: 439 training return: tensor(-3.3983e+15, device='cuda:0')
epoch: 110 test_true_pfm: -22.387028210782972
episode: 440 training return: tensor(-2.9583e+15, device='cuda:0')
episode: 441 training return: tensor(-3.1893e+15, device='cuda:0')
episode: 442 training return: tensor(-3.3410e+15, device='cuda:0')
episode: 443 training return: tensor(-2.6621e+15, device='cuda:0')
epoch: 111 test_true_pfm: -21.91317768061354
episode: 444 training return: tensor(-2.9103e+15, device='cuda:0')
episode: 445 training return: tensor(-3.1983e+15, device='cuda:0')
episode: 446 training return: tensor(-1.8613e+15, device='cuda:0')
episode: 447 training return: tensor(-1.5749e+15, device='cuda:0')
epoch: 112 test_true_pfm: 219.6966239475004
episode: 448 training return: tensor(-5.2792e+16, device='cuda:0')
episode: 449 training return: tensor(-2.5737e+15, device='cuda:0')
episode: 450 training return: tensor(-7.5485e+14, device='cuda:0')
episode: 451 training return: tensor(-9.0461e+17, device='cuda:0')
epoch: 113 test_true_pfm: 23.95253899779688
episode: 452 training return: tensor(-2.7361e+15, device='cuda:0')
episode: 453 training return: tensor(-7.4682e+14, device='cuda:0')
episode: 454 training return: tensor(-7.1071e+14, device='cuda:0')
episode: 455 training return: tensor(-1.0161e+18, device='cuda:0')
epoch: 114 test_true_pfm: -64.40621602079624
episode: 456 training return: tensor(-3.0661e+15, device='cuda:0')
episode: 457 training return: tensor(-4.2140e+14, device='cuda:0')
episode: 458 training return: tensor(-2.9832e+15, device='cuda:0')
episode: 459 training return: tensor(-9.0675e+14, device='cuda:0')
epoch: 115 test_true_pfm: -29.824840294031116
episode: 460 training return: tensor(-1.0189e+15, device='cuda:0')
episode: 461 training return: tensor(-4.6390e+14, device='cuda:0')
episode: 462 training return: tensor(-1.0423e+15, device='cuda:0')
episode: 463 training return: tensor(-1.0714e+15, device='cuda:0')
epoch: 116 test_true_pfm: -89.19418997812521
episode: 464 training return: tensor(-2.8736e+15, device='cuda:0')
episode: 465 training return: tensor(-2.9248e+15, device='cuda:0')
episode: 466 training return: tensor(-3.7661e+15, device='cuda:0')
episode: 467 training return: tensor(-3.5900e+15, device='cuda:0')
epoch: 117 test_true_pfm: -39.04852566148244
episode: 468 training return: tensor(-1.5547e+15, device='cuda:0')
episode: 469 training return: tensor(-2.3191e+15, device='cuda:0')
episode: 470 training return: tensor(-2.6584e+15, device='cuda:0')
episode: 471 training return: tensor(-5.1652e+14, device='cuda:0')
epoch: 118 test_true_pfm: -50.86655218791938
episode: 472 training return: tensor(-4.6314e+14, device='cuda:0')
episode: 473 training return: tensor(-1.0172e+15, device='cuda:0')
episode: 474 training return: tensor(-5.9091e+14, device='cuda:0')
episode: 475 training return: tensor(-8.4494e+14, device='cuda:0')
epoch: 119 test_true_pfm: -43.324954866129076
episode: 476 training return: tensor(-6.5604e+14, device='cuda:0')
episode: 477 training return: tensor(-4.8480e+14, device='cuda:0')
episode: 478 training return: tensor(-7.5265e+14, device='cuda:0')
episode: 479 training return: tensor(-7.6602e+14, device='cuda:0')
epoch: 120 test_true_pfm: 85.08470638641076
episode: 480 training return: tensor(-6.8836e+19, device='cuda:0')
episode: 481 training return: tensor(-4.3790e+14, device='cuda:0')
episode: 482 training return: tensor(-3.9853e+14, device='cuda:0')
episode: 483 training return: tensor(-4.6317e+14, device='cuda:0')
epoch: 121 test_true_pfm: -61.353315118173434
episode: 484 training return: tensor(-4.6061e+14, device='cuda:0')
episode: 485 training return: tensor(-4.6424e+14, device='cuda:0')
episode: 486 training return: tensor(-1.4756e+15, device='cuda:0')
episode: 487 training return: tensor(-35080848., device='cuda:0')
epoch: 122 test_true_pfm: -72.64430677224574
episode: 488 training return: tensor(-1.0590e+15, device='cuda:0')
episode: 489 training return: tensor(-9.9257e+14, device='cuda:0')
episode: 490 training return: tensor(-1.4230e+15, device='cuda:0')
episode: 491 training return: tensor(-4.7086e+14, device='cuda:0')
epoch: 123 test_true_pfm: -53.11034162622338
episode: 492 training return: tensor(-8.4346e+14, device='cuda:0')
episode: 493 training return: tensor(-9.8185e+14, device='cuda:0')
episode: 494 training return: tensor(-2.1935e+20, device='cuda:0')
episode: 495 training return: tensor(-8.9983e+14, device='cuda:0')
epoch: 124 test_true_pfm: -26.825876863546657
episode: 496 training return: tensor(-3.0251e+15, device='cuda:0')
episode: 497 training return: tensor(-4.3688e+14, device='cuda:0')
episode: 498 training return: tensor(-4.7160e+14, device='cuda:0')
episode: 499 training return: tensor(-2.6105e+11, device='cuda:0')
epoch: 125 test_true_pfm: -51.98714551210927
episode: 500 training return: tensor(-4.4945e+14, device='cuda:0')
episode: 501 training return: tensor(-4.3693e+14, device='cuda:0')
episode: 502 training return: tensor(-4.1412e+14, device='cuda:0')
episode: 503 training return: tensor(-2.8500e+15, device='cuda:0')
epoch: 126 test_true_pfm: -56.84937155098993
episode: 504 training return: tensor(-2.8572e+15, device='cuda:0')
episode: 505 training return: tensor(-4.8603e+14, device='cuda:0')
episode: 506 training return: tensor(-5.2813e+14, device='cuda:0')
episode: 507 training return: tensor(-5.4514e+16, device='cuda:0')
epoch: 127 test_true_pfm: -18.042405492323315
episode: 508 training return: tensor(-1.6432e+20, device='cuda:0')
episode: 509 training return: tensor(-1.8249e+15, device='cuda:0')
episode: 510 training return: tensor(-9.6878e+14, device='cuda:0')
episode: 511 training return: tensor(-4.5587e+14, device='cuda:0')
epoch: 128 test_true_pfm: -52.14689437313141
episode: 512 training return: tensor(-4.3001e+14, device='cuda:0')
episode: 513 training return: tensor(-4.3059e+14, device='cuda:0')
episode: 514 training return: tensor(-4.5850e+14, device='cuda:0')
episode: 515 training return: tensor(-4.6180e+14, device='cuda:0')
epoch: 129 test_true_pfm: -63.858202211144395
episode: 516 training return: tensor(-4.4448e+14, device='cuda:0')
episode: 517 training return: tensor(-2.8383e+15, device='cuda:0')
episode: 518 training return: tensor(-2.9413e+15, device='cuda:0')
episode: 519 training return: tensor(-4.6392e+14, device='cuda:0')
epoch: 130 test_true_pfm: -40.23401530957046
episode: 520 training return: tensor(-5.1840e+14, device='cuda:0')
episode: 521 training return: tensor(-1.3233e+15, device='cuda:0')
episode: 522 training return: tensor(-4.6380e+14, device='cuda:0')
episode: 523 training return: tensor(-7.0578e+14, device='cuda:0')
epoch: 131 test_true_pfm: -73.26463552273252
episode: 524 training return: tensor(-4.4017e+14, device='cuda:0')
episode: 525 training return: tensor(-4.3314e+14, device='cuda:0')
episode: 526 training return: tensor(-4.0941e+14, device='cuda:0')
episode: 527 training return: tensor(-8.7205e+14, device='cuda:0')
epoch: 132 test_true_pfm: -73.47806937884191
episode: 528 training return: tensor(-8.0846e+14, device='cuda:0')
episode: 529 training return: tensor(-1.5181e+15, device='cuda:0')
episode: 530 training return: tensor(-4.7261e+14, device='cuda:0')
episode: 531 training return: tensor(-5.8460e+14, device='cuda:0')
epoch: 133 test_true_pfm: 13.838301297226366
episode: 532 training return: tensor(-6.5313e+19, device='cuda:0')
episode: 533 training return: tensor(-2.8315e+15, device='cuda:0')
episode: 534 training return: tensor(-4.5639e+14, device='cuda:0')
episode: 535 training return: tensor(-4.0391e+14, device='cuda:0')
epoch: 134 test_true_pfm: -66.09365962404492
episode: 536 training return: tensor(-6.0495e+15, device='cuda:0')
episode: 537 training return: tensor(-5.3123e+15, device='cuda:0')
episode: 538 training return: tensor(-3.0657e+15, device='cuda:0')
episode: 539 training return: tensor(-4.9229e+14, device='cuda:0')
epoch: 135 test_true_pfm: 188.0554090248573
episode: 540 training return: tensor(-2.9447e+15, device='cuda:0')
episode: 541 training return: tensor(-3.2563e+15, device='cuda:0')
episode: 542 training return: tensor(-2.9590e+15, device='cuda:0')
episode: 543 training return: tensor(-2.3804e+17, device='cuda:0')
epoch: 136 test_true_pfm: -22.859450665488485
episode: 544 training return: tensor(-2.7624e+15, device='cuda:0')
episode: 545 training return: tensor(-2.9929e+15, device='cuda:0')
episode: 546 training return: tensor(-3.2439e+15, device='cuda:0')
episode: 547 training return: tensor(-3.0071e+15, device='cuda:0')
epoch: 137 test_true_pfm: -22.41527787273238
episode: 548 training return: tensor(-2.5792e+15, device='cuda:0')
episode: 549 training return: tensor(-3.2477e+15, device='cuda:0')
episode: 550 training return: tensor(-2.6817e+15, device='cuda:0')
episode: 551 training return: tensor(-2.6324e+15, device='cuda:0')
epoch: 138 test_true_pfm: -22.500955745522536
episode: 552 training return: tensor(-2.1982e+15, device='cuda:0')
episode: 553 training return: tensor(-2313675.5000, device='cuda:0')
episode: 554 training return: tensor(-6.2934e+08, device='cuda:0')
episode: 555 training return: tensor(-1.6657e+08, device='cuda:0')
epoch: 139 test_true_pfm: 202.00979214248514
episode: 556 training return: tensor(-2517560.2500, device='cuda:0')
episode: 557 training return: tensor(-2.8380e+15, device='cuda:0')
episode: 558 training return: tensor(-2.9344e+15, device='cuda:0')
episode: 559 training return: tensor(-55988772., device='cuda:0')
epoch: 140 test_true_pfm: -22.08159168021551
episode: 560 training return: tensor(-2.6103e+15, device='cuda:0')
episode: 561 training return: tensor(-38757484., device='cuda:0')
episode: 562 training return: tensor(-21774982., device='cuda:0')
episode: 563 training return: tensor(-2.7447e+15, device='cuda:0')
epoch: 141 test_true_pfm: -43.83645849229191
episode: 564 training return: tensor(-5.6357e+14, device='cuda:0')
episode: 565 training return: tensor(-3.3355e+15, device='cuda:0')
episode: 566 training return: tensor(-4.6211e+14, device='cuda:0')
episode: 567 training return: tensor(-3519469., device='cuda:0')
epoch: 142 test_true_pfm: -239.22174829136134
episode: 568 training return: tensor(-5.3528e+19, device='cuda:0')
episode: 569 training return: tensor(-3.2318e+14, device='cuda:0')
episode: 570 training return: tensor(-2.8758e+15, device='cuda:0')
episode: 571 training return: tensor(-4.6641e+14, device='cuda:0')
epoch: 143 test_true_pfm: -58.82499132062856
episode: 572 training return: tensor(-1.4990e+15, device='cuda:0')
episode: 573 training return: tensor(-3.1958e+14, device='cuda:0')
episode: 574 training return: tensor(-2.7675e+15, device='cuda:0')
episode: 575 training return: tensor(-1.2477e+15, device='cuda:0')
epoch: 144 test_true_pfm: -49.59552366426667
episode: 576 training return: tensor(-4.6987e+14, device='cuda:0')
episode: 577 training return: tensor(-27004694., device='cuda:0')
episode: 578 training return: tensor(-1.1076e+17, device='cuda:0')
episode: 579 training return: tensor(-1.6555e+15, device='cuda:0')
epoch: 145 test_true_pfm: -8.663622422920296
episode: 580 training return: tensor(-2.8837e+15, device='cuda:0')
episode: 581 training return: tensor(-7.8381e+14, device='cuda:0')
episode: 582 training return: tensor(-4.6256e+14, device='cuda:0')
episode: 583 training return: tensor(-4.3501e+14, device='cuda:0')
epoch: 146 test_true_pfm: -48.33958792050024
episode: 584 training return: tensor(-3.9158e+14, device='cuda:0')
episode: 585 training return: tensor(-4.2204e+14, device='cuda:0')
episode: 586 training return: tensor(-4.2363e+14, device='cuda:0')
episode: 587 training return: tensor(-2.1255e+15, device='cuda:0')
epoch: 147 test_true_pfm: -211.0162780437321
episode: 588 training return: tensor(-9.9392e+14, device='cuda:0')
episode: 589 training return: tensor(-5.4401e+19, device='cuda:0')
episode: 590 training return: tensor(-6.4785e+14, device='cuda:0')
episode: 591 training return: tensor(-3.0336e+15, device='cuda:0')
epoch: 148 test_true_pfm: -22.088517450555432
episode: 592 training return: tensor(-3.1258e+15, device='cuda:0')
episode: 593 training return: tensor(-4.6082e+14, device='cuda:0')
episode: 594 training return: tensor(-3.4226e+14, device='cuda:0')
episode: 595 training return: tensor(-7.9835e+14, device='cuda:0')
epoch: 149 test_true_pfm: 15.303388619301108
episode: 596 training return: tensor(-1.9486e+15, device='cuda:0')
episode: 597 training return: tensor(-3.0034e+19, device='cuda:0')
episode: 598 training return: tensor(-3.7466e+20, device='cuda:0')
episode: 599 training return: tensor(-4.4303e+16, device='cuda:0')
epoch: 150 test_true_pfm: 43.147647596812114
