['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 2.5868639527261257 test_loss: -4.083198928833008
epoch: 1 training_loss -6.397264153957367 test_loss: -8.075514221191407
epoch: 2 training_loss -8.748352990150451 test_loss: -9.669830322265625
epoch: 3 training_loss -10.18659176826477 test_loss: -10.634580993652344
epoch: 4 training_loss -11.004022054672241 test_loss: -11.309751892089844
epoch: 5 training_loss -11.570297937393189 test_loss: -11.731037139892578
epoch: 6 training_loss -12.053734254837035 test_loss: -12.19345474243164
epoch: 7 training_loss -12.355977640151977 test_loss: -12.352945709228516
epoch: 8 training_loss -12.50120032310486 test_loss: -12.588845825195312
epoch: 9 training_loss -12.745548391342163 test_loss: -12.724830627441406
epoch: 10 training_loss -13.101418190002441 test_loss: -13.124905395507813
epoch: 11 training_loss -13.099407234191894 test_loss: -13.4292236328125
epoch: 12 training_loss -13.340634689331054 test_loss: -13.099977111816406
epoch: 13 training_loss -13.503033380508423 test_loss: -13.449862670898437
epoch: 14 training_loss -13.58626935005188 test_loss: -13.690361022949219
epoch: 15 training_loss -13.680346660614013 test_loss: -13.951617431640624
epoch: 16 training_loss -13.731575651168823 test_loss: -13.778030395507812
epoch: 17 training_loss -13.928626155853271 test_loss: -13.959870910644531
epoch: 18 training_loss -14.016548538208008 test_loss: -14.140867614746094
epoch: 19 training_loss -14.060253496170043 test_loss: -14.158474731445313
epoch: 20 training_loss -14.095118312835693 test_loss: -14.052964782714843
epoch: 21 training_loss -14.201252489089965 test_loss: -14.477702331542968
epoch: 22 training_loss -14.394755630493163 test_loss: -14.444004821777344
epoch: 23 training_loss -14.364022598266601 test_loss: -14.28418426513672
epoch: 24 training_loss -14.575013608932496 test_loss: -14.641885375976562
epoch: 25 training_loss -14.539224071502685 test_loss: -14.607746887207032
epoch: 26 training_loss -14.64858787536621 test_loss: -14.757672119140626
epoch: 27 training_loss -14.714063186645507 test_loss: -14.917573547363281
epoch: 28 training_loss -14.799593591690064 test_loss: -15.01024169921875
epoch: 29 training_loss -14.788674974441529 test_loss: -14.83033447265625
epoch: 30 training_loss -14.921944618225098 test_loss: -14.869358825683594
epoch: 31 training_loss -14.935518045425415 test_loss: -14.861053466796875
epoch: 32 training_loss -14.93276746749878 test_loss: -15.114668273925782
epoch: 33 training_loss -15.012644329071044 test_loss: -15.095848083496094
epoch: 34 training_loss -15.09787509918213 test_loss: -15.207415771484374
epoch: 35 training_loss -15.132404050827027 test_loss: -14.850942993164063
epoch: 36 training_loss -15.124533081054688 test_loss: -15.1781982421875
epoch: 37 training_loss -15.178264513015748 test_loss: -15.160589599609375
epoch: 38 training_loss -15.202494411468505 test_loss: -15.066702270507813
epoch: 39 training_loss -15.274826431274414 test_loss: -15.422215270996094
epoch: 40 training_loss -15.275814723968505 test_loss: -15.35236053466797
epoch: 41 training_loss -15.39342206954956 test_loss: -15.394572448730468
epoch: 42 training_loss -15.33547161102295 test_loss: -15.490336608886718
epoch: 43 training_loss -15.454966983795167 test_loss: -15.423896789550781
epoch: 44 training_loss -15.4522136592865 test_loss: -15.489033508300782
epoch: 45 training_loss -15.503665733337403 test_loss: -15.558992004394531
epoch: 46 training_loss -15.4822066116333 test_loss: -15.627555847167969
epoch: 47 training_loss -15.547595806121826 test_loss: -15.50623779296875
epoch: 48 training_loss -15.557858810424804 test_loss: -15.738578796386719
epoch: 49 training_loss -15.620924615859986 test_loss: -15.632701110839843
epoch: 50 training_loss -15.683094434738159 test_loss: -15.734724426269532
epoch: 51 training_loss -15.668472356796265 test_loss: -15.693281555175782
epoch: 52 training_loss -15.64813985824585 test_loss: -15.704637145996093
epoch: 53 training_loss -15.776465482711792 test_loss: -15.645654296875
epoch: 54 training_loss -15.74119257926941 test_loss: -15.87520751953125
epoch: 55 training_loss -15.701173553466797 test_loss: -15.840907287597656
epoch: 56 training_loss -15.75901596069336 test_loss: -15.67113037109375
epoch: 57 training_loss -15.808612613677978 test_loss: -15.845542907714844
epoch: 58 training_loss -15.837067041397095 test_loss: -15.77030029296875
epoch: 59 training_loss -15.814255275726318 test_loss: -15.823065185546875
epoch: 60 training_loss -15.85484224319458 test_loss: -15.933148193359376
epoch: 61 training_loss -15.856687994003297 test_loss: -15.890647888183594
epoch: 62 training_loss -15.797656707763672 test_loss: -15.917340087890626
epoch: 63 training_loss -16.045822744369506 test_loss: -16.192668151855468
epoch: 64 training_loss -15.931277570724488 test_loss: -16.01400146484375
epoch: 65 training_loss -15.991528329849244 test_loss: -16.101718139648437
epoch: 66 training_loss -16.000436935424805 test_loss: -16.130833435058594
epoch: 67 training_loss -15.908623867034912 test_loss: -16.03917999267578
epoch: 68 training_loss -16.019488830566406 test_loss: -15.920491027832032
epoch: 69 training_loss -16.036116361618042 test_loss: -16.110397338867188
epoch: 70 training_loss -16.012065238952637 test_loss: -16.05074462890625
epoch: 71 training_loss -16.088791179656983 test_loss: -16.132598876953125
epoch: 72 training_loss -16.02603585243225 test_loss: -16.224497985839843
epoch: 73 training_loss -16.03515283584595 test_loss: -16.280821228027342
epoch: 74 training_loss -16.143691844940186 test_loss: -16.136439514160156
epoch: 75 training_loss -16.145607032775878 test_loss: -16.04207000732422
epoch: 76 training_loss -16.201851768493654 test_loss: -16.244305419921876
epoch: 77 training_loss -16.175719861984252 test_loss: -16.313296508789062
epoch: 78 training_loss -16.195722761154176 test_loss: -16.04853057861328
epoch: 79 training_loss -16.177884073257445 test_loss: -16.320994567871093
epoch: 80 training_loss -16.18502614021301 test_loss: -16.120512390136717
epoch: 81 training_loss -16.13294690132141 test_loss: -16.343199157714842
epoch: 82 training_loss -16.18627718925476 test_loss: -16.351112365722656
epoch: 83 training_loss -16.288331117630005 test_loss: -16.20568084716797
epoch: 84 training_loss -16.22194664955139 test_loss: -16.21815643310547
epoch: 85 training_loss -16.30385669708252 test_loss: -16.38349609375
epoch: 86 training_loss -16.274164600372316 test_loss: -16.284828186035156
epoch: 87 training_loss -16.270893068313597 test_loss: -16.251600646972655
epoch: 88 training_loss -16.308486804962158 test_loss: -16.41021728515625
epoch: 89 training_loss -16.333456287384035 test_loss: -16.21732940673828
epoch: 90 training_loss -16.342335834503174 test_loss: -16.378250122070312
epoch: 91 training_loss -16.341470651626587 test_loss: -16.423460388183592
epoch: 92 training_loss -16.284375019073487 test_loss: -16.600498962402344
epoch: 93 training_loss -16.304814910888673 test_loss: -16.246548461914063
epoch: 94 training_loss -16.38964945793152 test_loss: -16.42778778076172
epoch: 95 training_loss -16.339826774597167 test_loss: -16.478125
epoch: 96 training_loss -16.4380699634552 test_loss: -16.384727478027344
epoch: 97 training_loss -16.366603994369505 test_loss: -16.444879150390626
epoch: 98 training_loss -16.41297290802002 test_loss: -16.51493682861328
epoch: 99 training_loss -16.357250299453735 test_loss: -16.304200744628908
epoch: 100 training_loss -16.451703233718874 test_loss: -16.5693115234375
epoch: 101 training_loss -16.462121267318725 test_loss: -16.510536193847656
epoch: 102 training_loss -16.391482954025268 test_loss: -16.39104309082031
epoch: 103 training_loss -16.42694787979126 test_loss: -16.452363586425783
epoch: 104 training_loss -16.45371916770935 test_loss: -16.471844482421876
epoch: 105 training_loss -16.42363899230957 test_loss: -16.47828826904297
epoch: 106 training_loss -16.470563287734986 test_loss: -16.554180908203126
epoch: 107 training_loss -16.498520030975342 test_loss: -16.555862426757812
epoch: 108 training_loss -16.49036820411682 test_loss: -16.551116943359375
epoch: 109 training_loss -16.551530284881594 test_loss: -16.632594299316406
epoch: 110 training_loss -16.555763158798218 test_loss: -16.455964660644533
epoch: 111 training_loss -16.501920185089112 test_loss: -16.52311553955078
epoch: 112 training_loss -16.51466609954834 test_loss: -16.60286865234375
epoch: 113 training_loss -16.56511559486389 test_loss: -16.569551086425783
epoch: 114 training_loss -16.604596309661865 test_loss: -16.68639373779297
epoch: 115 training_loss -16.548776054382323 test_loss: -16.61772766113281
epoch: 116 training_loss -16.5644193649292 test_loss: -16.563137817382813
epoch: 117 training_loss -16.612021102905274 test_loss: -16.615988159179686
epoch: 118 training_loss -16.589276008605957 test_loss: -16.711405944824218
epoch: 119 training_loss -16.624873085021974 test_loss: -16.639495849609375
epoch: 120 training_loss -16.594548082351686 test_loss: -16.653462219238282
epoch: 121 training_loss -16.574387340545655 test_loss: -16.740733337402343
epoch: 122 training_loss -16.59489186286926 test_loss: -16.685377502441405
epoch: 123 training_loss -16.616590061187743 test_loss: -16.662034606933595
epoch: 124 training_loss -16.57281005859375 test_loss: -16.645584106445312
epoch: 125 training_loss -16.62035933494568 test_loss: -16.6640869140625
epoch: 126 training_loss -16.646838216781617 test_loss: -16.704693603515626
epoch: 127 training_loss -16.688404178619386 test_loss: -16.73985137939453
epoch: 128 training_loss -16.611616678237915 test_loss: -16.636015319824217
epoch: 129 training_loss -16.670871286392213 test_loss: -16.555250549316405
epoch: 130 training_loss -16.68903594017029 test_loss: -16.52616729736328
epoch: 131 training_loss -16.68499638557434 test_loss: -16.56157989501953
epoch: 132 training_loss -16.63736011505127 test_loss: -16.777789306640624
epoch: 133 training_loss -16.72284507751465 test_loss: -16.756733703613282
epoch: 134 training_loss -16.666645374298096 test_loss: -16.64586639404297
epoch: 135 training_loss -16.701052684783935 test_loss: -16.57707977294922
epoch: 136 training_loss -16.71371063232422 test_loss: -16.78809356689453
epoch: 137 training_loss -16.67633840560913 test_loss: -16.824971008300782
epoch: 138 training_loss -16.730311737060546 test_loss: -16.808998107910156
epoch: 139 training_loss -16.666507396697998 test_loss: -16.802923583984374
epoch: 140 training_loss -16.712693367004395 test_loss: -16.780685424804688
epoch: 141 training_loss -16.679110841751097 test_loss: -16.799403381347656
epoch: 142 training_loss -16.722752342224123 test_loss: -16.73584442138672
epoch: 143 training_loss -16.80715435028076 test_loss: -16.841078186035155
epoch: 144 training_loss -16.707077560424803 test_loss: -16.816632080078126
epoch: 145 training_loss -16.736315488815308 test_loss: -16.720297241210936
epoch: 146 training_loss -16.780090045928954 test_loss: -16.82673034667969
epoch: 147 training_loss -16.77064796447754 test_loss: -16.520448303222658
epoch: 148 training_loss -16.705273818969726 test_loss: -16.796868896484376
epoch: 149 training_loss -16.795434093475343 test_loss: -16.779782104492188
2760.036277391874
episode: 0 training return: tensor(-35287.8438, device='cuda:0')
episode: 1 training return: tensor(-24518.5352, device='cuda:0')
episode: 2 training return: tensor(-1.4874e+13, device='cuda:0')
episode: 3 training return: tensor(-10400.1934, device='cuda:0')
epoch: 1 test_true_pfm: 5.269475053217801
episode: 4 training return: tensor(-71637.3359, device='cuda:0')
episode: 5 training return: tensor(-9.2427e+10, device='cuda:0')
episode: 6 training return: tensor(-3.3553e+09, device='cuda:0')
episode: 7 training return: tensor(-1.6107e+12, device='cuda:0')
epoch: 2 test_true_pfm: 71.59119635093894
episode: 8 training return: tensor(-1.9020e+12, device='cuda:0')
episode: 9 training return: tensor(-22192.8223, device='cuda:0')
episode: 10 training return: tensor(-1.0492e+10, device='cuda:0')
episode: 11 training return: tensor(-1.8951e+09, device='cuda:0')
epoch: 3 test_true_pfm: 47.288622057698866
episode: 12 training return: tensor(-2982470.5000, device='cuda:0')
episode: 13 training return: tensor(-109472.2031, device='cuda:0')
episode: 14 training return: tensor(-9231685., device='cuda:0')
episode: 15 training return: tensor(-2620737.5000, device='cuda:0')
epoch: 4 test_true_pfm: -68.62543988535283
episode: 16 training return: tensor(-349017.1562, device='cuda:0')
episode: 17 training return: tensor(-6.4555e+09, device='cuda:0')
episode: 18 training return: tensor(-391363.1250, device='cuda:0')
episode: 19 training return: tensor(-71665.5391, device='cuda:0')
epoch: 5 test_true_pfm: -75.92575586296039
episode: 20 training return: tensor(-82608.1484, device='cuda:0')
episode: 21 training return: tensor(-187920.9375, device='cuda:0')
episode: 22 training return: tensor(-385438.9688, device='cuda:0')
episode: 23 training return: tensor(-39869.9102, device='cuda:0')
epoch: 6 test_true_pfm: -18.00831277254449
episode: 24 training return: tensor(-150168.7656, device='cuda:0')
episode: 25 training return: tensor(-137093.2500, device='cuda:0')
episode: 26 training return: tensor(-383833.0312, device='cuda:0')
episode: 27 training return: tensor(-135558.4844, device='cuda:0')
epoch: 7 test_true_pfm: -0.9641027500316769
episode: 28 training return: tensor(-31582.7695, device='cuda:0')
episode: 29 training return: tensor(-37680.1523, device='cuda:0')
episode: 30 training return: tensor(-46446.9961, device='cuda:0')
episode: 31 training return: tensor(-31075.1777, device='cuda:0')
epoch: 8 test_true_pfm: -3.8355962665839667
episode: 32 training return: tensor(-52699.4883, device='cuda:0')
episode: 33 training return: tensor(-28451.2891, device='cuda:0')
episode: 34 training return: tensor(-32603.1094, device='cuda:0')
episode: 35 training return: tensor(-26421.0254, device='cuda:0')
epoch: 9 test_true_pfm: -0.9355412962557991
episode: 36 training return: tensor(-30490.6621, device='cuda:0')
episode: 37 training return: tensor(-27573.6582, device='cuda:0')
episode: 38 training return: tensor(-31007.5684, device='cuda:0')
episode: 39 training return: tensor(-26306.3027, device='cuda:0')
epoch: 10 test_true_pfm: -5.465875242464189
episode: 40 training return: tensor(-28656.0117, device='cuda:0')
episode: 41 training return: tensor(-26947.9414, device='cuda:0')
episode: 42 training return: tensor(-26975.7852, device='cuda:0')
episode: 43 training return: tensor(-32440.2441, device='cuda:0')
epoch: 11 test_true_pfm: 14.417174600425243
episode: 44 training return: tensor(-31045.4609, device='cuda:0')
episode: 45 training return: tensor(-56637.3711, device='cuda:0')
episode: 46 training return: tensor(-31084.6738, device='cuda:0')
episode: 47 training return: tensor(-25068.8223, device='cuda:0')
epoch: 12 test_true_pfm: -8.564110234577717
episode: 48 training return: tensor(-33231.4727, device='cuda:0')
episode: 49 training return: tensor(-1258268.1250, device='cuda:0')
episode: 50 training return: tensor(-21927.9375, device='cuda:0')
episode: 51 training return: tensor(-35083.4766, device='cuda:0')
epoch: 13 test_true_pfm: -69.73440979009402
episode: 52 training return: tensor(-30267.7520, device='cuda:0')
episode: 53 training return: tensor(-89061312., device='cuda:0')
episode: 54 training return: tensor(-5.5370e+08, device='cuda:0')
episode: 55 training return: tensor(-2.4836e+09, device='cuda:0')
epoch: 14 test_true_pfm: -89.265988164348
episode: 56 training return: tensor(-2.4375e+09, device='cuda:0')
episode: 57 training return: tensor(-1443761., device='cuda:0')
episode: 58 training return: tensor(-2538767., device='cuda:0')
episode: 59 training return: tensor(-7.9976e+09, device='cuda:0')
epoch: 15 test_true_pfm: -64.38187887510401
episode: 60 training return: tensor(-101722.9297, device='cuda:0')
episode: 61 training return: tensor(-76582.6641, device='cuda:0')
episode: 62 training return: tensor(-1411761.5000, device='cuda:0')
episode: 63 training return: tensor(-1.6061e+09, device='cuda:0')
epoch: 16 test_true_pfm: -37.7668848496704
episode: 64 training return: tensor(-1.9283e+09, device='cuda:0')
episode: 65 training return: tensor(-124906.5781, device='cuda:0')
episode: 66 training return: tensor(-1.2125e+10, device='cuda:0')
episode: 67 training return: tensor(-2.3308e+09, device='cuda:0')
epoch: 17 test_true_pfm: -22.38449928366316
episode: 68 training return: tensor(-3.0366e+10, device='cuda:0')
episode: 69 training return: tensor(-61418.7305, device='cuda:0')
episode: 70 training return: tensor(-198510.3438, device='cuda:0')
episode: 71 training return: tensor(-1.2249e+11, device='cuda:0')
epoch: 18 test_true_pfm: -164.121553589072
episode: 72 training return: tensor(-1008039.4375, device='cuda:0')
episode: 73 training return: tensor(-9.0132e+10, device='cuda:0')
episode: 74 training return: tensor(-1071954.8750, device='cuda:0')
episode: 75 training return: tensor(-6671820.5000, device='cuda:0')
epoch: 19 test_true_pfm: 9.262706749580365
episode: 76 training return: tensor(-2.4560e+10, device='cuda:0')
episode: 77 training return: tensor(-4598548., device='cuda:0')
episode: 78 training return: tensor(-390016.6562, device='cuda:0')
episode: 79 training return: tensor(-459411.9688, device='cuda:0')
epoch: 20 test_true_pfm: -36.1074978818851
episode: 80 training return: tensor(-47814.6797, device='cuda:0')
episode: 81 training return: tensor(-4.7695e+10, device='cuda:0')
episode: 82 training return: tensor(-7.8852e+10, device='cuda:0')
episode: 83 training return: tensor(-133876.1094, device='cuda:0')
epoch: 21 test_true_pfm: -3.700445284602201
episode: 84 training return: tensor(-90131.3438, device='cuda:0')
episode: 85 training return: tensor(-33320.2852, device='cuda:0')
episode: 86 training return: tensor(-1100725.2500, device='cuda:0')
episode: 87 training return: tensor(-27148824., device='cuda:0')
epoch: 22 test_true_pfm: -29.785336835675736
episode: 88 training return: tensor(-2.1559e+10, device='cuda:0')
episode: 89 training return: tensor(-241583.8750, device='cuda:0')
episode: 90 training return: tensor(-1.9253e+11, device='cuda:0')
episode: 91 training return: tensor(-1.3922e+13, device='cuda:0')
epoch: 23 test_true_pfm: 14.510519629055134
episode: 92 training return: tensor(-4.9584e+11, device='cuda:0')
episode: 93 training return: tensor(-380003.7500, device='cuda:0')
episode: 94 training return: tensor(-772145.1875, device='cuda:0')
episode: 95 training return: tensor(-2262415.2500, device='cuda:0')
epoch: 24 test_true_pfm: 114.01475583197082
episode: 96 training return: tensor(-3434507., device='cuda:0')
episode: 97 training return: tensor(-1.1733e+08, device='cuda:0')
episode: 98 training return: tensor(-468075.8125, device='cuda:0')
episode: 99 training return: tensor(-5.6884e+11, device='cuda:0')
epoch: 25 test_true_pfm: -42.35350976085471
episode: 100 training return: tensor(-5011516., device='cuda:0')
episode: 101 training return: tensor(-10198556., device='cuda:0')
episode: 102 training return: tensor(-3.6102e+13, device='cuda:0')
episode: 103 training return: tensor(-1.1405e+13, device='cuda:0')
epoch: 26 test_true_pfm: -3.07518663186197
episode: 104 training return: tensor(-8.0875e+10, device='cuda:0')
episode: 105 training return: tensor(-6.9108e+09, device='cuda:0')
episode: 106 training return: tensor(-194310.0156, device='cuda:0')
episode: 107 training return: tensor(-1.3784e+11, device='cuda:0')
epoch: 27 test_true_pfm: -150.76777595144165
episode: 108 training return: tensor(-5521813., device='cuda:0')
episode: 109 training return: tensor(-143583.6719, device='cuda:0')
episode: 110 training return: tensor(-6.9141e+11, device='cuda:0')
episode: 111 training return: tensor(-1.7287e+09, device='cuda:0')
epoch: 28 test_true_pfm: -56.08804405618406
episode: 112 training return: tensor(-493341.0312, device='cuda:0')
episode: 113 training return: tensor(-2.3095e+08, device='cuda:0')
episode: 114 training return: tensor(-142510.5938, device='cuda:0')
episode: 115 training return: tensor(-27671.1035, device='cuda:0')
epoch: 29 test_true_pfm: -152.9916615547421
episode: 116 training return: tensor(-1987827.7500, device='cuda:0')
episode: 117 training return: tensor(-48132116., device='cuda:0')
episode: 118 training return: tensor(-1.3103e+12, device='cuda:0')
episode: 119 training return: tensor(-1.1296e+11, device='cuda:0')
epoch: 30 test_true_pfm: -31.002015407642443
episode: 120 training return: tensor(-281997.5000, device='cuda:0')
episode: 121 training return: tensor(-5.9530e+09, device='cuda:0')
episode: 122 training return: tensor(-229925.0312, device='cuda:0')
episode: 123 training return: tensor(-164918.0469, device='cuda:0')
epoch: 31 test_true_pfm: 13.305513710032587
episode: 124 training return: tensor(-127967.5234, device='cuda:0')
episode: 125 training return: tensor(-417863.6562, device='cuda:0')
episode: 126 training return: tensor(-254880.5625, device='cuda:0')
episode: 127 training return: tensor(-195745.7812, device='cuda:0')
epoch: 32 test_true_pfm: 13.888338108475063
episode: 128 training return: tensor(-1.9350e+09, device='cuda:0')
episode: 129 training return: tensor(-51312352., device='cuda:0')
episode: 130 training return: tensor(-184978.1250, device='cuda:0')
episode: 131 training return: tensor(-114047.2891, device='cuda:0')
epoch: 33 test_true_pfm: 0.8045517569585812
episode: 132 training return: tensor(-195916.2656, device='cuda:0')
episode: 133 training return: tensor(-128776.9688, device='cuda:0')
episode: 134 training return: tensor(-85785.8125, device='cuda:0')
episode: 135 training return: tensor(-151144.0625, device='cuda:0')
epoch: 34 test_true_pfm: -23.890066618284152
episode: 136 training return: tensor(-113141.7422, device='cuda:0')
episode: 137 training return: tensor(-197863.7812, device='cuda:0')
episode: 138 training return: tensor(-262478.1250, device='cuda:0')
episode: 139 training return: tensor(-229723.9531, device='cuda:0')
epoch: 35 test_true_pfm: 61.10320020600423
episode: 140 training return: tensor(-631476.6875, device='cuda:0')
episode: 141 training return: tensor(-324788.6562, device='cuda:0')
episode: 142 training return: tensor(-6.9538e+09, device='cuda:0')
episode: 143 training return: tensor(-3.0721e+09, device='cuda:0')
epoch: 36 test_true_pfm: 53.81544600956263
episode: 144 training return: tensor(-8.8221e+09, device='cuda:0')
episode: 145 training return: tensor(-2.4950e+09, device='cuda:0')
episode: 146 training return: tensor(-2.1502e+09, device='cuda:0')
episode: 147 training return: tensor(-170286.2031, device='cuda:0')
epoch: 37 test_true_pfm: 20.267730633160358
episode: 148 training return: tensor(-153517.2969, device='cuda:0')
episode: 149 training return: tensor(-287350.5938, device='cuda:0')
episode: 150 training return: tensor(-102382.3047, device='cuda:0')
episode: 151 training return: tensor(-118439.0547, device='cuda:0')
epoch: 38 test_true_pfm: -1.933715108082101
episode: 152 training return: tensor(-2.5677e+09, device='cuda:0')
episode: 153 training return: tensor(-4.8936e+09, device='cuda:0')
episode: 154 training return: tensor(-2.3885e+09, device='cuda:0')
episode: 155 training return: tensor(-148169.9219, device='cuda:0')
epoch: 39 test_true_pfm: -17.28869494553035
episode: 156 training return: tensor(-272624.5312, device='cuda:0')
episode: 157 training return: tensor(-175640.1562, device='cuda:0')
episode: 158 training return: tensor(-88745.2734, device='cuda:0')
episode: 159 training return: tensor(-1.9456e+09, device='cuda:0')
epoch: 40 test_true_pfm: -28.23939648595587
episode: 160 training return: tensor(-1.9798e+09, device='cuda:0')
episode: 161 training return: tensor(-1.7428e+09, device='cuda:0')
episode: 162 training return: tensor(-7.0142e+09, device='cuda:0')
episode: 163 training return: tensor(-2.0073e+09, device='cuda:0')
epoch: 41 test_true_pfm: 29.447930342739568
episode: 164 training return: tensor(-2677375.5000, device='cuda:0')
episode: 165 training return: tensor(-177893.2656, device='cuda:0')
episode: 166 training return: tensor(-1.8832e+09, device='cuda:0')
episode: 167 training return: tensor(-2.0492e+09, device='cuda:0')
epoch: 42 test_true_pfm: -37.604481722845755
episode: 168 training return: tensor(-1.9042e+09, device='cuda:0')
episode: 169 training return: tensor(-1630236.1250, device='cuda:0')
episode: 170 training return: tensor(-1.2472e+09, device='cuda:0')
episode: 171 training return: tensor(-225142.9844, device='cuda:0')
epoch: 43 test_true_pfm: 16.26363868611715
episode: 172 training return: tensor(-451780.2500, device='cuda:0')
episode: 173 training return: tensor(-1.5510e+09, device='cuda:0')
episode: 174 training return: tensor(-216170.4219, device='cuda:0')
episode: 175 training return: tensor(-84353.8125, device='cuda:0')
epoch: 44 test_true_pfm: 23.88816916743751
episode: 176 training return: tensor(-137117.7500, device='cuda:0')
episode: 177 training return: tensor(-2.0109e+09, device='cuda:0')
episode: 178 training return: tensor(-186315.2344, device='cuda:0')
episode: 179 training return: tensor(-380102.4062, device='cuda:0')
epoch: 45 test_true_pfm: -12.947813053439374
episode: 180 training return: tensor(-357008.7500, device='cuda:0')
episode: 181 training return: tensor(-103701.0703, device='cuda:0')
episode: 182 training return: tensor(-85553.9297, device='cuda:0')
episode: 183 training return: tensor(-153824.2656, device='cuda:0')
epoch: 46 test_true_pfm: -39.87476803346593
episode: 184 training return: tensor(-257398.7812, device='cuda:0')
episode: 185 training return: tensor(-164349.5156, device='cuda:0')
episode: 186 training return: tensor(-1793975.7500, device='cuda:0')
episode: 187 training return: tensor(-139481.2500, device='cuda:0')
epoch: 47 test_true_pfm: -87.77468082266829
episode: 188 training return: tensor(-536011.2500, device='cuda:0')
episode: 189 training return: tensor(-173729.6719, device='cuda:0')
episode: 190 training return: tensor(-169775.6406, device='cuda:0')
episode: 191 training return: tensor(-535328.3750, device='cuda:0')
epoch: 48 test_true_pfm: -54.733493500728656
episode: 192 training return: tensor(-519133.4375, device='cuda:0')
episode: 193 training return: tensor(-533076.1250, device='cuda:0')
episode: 194 training return: tensor(-535017.1250, device='cuda:0')
episode: 195 training return: tensor(-154366.2031, device='cuda:0')
epoch: 49 test_true_pfm: -38.382756009756584
episode: 196 training return: tensor(-533929.3750, device='cuda:0')
episode: 197 training return: tensor(-13199.2373, device='cuda:0')
episode: 198 training return: tensor(-529839.0625, device='cuda:0')
episode: 199 training return: tensor(-258084.2344, device='cuda:0')
epoch: 50 test_true_pfm: 32.40635070674555
episode: 200 training return: tensor(-33712.7852, device='cuda:0')
episode: 201 training return: tensor(-17219.7480, device='cuda:0')
episode: 202 training return: tensor(-112647.7266, device='cuda:0')
episode: 203 training return: tensor(-28104.0508, device='cuda:0')
epoch: 51 test_true_pfm: -10.713802053391674
episode: 204 training return: tensor(-82575.1641, device='cuda:0')
episode: 205 training return: tensor(-1805893.1250, device='cuda:0')
episode: 206 training return: tensor(-41804.1758, device='cuda:0')
episode: 207 training return: tensor(-81346.3906, device='cuda:0')
epoch: 52 test_true_pfm: -25.070232535891915
episode: 208 training return: tensor(-87862.1172, device='cuda:0')
episode: 209 training return: tensor(-109696.6328, device='cuda:0')
episode: 210 training return: tensor(-36375.9648, device='cuda:0')
episode: 211 training return: tensor(-109527.2422, device='cuda:0')
epoch: 53 test_true_pfm: 57.065848285580614
episode: 212 training return: tensor(-20382.7109, device='cuda:0')
episode: 213 training return: tensor(-188984.0781, device='cuda:0')
episode: 214 training return: tensor(-70360.8281, device='cuda:0')
episode: 215 training return: tensor(-197789.1094, device='cuda:0')
epoch: 54 test_true_pfm: 225.47234207559237
episode: 216 training return: tensor(-86123.7500, device='cuda:0')
episode: 217 training return: tensor(-29495.7773, device='cuda:0')
episode: 218 training return: tensor(-16700.7637, device='cuda:0')
episode: 219 training return: tensor(-19147.1992, device='cuda:0')
epoch: 55 test_true_pfm: 51.96927060688813
episode: 220 training return: tensor(-55971.7148, device='cuda:0')
episode: 221 training return: tensor(-534100.3750, device='cuda:0')
episode: 222 training return: tensor(-534142.3750, device='cuda:0')
episode: 223 training return: tensor(-534987.4375, device='cuda:0')
epoch: 56 test_true_pfm: -41.98875805257321
episode: 224 training return: tensor(-534002.2500, device='cuda:0')
episode: 225 training return: tensor(-535607.5000, device='cuda:0')
episode: 226 training return: tensor(-534021.5625, device='cuda:0')
episode: 227 training return: tensor(-533720.0625, device='cuda:0')
epoch: 57 test_true_pfm: -43.97850618469065
episode: 228 training return: tensor(-534576.7500, device='cuda:0')
episode: 229 training return: tensor(-534033.6875, device='cuda:0')
episode: 230 training return: tensor(-64662.6758, device='cuda:0')
episode: 231 training return: tensor(-534869.8125, device='cuda:0')
epoch: 58 test_true_pfm: -40.90283387213461
episode: 232 training return: tensor(-533341.1250, device='cuda:0')
episode: 233 training return: tensor(-533277.3750, device='cuda:0')
episode: 234 training return: tensor(-533784.1250, device='cuda:0')
episode: 235 training return: tensor(-533730.0625, device='cuda:0')
epoch: 59 test_true_pfm: -41.50503399832738
episode: 236 training return: tensor(-534164.5000, device='cuda:0')
episode: 237 training return: tensor(-2902685.7500, device='cuda:0')
episode: 238 training return: tensor(-74782.0156, device='cuda:0')
episode: 239 training return: tensor(-75071.5625, device='cuda:0')
epoch: 60 test_true_pfm: -46.883250623690266
episode: 240 training return: tensor(-101041.7188, device='cuda:0')
episode: 241 training return: tensor(-2.7200e+09, device='cuda:0')
episode: 242 training return: tensor(-531291.6250, device='cuda:0')
episode: 243 training return: tensor(-36527.9297, device='cuda:0')
epoch: 61 test_true_pfm: -41.09945451656836
episode: 244 training return: tensor(-533796.7500, device='cuda:0')
episode: 245 training return: tensor(-16711.5723, device='cuda:0')
episode: 246 training return: tensor(-13493.6895, device='cuda:0')
episode: 247 training return: tensor(-24698.3086, device='cuda:0')
epoch: 62 test_true_pfm: -16.072538982067783
episode: 248 training return: tensor(-983022.4375, device='cuda:0')
episode: 249 training return: tensor(-8290.0879, device='cuda:0')
episode: 250 training return: tensor(-8413.4619, device='cuda:0')
episode: 251 training return: tensor(-10328.5371, device='cuda:0')
epoch: 63 test_true_pfm: -71.64622330093937
episode: 252 training return: tensor(-962214.5000, device='cuda:0')
episode: 253 training return: tensor(-10661.9932, device='cuda:0')
episode: 254 training return: tensor(-8617.5273, device='cuda:0')
episode: 255 training return: tensor(-12515.7881, device='cuda:0')
epoch: 64 test_true_pfm: 6.360140205619992
episode: 256 training return: tensor(-8082.9902, device='cuda:0')
episode: 257 training return: tensor(-1474997.3750, device='cuda:0')
episode: 258 training return: tensor(-7742.2109, device='cuda:0')
episode: 259 training return: tensor(-11631.6943, device='cuda:0')
epoch: 65 test_true_pfm: 1.6265759562565112
episode: 260 training return: tensor(-11384.4375, device='cuda:0')
episode: 261 training return: tensor(-11311.2578, device='cuda:0')
episode: 262 training return: tensor(-15277.7988, device='cuda:0')
episode: 263 training return: tensor(-12125.6650, device='cuda:0')
epoch: 66 test_true_pfm: 0.11039736986756153
episode: 264 training return: tensor(-11034.8105, device='cuda:0')
episode: 265 training return: tensor(-12486.2314, device='cuda:0')
episode: 266 training return: tensor(-10529.1729, device='cuda:0')
episode: 267 training return: tensor(-11374.7754, device='cuda:0')
epoch: 67 test_true_pfm: -5.334374567189511
episode: 268 training return: tensor(-10628.4707, device='cuda:0')
episode: 269 training return: tensor(-10745.7568, device='cuda:0')
episode: 270 training return: tensor(-10453.0039, device='cuda:0')
episode: 271 training return: tensor(-11837.2285, device='cuda:0')
epoch: 68 test_true_pfm: 91.44811490255303
episode: 272 training return: tensor(-8767.0303, device='cuda:0')
episode: 273 training return: tensor(-9744.8604, device='cuda:0')
episode: 274 training return: tensor(-10860.7881, device='cuda:0')
episode: 275 training return: tensor(-8837.7070, device='cuda:0')
epoch: 69 test_true_pfm: 9.831671478053195
episode: 276 training return: tensor(-7843.9043, device='cuda:0')
episode: 277 training return: tensor(-70201.4062, device='cuda:0')
episode: 278 training return: tensor(-70396.9297, device='cuda:0')
episode: 279 training return: tensor(-11518.9541, device='cuda:0')
epoch: 70 test_true_pfm: -2.442160992078457
episode: 280 training return: tensor(-11730.5098, device='cuda:0')
episode: 281 training return: tensor(-11995.7578, device='cuda:0')
episode: 282 training return: tensor(-11717.2393, device='cuda:0')
episode: 283 training return: tensor(-7504.7905, device='cuda:0')
epoch: 71 test_true_pfm: 81.49566283220668
episode: 284 training return: tensor(-4988.8862, device='cuda:0')
episode: 285 training return: tensor(-11677.8438, device='cuda:0')
episode: 286 training return: tensor(-11137.3691, device='cuda:0')
episode: 287 training return: tensor(-6458.3872, device='cuda:0')
epoch: 72 test_true_pfm: 94.74162808006672
episode: 288 training return: tensor(-4868.4414, device='cuda:0')
episode: 289 training return: tensor(-8612.7822, device='cuda:0')
episode: 290 training return: tensor(-8607.1182, device='cuda:0')
episode: 291 training return: tensor(-4709.6943, device='cuda:0')
epoch: 73 test_true_pfm: -17.951620674336038
episode: 292 training return: tensor(-10191.9199, device='cuda:0')
episode: 293 training return: tensor(-5223.1880, device='cuda:0')
episode: 294 training return: tensor(-7981.4097, device='cuda:0')
episode: 295 training return: tensor(-67106.8828, device='cuda:0')
epoch: 74 test_true_pfm: -25.976451631853752
episode: 296 training return: tensor(-9886.5723, device='cuda:0')
episode: 297 training return: tensor(-8456.3018, device='cuda:0')
episode: 298 training return: tensor(-12560.5322, device='cuda:0')
episode: 299 training return: tensor(-11779.6494, device='cuda:0')
epoch: 75 test_true_pfm: 75.94092929930052
episode: 300 training return: tensor(-7722.0508, device='cuda:0')
episode: 301 training return: tensor(-8202.3076, device='cuda:0')
episode: 302 training return: tensor(-11726.8184, device='cuda:0')
episode: 303 training return: tensor(-11976.4258, device='cuda:0')
epoch: 76 test_true_pfm: -5.1189197529102906
episode: 304 training return: tensor(-10160.8223, device='cuda:0')
episode: 305 training return: tensor(-4305.4790, device='cuda:0')
episode: 306 training return: tensor(-61927.0742, device='cuda:0')
episode: 307 training return: tensor(-67996.0234, device='cuda:0')
epoch: 77 test_true_pfm: -65.59170803592396
episode: 308 training return: tensor(-67874.2109, device='cuda:0')
episode: 309 training return: tensor(-66017.0781, device='cuda:0')
episode: 310 training return: tensor(-68351.9219, device='cuda:0')
episode: 311 training return: tensor(-55250.6953, device='cuda:0')
epoch: 78 test_true_pfm: -3.7379122013520436
episode: 312 training return: tensor(-63624.3711, device='cuda:0')
episode: 313 training return: tensor(-5.5890e+08, device='cuda:0')
episode: 314 training return: tensor(-58223.6445, device='cuda:0')
episode: 315 training return: tensor(-58999.0273, device='cuda:0')
epoch: 79 test_true_pfm: -8.670979347475463
episode: 316 training return: tensor(-58365.9102, device='cuda:0')
episode: 317 training return: tensor(-80110.6562, device='cuda:0')
episode: 318 training return: tensor(-214459.6719, device='cuda:0')
episode: 319 training return: tensor(-233589.3281, device='cuda:0')
epoch: 80 test_true_pfm: 129.19428223376164
episode: 320 training return: tensor(-51726.3750, device='cuda:0')
episode: 321 training return: tensor(-55874.6367, device='cuda:0')
episode: 322 training return: tensor(-60900.9180, device='cuda:0')
episode: 323 training return: tensor(-169190.9375, device='cuda:0')
epoch: 81 test_true_pfm: -8.98421256771188
episode: 324 training return: tensor(-64169.5195, device='cuda:0')
episode: 325 training return: tensor(-91228.6719, device='cuda:0')
episode: 326 training return: tensor(-71659.9219, device='cuda:0')
episode: 327 training return: tensor(-34721.9531, device='cuda:0')
epoch: 82 test_true_pfm: -81.55713541117522
episode: 328 training return: tensor(-1.1138e+09, device='cuda:0')
episode: 329 training return: tensor(-3.0370e+10, device='cuda:0')
episode: 330 training return: tensor(-76029.2500, device='cuda:0')
episode: 331 training return: tensor(-12333.7334, device='cuda:0')
epoch: 83 test_true_pfm: 99.45962498486408
episode: 332 training return: tensor(-12214.9180, device='cuda:0')
episode: 333 training return: tensor(-12404.7119, device='cuda:0')
episode: 334 training return: tensor(-12657.8789, device='cuda:0')
episode: 335 training return: tensor(-13091.6992, device='cuda:0')
epoch: 84 test_true_pfm: 95.12129134028164
episode: 336 training return: tensor(-12262.8174, device='cuda:0')
episode: 337 training return: tensor(-13118.9893, device='cuda:0')
episode: 338 training return: tensor(-12524.1299, device='cuda:0')
episode: 339 training return: tensor(-14921.1152, device='cuda:0')
epoch: 85 test_true_pfm: 101.28980676977523
episode: 340 training return: tensor(-12506.8145, device='cuda:0')
episode: 341 training return: tensor(-12856.2305, device='cuda:0')
episode: 342 training return: tensor(-12288.6865, device='cuda:0')
episode: 343 training return: tensor(-14677.8975, device='cuda:0')
epoch: 86 test_true_pfm: 179.9589500618599
episode: 344 training return: tensor(-14755.5459, device='cuda:0')
episode: 345 training return: tensor(-12675.0957, device='cuda:0')
episode: 346 training return: tensor(-13115.7959, device='cuda:0')
episode: 347 training return: tensor(-23639.9336, device='cuda:0')
epoch: 87 test_true_pfm: 299.58821534704356
episode: 348 training return: tensor(-21582.5410, device='cuda:0')
episode: 349 training return: tensor(-66576.7734, device='cuda:0')
episode: 350 training return: tensor(-13051.9854, device='cuda:0')
episode: 351 training return: tensor(-24797.7656, device='cuda:0')
epoch: 88 test_true_pfm: 183.81001516898314
episode: 352 training return: tensor(-14991.4697, device='cuda:0')
episode: 353 training return: tensor(-67615.0391, device='cuda:0')
episode: 354 training return: tensor(-22736.7793, device='cuda:0')
episode: 355 training return: tensor(-19695.0801, device='cuda:0')
epoch: 89 test_true_pfm: 203.86331494389182
episode: 356 training return: tensor(-17407.3379, device='cuda:0')
episode: 357 training return: tensor(-87150.1328, device='cuda:0')
episode: 358 training return: tensor(-13408.4092, device='cuda:0')
episode: 359 training return: tensor(-16501.3066, device='cuda:0')
epoch: 90 test_true_pfm: 239.76693606763
episode: 360 training return: tensor(-15688.5156, device='cuda:0')
episode: 361 training return: tensor(-16917.3750, device='cuda:0')
episode: 362 training return: tensor(-19026.9961, device='cuda:0')
episode: 363 training return: tensor(-16412.0918, device='cuda:0')
epoch: 91 test_true_pfm: 103.93845331458719
episode: 364 training return: tensor(-12518.1309, device='cuda:0')
episode: 365 training return: tensor(-12197.8271, device='cuda:0')
episode: 366 training return: tensor(-37584.8281, device='cuda:0')
episode: 367 training return: tensor(-15960.8789, device='cuda:0')
epoch: 92 test_true_pfm: 221.0300040282671
episode: 368 training return: tensor(-16842.2832, device='cuda:0')
episode: 369 training return: tensor(-4485809.5000, device='cuda:0')
episode: 370 training return: tensor(-12130.0898, device='cuda:0')
episode: 371 training return: tensor(-13716.5420, device='cuda:0')
epoch: 93 test_true_pfm: 121.24097319446075
episode: 372 training return: tensor(-12662.4932, device='cuda:0')
episode: 373 training return: tensor(-12228.1719, device='cuda:0')
episode: 374 training return: tensor(-12996.8506, device='cuda:0')
episode: 375 training return: tensor(-14043.8643, device='cuda:0')
epoch: 94 test_true_pfm: 129.65334361104965
episode: 376 training return: tensor(-13097.0566, device='cuda:0')
episode: 377 training return: tensor(-13122.2998, device='cuda:0')
episode: 378 training return: tensor(-13561.7998, device='cuda:0')
episode: 379 training return: tensor(-15768.3574, device='cuda:0')
epoch: 95 test_true_pfm: 149.28265855180754
episode: 380 training return: tensor(-13296.8408, device='cuda:0')
episode: 381 training return: tensor(-12971.6309, device='cuda:0')
episode: 382 training return: tensor(-13666.9902, device='cuda:0')
episode: 383 training return: tensor(-12623.3252, device='cuda:0')
epoch: 96 test_true_pfm: 115.99200760172276
episode: 384 training return: tensor(-12736.9365, device='cuda:0')
episode: 385 training return: tensor(-13132.9873, device='cuda:0')
episode: 386 training return: tensor(-12837.9561, device='cuda:0')
episode: 387 training return: tensor(-12508.0537, device='cuda:0')
epoch: 97 test_true_pfm: 93.84938236574085
episode: 388 training return: tensor(-13017.4707, device='cuda:0')
episode: 389 training return: tensor(-12201.1709, device='cuda:0')
episode: 390 training return: tensor(-12357.3516, device='cuda:0')
episode: 391 training return: tensor(-12256.0312, device='cuda:0')
epoch: 98 test_true_pfm: 100.27979907119844
episode: 392 training return: tensor(-12174.4170, device='cuda:0')
episode: 393 training return: tensor(-12224.1025, device='cuda:0')
episode: 394 training return: tensor(-12369.1436, device='cuda:0')
episode: 395 training return: tensor(-12299.4795, device='cuda:0')
epoch: 99 test_true_pfm: 113.08368384498469
episode: 396 training return: tensor(-12381.1729, device='cuda:0')
episode: 397 training return: tensor(-12451.3379, device='cuda:0')
episode: 398 training return: tensor(-12509.3008, device='cuda:0')
episode: 399 training return: tensor(-12277.2793, device='cuda:0')
epoch: 100 test_true_pfm: 114.49347774013408
episode: 400 training return: tensor(-12115.7168, device='cuda:0')
episode: 401 training return: tensor(-64816.6328, device='cuda:0')
episode: 402 training return: tensor(-22397.1465, device='cuda:0')
episode: 403 training return: tensor(-13380.9180, device='cuda:0')
epoch: 101 test_true_pfm: 109.54072663731574
episode: 404 training return: tensor(-81531.8672, device='cuda:0')
episode: 405 training return: tensor(-12985.5586, device='cuda:0')
episode: 406 training return: tensor(-13870.9600, device='cuda:0')
episode: 407 training return: tensor(-13666.9746, device='cuda:0')
epoch: 102 test_true_pfm: 221.80755029527518
episode: 408 training return: tensor(-14197.8213, device='cuda:0')
episode: 409 training return: tensor(-12271.7549, device='cuda:0')
episode: 410 training return: tensor(-12228.4414, device='cuda:0')
episode: 411 training return: tensor(-12172.0225, device='cuda:0')
epoch: 103 test_true_pfm: 101.37931422805742
episode: 412 training return: tensor(-12176.2178, device='cuda:0')
episode: 413 training return: tensor(-12105.6074, device='cuda:0')
episode: 414 training return: tensor(-12124.0342, device='cuda:0')
episode: 415 training return: tensor(-12111.7900, device='cuda:0')
epoch: 104 test_true_pfm: 99.30705842749701
episode: 416 training return: tensor(-12297.8701, device='cuda:0')
episode: 417 training return: tensor(-15076.5273, device='cuda:0')
episode: 418 training return: tensor(-16216.7188, device='cuda:0')
episode: 419 training return: tensor(-16716.1348, device='cuda:0')
epoch: 105 test_true_pfm: 149.6610568419981
episode: 420 training return: tensor(-15873.1924, device='cuda:0')
episode: 421 training return: tensor(-299368.9375, device='cuda:0')
episode: 422 training return: tensor(-296766., device='cuda:0')
episode: 423 training return: tensor(-12227.0039, device='cuda:0')
epoch: 106 test_true_pfm: 99.57890082442509
episode: 424 training return: tensor(-11957.1406, device='cuda:0')
episode: 425 training return: tensor(-366844.2812, device='cuda:0')
episode: 426 training return: tensor(-68420.5312, device='cuda:0')
episode: 427 training return: tensor(-76261.2422, device='cuda:0')
epoch: 107 test_true_pfm: 98.16175655668441
episode: 428 training return: tensor(-12104.9404, device='cuda:0')
episode: 429 training return: tensor(-12195.4141, device='cuda:0')
episode: 430 training return: tensor(-12365.4873, device='cuda:0')
episode: 431 training return: tensor(-1854976.8750, device='cuda:0')
epoch: 108 test_true_pfm: 96.19421229218976
episode: 432 training return: tensor(-12022.1367, device='cuda:0')
episode: 433 training return: tensor(-12271.7383, device='cuda:0')
episode: 434 training return: tensor(-12232.0967, device='cuda:0')
episode: 435 training return: tensor(-12137.5449, device='cuda:0')
epoch: 109 test_true_pfm: 96.96274188111569
episode: 436 training return: tensor(-12219.4580, device='cuda:0')
episode: 437 training return: tensor(-12212.6729, device='cuda:0')
episode: 438 training return: tensor(-11983.1191, device='cuda:0')
episode: 439 training return: tensor(-60670.8242, device='cuda:0')
epoch: 110 test_true_pfm: 148.7342066242234
episode: 440 training return: tensor(-22233.8633, device='cuda:0')
episode: 441 training return: tensor(-296099.7812, device='cuda:0')
episode: 442 training return: tensor(-58204.4492, device='cuda:0')
episode: 443 training return: tensor(-1876113.3750, device='cuda:0')
epoch: 111 test_true_pfm: -8.677804781268232
episode: 444 training return: tensor(-108762.8359, device='cuda:0')
episode: 445 training return: tensor(-103747.3984, device='cuda:0')
episode: 446 training return: tensor(-297010.5000, device='cuda:0')
episode: 447 training return: tensor(-384154.3125, device='cuda:0')
epoch: 112 test_true_pfm: 63.16454845236211
episode: 448 training return: tensor(-78270.9297, device='cuda:0')
episode: 449 training return: tensor(-323280.5625, device='cuda:0')
episode: 450 training return: tensor(-297538.8125, device='cuda:0')
episode: 451 training return: tensor(-59176.7305, device='cuda:0')
epoch: 113 test_true_pfm: 99.41808100128189
episode: 452 training return: tensor(-75385.0859, device='cuda:0')
episode: 453 training return: tensor(-1.6342e+13, device='cuda:0')
episode: 454 training return: tensor(-1.9301e+12, device='cuda:0')
episode: 455 training return: tensor(-4.3345e+13, device='cuda:0')
epoch: 114 test_true_pfm: 308.82416156430446
episode: 456 training return: tensor(-1.0036e+14, device='cuda:0')
episode: 457 training return: tensor(-1.2099e+14, device='cuda:0')
episode: 458 training return: tensor(-6.8731e+10, device='cuda:0')
episode: 459 training return: tensor(-3.2129e+09, device='cuda:0')
epoch: 115 test_true_pfm: 44.42224173303365
episode: 460 training return: tensor(-14152.5225, device='cuda:0')
episode: 461 training return: tensor(-49742.9297, device='cuda:0')
episode: 462 training return: tensor(-3.5339e+09, device='cuda:0')
episode: 463 training return: tensor(-115691.6797, device='cuda:0')
epoch: 116 test_true_pfm: -20.245130989005034
episode: 464 training return: tensor(-1.8955e+12, device='cuda:0')
episode: 465 training return: tensor(-58747.8359, device='cuda:0')
episode: 466 training return: tensor(-286167.2812, device='cuda:0')
episode: 467 training return: tensor(-1989075.6250, device='cuda:0')
epoch: 117 test_true_pfm: 87.0690609367319
episode: 468 training return: tensor(-15455.7793, device='cuda:0')
episode: 469 training return: tensor(-163501.1875, device='cuda:0')
episode: 470 training return: tensor(-2719596.2500, device='cuda:0')
episode: 471 training return: tensor(-290884.2812, device='cuda:0')
epoch: 118 test_true_pfm: -118.05018927211927
episode: 472 training return: tensor(-139477.7812, device='cuda:0')
episode: 473 training return: tensor(-202445.4688, device='cuda:0')
episode: 474 training return: tensor(-41656.6953, device='cuda:0')
episode: 475 training return: tensor(-18885.5254, device='cuda:0')
epoch: 119 test_true_pfm: -275.1216931781036
episode: 476 training return: tensor(-1.2754e+08, device='cuda:0')
episode: 477 training return: tensor(-448726.5625, device='cuda:0')
episode: 478 training return: tensor(-120714.1094, device='cuda:0')
episode: 479 training return: tensor(-2419200.5000, device='cuda:0')
epoch: 120 test_true_pfm: 12.741267109923527
episode: 480 training return: tensor(-39833.7617, device='cuda:0')
episode: 481 training return: tensor(-15963.5283, device='cuda:0')
episode: 482 training return: tensor(-23975.3301, device='cuda:0')
episode: 483 training return: tensor(-62345.6094, device='cuda:0')
epoch: 121 test_true_pfm: -405.2149041529222
episode: 484 training return: tensor(-306128.7812, device='cuda:0')
episode: 485 training return: tensor(-36529.7070, device='cuda:0')
episode: 486 training return: tensor(-13537.7822, device='cuda:0')
episode: 487 training return: tensor(-510423.9062, device='cuda:0')
epoch: 122 test_true_pfm: -81.77980839369859
episode: 488 training return: tensor(-26618.9863, device='cuda:0')
episode: 489 training return: tensor(-54071.6211, device='cuda:0')
episode: 490 training return: tensor(-34323.0742, device='cuda:0')
episode: 491 training return: tensor(-9958.3203, device='cuda:0')
epoch: 123 test_true_pfm: 57.796047913533975
episode: 492 training return: tensor(-111348.0703, device='cuda:0')
episode: 493 training return: tensor(-93091.0156, device='cuda:0')
episode: 494 training return: tensor(-50215.3945, device='cuda:0')
episode: 495 training return: tensor(-52480.4219, device='cuda:0')
epoch: 124 test_true_pfm: -150.22101163103892
episode: 496 training return: tensor(-90765.9219, device='cuda:0')
episode: 497 training return: tensor(-39330.4297, device='cuda:0')
episode: 498 training return: tensor(-15698.7500, device='cuda:0')
episode: 499 training return: tensor(-27386.5273, device='cuda:0')
epoch: 125 test_true_pfm: -393.8527686841776
episode: 500 training return: tensor(-52566.5820, device='cuda:0')
episode: 501 training return: tensor(-58821.5938, device='cuda:0')
episode: 502 training return: tensor(-26350.0898, device='cuda:0')
episode: 503 training return: tensor(-16590.5879, device='cuda:0')
epoch: 126 test_true_pfm: -200.51684904107287
episode: 504 training return: tensor(-24744.8418, device='cuda:0')
episode: 505 training return: tensor(-26423.4375, device='cuda:0')
episode: 506 training return: tensor(-16804.1016, device='cuda:0')
episode: 507 training return: tensor(-12384.1699, device='cuda:0')
epoch: 127 test_true_pfm: -66.28787399467127
episode: 508 training return: tensor(-53793.9453, device='cuda:0')
episode: 509 training return: tensor(-48620.5664, device='cuda:0')
episode: 510 training return: tensor(-15497.8203, device='cuda:0')
episode: 511 training return: tensor(-52438.4883, device='cuda:0')
epoch: 128 test_true_pfm: -28.041185830941416
episode: 512 training return: tensor(-12064.0586, device='cuda:0')
episode: 513 training return: tensor(-30150.6934, device='cuda:0')
episode: 514 training return: tensor(-15707.3105, device='cuda:0')
episode: 515 training return: tensor(-17178.0723, device='cuda:0')
epoch: 129 test_true_pfm: -30.948208652412635
episode: 516 training return: tensor(-18463.9277, device='cuda:0')
episode: 517 training return: tensor(-15222.9375, device='cuda:0')
episode: 518 training return: tensor(-14244.1699, device='cuda:0')
episode: 519 training return: tensor(-15315.9717, device='cuda:0')
epoch: 130 test_true_pfm: -106.70583410060533
episode: 520 training return: tensor(-13660.6416, device='cuda:0')
episode: 521 training return: tensor(-58225.0938, device='cuda:0')
episode: 522 training return: tensor(-70200.1562, device='cuda:0')
episode: 523 training return: tensor(-81710.1328, device='cuda:0')
epoch: 131 test_true_pfm: -127.8824563214777
episode: 524 training return: tensor(-75789.5234, device='cuda:0')
episode: 525 training return: tensor(-122992.6406, device='cuda:0')
episode: 526 training return: tensor(-69228.1250, device='cuda:0')
episode: 527 training return: tensor(-135524.9219, device='cuda:0')
epoch: 132 test_true_pfm: -60.88394929178821
episode: 528 training return: tensor(-113924.9766, device='cuda:0')
episode: 529 training return: tensor(-74045.3906, device='cuda:0')
episode: 530 training return: tensor(-39283.8086, device='cuda:0')
episode: 531 training return: tensor(-15076.3799, device='cuda:0')
epoch: 133 test_true_pfm: 40.25527647359582
episode: 532 training return: tensor(-12106.9453, device='cuda:0')
episode: 533 training return: tensor(-15843.0010, device='cuda:0')
episode: 534 training return: tensor(-122221.7422, device='cuda:0')
episode: 535 training return: tensor(-103745.8203, device='cuda:0')
epoch: 134 test_true_pfm: -41.16992932923517
episode: 536 training return: tensor(-124670.8125, device='cuda:0')
episode: 537 training return: tensor(-87219.6016, device='cuda:0')
episode: 538 training return: tensor(-95143.6797, device='cuda:0')
episode: 539 training return: tensor(-176230.6250, device='cuda:0')
epoch: 135 test_true_pfm: -57.08988452620889
episode: 540 training return: tensor(-77812.6875, device='cuda:0')
episode: 541 training return: tensor(-85780.9688, device='cuda:0')
episode: 542 training return: tensor(-93964.8438, device='cuda:0')
episode: 543 training return: tensor(-13073.8457, device='cuda:0')
epoch: 136 test_true_pfm: -184.40406752062233
episode: 544 training return: tensor(-74666.5000, device='cuda:0')
episode: 545 training return: tensor(-25074.3574, device='cuda:0')
episode: 546 training return: tensor(-91632.5469, device='cuda:0')
episode: 547 training return: tensor(-75662.0156, device='cuda:0')
epoch: 137 test_true_pfm: -141.7988468862152
episode: 548 training return: tensor(-82192.5938, device='cuda:0')
episode: 549 training return: tensor(-120576.2266, device='cuda:0')
episode: 550 training return: tensor(-88194.9297, device='cuda:0')
episode: 551 training return: tensor(-108422.3906, device='cuda:0')
epoch: 138 test_true_pfm: 16.521987868062467
episode: 552 training return: tensor(-93099.9375, device='cuda:0')
episode: 553 training return: tensor(-166158.7969, device='cuda:0')
episode: 554 training return: tensor(-88601.5000, device='cuda:0')
episode: 555 training return: tensor(-83761.6094, device='cuda:0')
epoch: 139 test_true_pfm: -165.033693039489
episode: 556 training return: tensor(-87119.1797, device='cuda:0')
episode: 557 training return: tensor(-110729.8516, device='cuda:0')
episode: 558 training return: tensor(-75910.9453, device='cuda:0')
episode: 559 training return: tensor(-13280.1162, device='cuda:0')
epoch: 140 test_true_pfm: -84.7973732815978
episode: 560 training return: tensor(-77151.1953, device='cuda:0')
episode: 561 training return: tensor(-82459.3984, device='cuda:0')
episode: 562 training return: tensor(-117902.1641, device='cuda:0')
episode: 563 training return: tensor(-73018.0156, device='cuda:0')
epoch: 141 test_true_pfm: -44.084959012538754
episode: 564 training return: tensor(-29093.4180, device='cuda:0')
episode: 565 training return: tensor(-94059.0391, device='cuda:0')
episode: 566 training return: tensor(-101890.5156, device='cuda:0')
episode: 567 training return: tensor(-101033.2891, device='cuda:0')
epoch: 142 test_true_pfm: 106.00057625434778
episode: 568 training return: tensor(-38469.5156, device='cuda:0')
episode: 569 training return: tensor(-132168.8750, device='cuda:0')
episode: 570 training return: tensor(-93465.5625, device='cuda:0')
episode: 571 training return: tensor(-82022.0625, device='cuda:0')
epoch: 143 test_true_pfm: -184.6181934974586
episode: 572 training return: tensor(-133666.5000, device='cuda:0')
episode: 573 training return: tensor(-95592.0781, device='cuda:0')
episode: 574 training return: tensor(-102853.0781, device='cuda:0')
episode: 575 training return: tensor(-52150.4375, device='cuda:0')
epoch: 144 test_true_pfm: -66.2485431757287
episode: 576 training return: tensor(-63539.1953, device='cuda:0')
episode: 577 training return: tensor(-95508.1016, device='cuda:0')
episode: 578 training return: tensor(-144975.7812, device='cuda:0')
episode: 579 training return: tensor(-98516.6719, device='cuda:0')
epoch: 145 test_true_pfm: -209.82213807162506
episode: 580 training return: tensor(-154952.3750, device='cuda:0')
episode: 581 training return: tensor(-164523.0156, device='cuda:0')
episode: 582 training return: tensor(-64198.6211, device='cuda:0')
episode: 583 training return: tensor(-71318.2188, device='cuda:0')
epoch: 146 test_true_pfm: -44.49006779170126
episode: 584 training return: tensor(-1037517.4375, device='cuda:0')
episode: 585 training return: tensor(-78444.7500, device='cuda:0')
episode: 586 training return: tensor(-115334.6328, device='cuda:0')
episode: 587 training return: tensor(-97572.0781, device='cuda:0')
epoch: 147 test_true_pfm: -98.59202860515286
episode: 588 training return: tensor(-49840.8516, device='cuda:0')
episode: 589 training return: tensor(-49514.7539, device='cuda:0')
episode: 590 training return: tensor(-49289.5781, device='cuda:0')
episode: 591 training return: tensor(-49960.8984, device='cuda:0')
epoch: 148 test_true_pfm: -135.2084618644344
episode: 592 training return: tensor(-53612.7227, device='cuda:0')
episode: 593 training return: tensor(-82065.1641, device='cuda:0')
episode: 594 training return: tensor(-73371.7734, device='cuda:0')
episode: 595 training return: tensor(-51810.1602, device='cuda:0')
epoch: 149 test_true_pfm: -144.66382080716457
episode: 596 training return: tensor(-51302.9102, device='cuda:0')
episode: 597 training return: tensor(-58746.8555, device='cuda:0')
episode: 598 training return: tensor(-52791.3281, device='cuda:0')
episode: 599 training return: tensor(-50176.7344, device='cuda:0')
epoch: 150 test_true_pfm: -137.8519975914256
