['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'brac', '--traj', 'medium', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 4.300185980796814 test_loss: 3.048776626586914
epoch: 1 training_loss 2.4179249668121336 test_loss: 1.8992841720581055
epoch: 2 training_loss 1.5540832430124283 test_loss: 1.0987861633300782
epoch: 3 training_loss 0.9373025810718536 test_loss: 0.6951258182525635
epoch: 4 training_loss 0.44875134378671644 test_loss: 0.23217697143554689
epoch: 5 training_loss 0.08446293285582214 test_loss: -0.07633833289146423
epoch: 6 training_loss -0.17124836172908545 test_loss: -0.3094827890396118
epoch: 7 training_loss -0.41844399116933345 test_loss: -0.5546710968017579
epoch: 8 training_loss -0.577078605517745 test_loss: -0.6222895622253418
epoch: 9 training_loss -0.8047466917335987 test_loss: -0.8445442199707032
epoch: 10 training_loss -0.9635381802916527 test_loss: -1.1586179733276367
epoch: 11 training_loss -1.2020887321233749 test_loss: -1.356519603729248
epoch: 12 training_loss -1.2778052487969398 test_loss: -1.551805019378662
epoch: 13 training_loss -1.5158120274543763 test_loss: -1.7103607177734375
epoch: 14 training_loss -1.6030105423927308 test_loss: -1.902425193786621
epoch: 15 training_loss -1.7795488560199737 test_loss: -1.9460369110107423
epoch: 16 training_loss -1.8107822513580323 test_loss: -1.9601036071777345
epoch: 17 training_loss -2.0516259014606475 test_loss: -1.9935270309448243
epoch: 18 training_loss -2.154687546491623 test_loss: -2.1551013946533204
epoch: 19 training_loss -2.2646676647663115 test_loss: -2.215575408935547
epoch: 20 training_loss -2.191957598924637 test_loss: -2.3201417922973633
epoch: 21 training_loss -2.2995741510391237 test_loss: -2.300347328186035
epoch: 22 training_loss -2.447739714384079 test_loss: -2.461191749572754
epoch: 23 training_loss -2.5308080923557283 test_loss: -2.5943170547485352
epoch: 24 training_loss -2.5781957232952117 test_loss: -2.5503232955932615
epoch: 25 training_loss -2.6363981127738954 test_loss: -2.7384227752685546
epoch: 26 training_loss -2.6763924241065977 test_loss: -2.7136783599853516
epoch: 27 training_loss -2.667488751411438 test_loss: -2.7072607040405274
epoch: 28 training_loss -2.8109592258930207 test_loss: -2.8833745956420898
epoch: 29 training_loss -2.915768172740936 test_loss: -2.953622055053711
epoch: 30 training_loss -2.8356112957000734 test_loss: -2.873847770690918
epoch: 31 training_loss -3.022889711856842 test_loss: -2.9922256469726562
epoch: 32 training_loss -3.0418721628189087 test_loss: -2.957563591003418
epoch: 33 training_loss -3.0787611079216 test_loss: -3.144448471069336
epoch: 34 training_loss -3.122003209590912 test_loss: -3.16231632232666
epoch: 35 training_loss -3.1980409026145935 test_loss: -3.216337203979492
epoch: 36 training_loss -3.2426111197471617 test_loss: -3.303507995605469
epoch: 37 training_loss -3.2257168841362 test_loss: -3.152353858947754
epoch: 38 training_loss -3.2749235892295836 test_loss: -3.410696792602539
epoch: 39 training_loss -3.384316062927246 test_loss: -3.3032989501953125
epoch: 40 training_loss -3.264410526752472 test_loss: -3.3359195709228517
epoch: 41 training_loss -3.4336972188949586 test_loss: -3.4921649932861327
epoch: 42 training_loss -3.3039884400367736 test_loss: -3.249324417114258
epoch: 43 training_loss -3.5252902150154113 test_loss: -3.5125926971435546
epoch: 44 training_loss -3.51599730014801 test_loss: -3.4369014739990233
epoch: 45 training_loss -3.5336164951324465 test_loss: -3.5352779388427735
epoch: 46 training_loss -3.5522133827209474 test_loss: -3.5968254089355467
epoch: 47 training_loss -3.5922653317451476 test_loss: -3.6339271545410154
epoch: 48 training_loss -3.6029484915733336 test_loss: -3.727542495727539
epoch: 49 training_loss -3.701983463764191 test_loss: -3.5836109161376952
epoch: 50 training_loss -3.691152341365814 test_loss: -3.8071529388427736
epoch: 51 training_loss -3.6931551599502566 test_loss: -3.754111099243164
epoch: 52 training_loss -3.7346512866020203 test_loss: -3.7003410339355467
epoch: 53 training_loss -3.8175812768936157 test_loss: -3.7587257385253907
epoch: 54 training_loss -3.8077909469604494 test_loss: -3.821931076049805
epoch: 55 training_loss -3.755083701610565 test_loss: -3.8420387268066407
epoch: 56 training_loss -3.895935819149017 test_loss: -3.9436511993408203
epoch: 57 training_loss -3.874473011493683 test_loss: -3.8062511444091798
epoch: 58 training_loss -3.8639729237556457 test_loss: -3.8934383392333984
epoch: 59 training_loss -3.804922387599945 test_loss: -3.8840023040771485
epoch: 60 training_loss -3.9429495525360108 test_loss: -4.017520904541016
epoch: 61 training_loss -4.026122591495514 test_loss: -3.976055908203125
epoch: 62 training_loss -4.020106430053711 test_loss: -4.028633499145508
epoch: 63 training_loss -3.9524096632003785 test_loss: -4.002480697631836
epoch: 64 training_loss -3.9915403628349306 test_loss: -4.090755844116211
epoch: 65 training_loss -4.0593266201019285 test_loss: -4.068696975708008
epoch: 66 training_loss -4.097932851314544 test_loss: -4.076998519897461
epoch: 67 training_loss -4.1456004238128665 test_loss: -4.1600486755371096
epoch: 68 training_loss -4.026860105991363 test_loss: -4.097406387329102
epoch: 69 training_loss -4.085684161186219 test_loss: -4.170981597900391
epoch: 70 training_loss -4.1325016641616825 test_loss: -4.191789627075195
epoch: 71 training_loss -4.170042822360992 test_loss: -4.191404724121094
epoch: 72 training_loss -4.163432321548462 test_loss: -4.286521148681641
epoch: 73 training_loss -4.235316627025604 test_loss: -4.268516540527344
epoch: 74 training_loss -4.238812007904053 test_loss: -4.220157241821289
epoch: 75 training_loss -4.26664279460907 test_loss: -4.244134902954102
epoch: 76 training_loss -4.269717814922333 test_loss: -4.1903739929199215
epoch: 77 training_loss -4.252382078170776 test_loss: -4.411083221435547
epoch: 78 training_loss -4.301332771778107 test_loss: -4.2700347900390625
epoch: 79 training_loss -4.3650339770317075 test_loss: -4.329692459106445
epoch: 80 training_loss -4.35274436712265 test_loss: -4.343756866455078
epoch: 81 training_loss -4.358977360725403 test_loss: -4.282312393188477
epoch: 82 training_loss -4.353881895542145 test_loss: -4.398667907714843
epoch: 83 training_loss -4.177559030056 test_loss: -4.304466629028321
epoch: 84 training_loss -4.287458918094635 test_loss: -4.227144241333008
epoch: 85 training_loss -4.366720407009125 test_loss: -4.411522293090821
epoch: 86 training_loss -4.406109938621521 test_loss: -4.4574462890625
epoch: 87 training_loss -4.451785984039307 test_loss: -4.517169952392578
epoch: 88 training_loss -4.4182371592521665 test_loss: -4.303839874267578
epoch: 89 training_loss -4.4469260978698735 test_loss: -4.373293685913086
epoch: 90 training_loss -4.420689826011658 test_loss: -4.39929084777832
epoch: 91 training_loss -4.502050762176514 test_loss: -4.530321502685547
epoch: 92 training_loss -4.549318590164185 test_loss: -4.396981811523437
epoch: 93 training_loss -4.515302414894104 test_loss: -4.499039840698242
epoch: 94 training_loss -4.504241454601288 test_loss: -4.430238342285156
epoch: 95 training_loss -4.514834718704224 test_loss: -4.429191970825196
epoch: 96 training_loss -4.51838889837265 test_loss: -4.5715385437011715
epoch: 97 training_loss -4.533257718086243 test_loss: -4.611978530883789
epoch: 98 training_loss -4.65308560848236 test_loss: -4.534746170043945
epoch: 99 training_loss -4.611275627613067 test_loss: -4.6198280334472654
epoch: 100 training_loss -4.5717923545837404 test_loss: -4.565085983276367
epoch: 101 training_loss -4.5783347368240355 test_loss: -4.610186767578125
epoch: 102 training_loss -4.625217590332031 test_loss: -4.562821960449218
epoch: 103 training_loss -4.663594799041748 test_loss: -4.712298583984375
epoch: 104 training_loss -4.659852633476257 test_loss: -4.6186870574951175
epoch: 105 training_loss -4.6561892175674435 test_loss: -4.604742813110351
epoch: 106 training_loss -4.669543752670288 test_loss: -4.732439041137695
epoch: 107 training_loss -4.747053446769715 test_loss: -4.686299133300781
epoch: 108 training_loss -4.6676587390899655 test_loss: -4.679698944091797
epoch: 109 training_loss -4.741407375335694 test_loss: -4.805088043212891
epoch: 110 training_loss -4.762407217025757 test_loss: -4.841872024536133
epoch: 111 training_loss -4.763566522598267 test_loss: -4.708975982666016
epoch: 112 training_loss -4.739553647041321 test_loss: -4.723412322998047
epoch: 113 training_loss -4.7564147233963014 test_loss: -4.695166015625
epoch: 114 training_loss -4.735609669685363 test_loss: -4.887804794311523
epoch: 115 training_loss -4.7982653045654295 test_loss: -4.790489196777344
epoch: 116 training_loss -4.769100089073181 test_loss: -4.755591583251953
epoch: 117 training_loss -4.85625750541687 test_loss: -4.8202980041503904
epoch: 118 training_loss -4.789544830322265 test_loss: -4.764284133911133
epoch: 119 training_loss -4.888682522773743 test_loss: -4.809868240356446
epoch: 120 training_loss -4.882203054428101 test_loss: -4.8309375762939455
epoch: 121 training_loss -4.901378207206726 test_loss: -4.900455093383789
epoch: 122 training_loss -4.878401932716369 test_loss: -4.918374252319336
epoch: 123 training_loss -4.902275981903077 test_loss: -5.0653831481933596
epoch: 124 training_loss -4.9115621900558475 test_loss: -4.961955642700195
epoch: 125 training_loss -4.875863637924194 test_loss: -4.950732421875
epoch: 126 training_loss -4.9705612754821775 test_loss: -5.0210918426513675
epoch: 127 training_loss -5.015969944000244 test_loss: -4.987334823608398
epoch: 128 training_loss -4.9951324367523195 test_loss: -4.996559906005859
epoch: 129 training_loss -5.031080150604248 test_loss: -4.961026382446289
epoch: 130 training_loss -5.012812633514404 test_loss: -5.022103118896484
epoch: 131 training_loss -5.039982414245605 test_loss: -4.974357223510742
epoch: 132 training_loss -5.021485652923584 test_loss: -5.082594299316407
epoch: 133 training_loss -5.0578848695755 test_loss: -5.055383682250977
epoch: 134 training_loss -5.072301430702209 test_loss: -5.126778411865234
epoch: 135 training_loss -5.115146732330322 test_loss: -5.149191284179688
epoch: 136 training_loss -5.1034019184112545 test_loss: -5.091721725463867
epoch: 137 training_loss -5.129255995750428 test_loss: -5.218462753295898
epoch: 138 training_loss -5.134950094223022 test_loss: -5.150250625610352
epoch: 139 training_loss -5.142587895393372 test_loss: -5.141445922851562
epoch: 140 training_loss -5.157180762290954 test_loss: -5.139433670043945
epoch: 141 training_loss -5.1564374971389775 test_loss: -5.078065490722656
epoch: 142 training_loss -5.186173920631409 test_loss: -5.105189514160156
epoch: 143 training_loss -5.197880210876465 test_loss: -5.273166656494141
epoch: 144 training_loss -5.201042928695679 test_loss: -5.278433227539063
epoch: 145 training_loss -5.177827033996582 test_loss: -5.265547561645508
epoch: 146 training_loss -5.219428796768188 test_loss: -5.142201995849609
epoch: 147 training_loss -5.192827472686767 test_loss: -5.243658447265625
epoch: 148 training_loss -5.192595467567444 test_loss: -5.2931968688964846
epoch: 149 training_loss -5.27284185886383 test_loss: -5.177392196655274
16.780060366550586
episode: 0 training return: tensor(-1015100.1875, device='cuda:0')
episode: 1 training return: tensor(-2649494., device='cuda:0')
episode: 2 training return: tensor(-1894846.6250, device='cuda:0')
episode: 3 training return: tensor(-684696.5625, device='cuda:0')
epoch: 1 test_true_pfm: 11.81885386213917
episode: 4 training return: tensor(-453438.3125, device='cuda:0')
episode: 5 training return: tensor(-70843264., device='cuda:0')
episode: 6 training return: tensor(-892807.4375, device='cuda:0')
episode: 7 training return: tensor(-6530038., device='cuda:0')
epoch: 2 test_true_pfm: 9.703057695921176
episode: 8 training return: tensor(-1966108.8750, device='cuda:0')
episode: 9 training return: tensor(-786780.7500, device='cuda:0')
episode: 10 training return: tensor(-8.0377e+08, device='cuda:0')
episode: 11 training return: tensor(-6.2445e+08, device='cuda:0')
epoch: 3 test_true_pfm: -22.69906126307826
episode: 12 training return: tensor(-12888976., device='cuda:0')
episode: 13 training return: tensor(-342483.6875, device='cuda:0')
episode: 14 training return: tensor(-811.6342, device='cuda:0')
episode: 15 training return: tensor(-747.8871, device='cuda:0')
epoch: 4 test_true_pfm: -18.58082950493246
episode: 16 training return: tensor(-758.6744, device='cuda:0')
episode: 17 training return: tensor(-754.8538, device='cuda:0')
episode: 18 training return: tensor(-751.9646, device='cuda:0')
episode: 19 training return: tensor(-788.8311, device='cuda:0')
epoch: 5 test_true_pfm: -19.18206184106652
episode: 20 training return: tensor(-774.8226, device='cuda:0')
episode: 21 training return: tensor(-750.8458, device='cuda:0')
episode: 22 training return: tensor(-755.7220, device='cuda:0')
episode: 23 training return: tensor(-781.6386, device='cuda:0')
epoch: 6 test_true_pfm: -18.437913941144974
episode: 24 training return: tensor(-772.9743, device='cuda:0')
episode: 25 training return: tensor(-805.2263, device='cuda:0')
episode: 26 training return: tensor(-753.0614, device='cuda:0')
episode: 27 training return: tensor(-753.7640, device='cuda:0')
epoch: 7 test_true_pfm: -20.218152257044387
episode: 28 training return: tensor(-774.9523, device='cuda:0')
episode: 29 training return: tensor(-790.5158, device='cuda:0')
episode: 30 training return: tensor(-752.2703, device='cuda:0')
episode: 31 training return: tensor(-1398.7146, device='cuda:0')
epoch: 8 test_true_pfm: -26.12091762372563
episode: 32 training return: tensor(-3883.5942, device='cuda:0')
episode: 33 training return: tensor(-4351.5688, device='cuda:0')
episode: 34 training return: tensor(-8582.4639, device='cuda:0')
episode: 35 training return: tensor(-7100.7979, device='cuda:0')
epoch: 9 test_true_pfm: -26.04371857383864
episode: 36 training return: tensor(-11612.1631, device='cuda:0')
episode: 37 training return: tensor(-9028.9307, device='cuda:0')
episode: 38 training return: tensor(-7691.0459, device='cuda:0')
episode: 39 training return: tensor(-7585.5112, device='cuda:0')
epoch: 10 test_true_pfm: -10.790833793791453
episode: 40 training return: tensor(-5099.8320, device='cuda:0')
episode: 41 training return: tensor(-5493.5010, device='cuda:0')
episode: 42 training return: tensor(-3758.2100, device='cuda:0')
episode: 43 training return: tensor(-61533.9023, device='cuda:0')
epoch: 11 test_true_pfm: 17.13405621314895
episode: 44 training return: tensor(-16503.7441, device='cuda:0')
episode: 45 training return: tensor(-2929.2957, device='cuda:0')
episode: 46 training return: tensor(-2466.8201, device='cuda:0')
episode: 47 training return: tensor(-10185.9365, device='cuda:0')
epoch: 12 test_true_pfm: 10.85333579674899
episode: 48 training return: tensor(-2542.2358, device='cuda:0')
episode: 49 training return: tensor(-2515.1091, device='cuda:0')
episode: 50 training return: tensor(-2931.0645, device='cuda:0')
episode: 51 training return: tensor(-3600.1638, device='cuda:0')
epoch: 13 test_true_pfm: 9.905454106319555
episode: 52 training return: tensor(-3276.6050, device='cuda:0')
episode: 53 training return: tensor(-4405.4590, device='cuda:0')
episode: 54 training return: tensor(-2415.4165, device='cuda:0')
episode: 55 training return: tensor(-19245.4883, device='cuda:0')
epoch: 14 test_true_pfm: 12.080040397283568
episode: 56 training return: tensor(-3139.9675, device='cuda:0')
episode: 57 training return: tensor(-10711.9453, device='cuda:0')
episode: 58 training return: tensor(-1382.4880, device='cuda:0')
episode: 59 training return: tensor(-3930.3276, device='cuda:0')
epoch: 15 test_true_pfm: 18.080234630669455
episode: 60 training return: tensor(-3036.6572, device='cuda:0')
episode: 61 training return: tensor(-8.1763e+08, device='cuda:0')
episode: 62 training return: tensor(-146872.6875, device='cuda:0')
episode: 63 training return: tensor(-6757420., device='cuda:0')
epoch: 16 test_true_pfm: -16.01272371082181
episode: 64 training return: tensor(-15166392., device='cuda:0')
episode: 65 training return: tensor(-8.9347e+10, device='cuda:0')
episode: 66 training return: tensor(-3.1877e+10, device='cuda:0')
episode: 67 training return: tensor(-40097.2539, device='cuda:0')
epoch: 17 test_true_pfm: 11.351458811092195
episode: 68 training return: tensor(-44752.8789, device='cuda:0')
episode: 69 training return: tensor(-73160.7500, device='cuda:0')
episode: 70 training return: tensor(-60567.2344, device='cuda:0')
episode: 71 training return: tensor(-67280.6328, device='cuda:0')
epoch: 18 test_true_pfm: 12.837886271977373
episode: 72 training return: tensor(-36222.2500, device='cuda:0')
episode: 73 training return: tensor(-2677.8567, device='cuda:0')
episode: 74 training return: tensor(-2947.0918, device='cuda:0')
episode: 75 training return: tensor(-3463.3181, device='cuda:0')
epoch: 19 test_true_pfm: 10.052335919470528
episode: 76 training return: tensor(-947.8225, device='cuda:0')
episode: 77 training return: tensor(-2435.9875, device='cuda:0')
episode: 78 training return: tensor(-2514.5486, device='cuda:0')
episode: 79 training return: tensor(-2397.4272, device='cuda:0')
epoch: 20 test_true_pfm: 12.997586447682739
episode: 80 training return: tensor(-3036.1846, device='cuda:0')
episode: 81 training return: tensor(-2895.0046, device='cuda:0')
episode: 82 training return: tensor(-2730.5945, device='cuda:0')
episode: 83 training return: tensor(-765.7036, device='cuda:0')
epoch: 21 test_true_pfm: 12.666054973957788
episode: 84 training return: tensor(-30871.0742, device='cuda:0')
episode: 85 training return: tensor(-13489.7715, device='cuda:0')
episode: 86 training return: tensor(-33514.6875, device='cuda:0')
episode: 87 training return: tensor(-35138.8242, device='cuda:0')
epoch: 22 test_true_pfm: 12.209771194753333
episode: 88 training return: tensor(-26483.1152, device='cuda:0')
episode: 89 training return: tensor(-19888.6582, device='cuda:0')
episode: 90 training return: tensor(-33473.4727, device='cuda:0')
episode: 91 training return: tensor(-20916.2012, device='cuda:0')
epoch: 23 test_true_pfm: 13.195626081442395
episode: 92 training return: tensor(-28990.1641, device='cuda:0')
episode: 93 training return: tensor(-59656.7070, device='cuda:0')
episode: 94 training return: tensor(-46062.5117, device='cuda:0')
episode: 95 training return: tensor(-24133.3945, device='cuda:0')
epoch: 24 test_true_pfm: -21.204541050176942
episode: 96 training return: tensor(-780.5037, device='cuda:0')
episode: 97 training return: tensor(-798.8522, device='cuda:0')
episode: 98 training return: tensor(-760.9802, device='cuda:0')
episode: 99 training return: tensor(-799.6683, device='cuda:0')
epoch: 25 test_true_pfm: -19.142685656669464
episode: 100 training return: tensor(-789.9597, device='cuda:0')
episode: 101 training return: tensor(-739.1619, device='cuda:0')
episode: 102 training return: tensor(-745.6453, device='cuda:0')
episode: 103 training return: tensor(-920.4715, device='cuda:0')
epoch: 26 test_true_pfm: -8.915634117651118
episode: 104 training return: tensor(-850.1031, device='cuda:0')
episode: 105 training return: tensor(-858.0498, device='cuda:0')
episode: 106 training return: tensor(-802.1670, device='cuda:0')
episode: 107 training return: tensor(-1164.5878, device='cuda:0')
epoch: 27 test_true_pfm: 14.173053880353788
episode: 108 training return: tensor(-2883597., device='cuda:0')
episode: 109 training return: tensor(-2580578.5000, device='cuda:0')
episode: 110 training return: tensor(-3940818., device='cuda:0')
episode: 111 training return: tensor(-3855957.7500, device='cuda:0')
epoch: 28 test_true_pfm: 13.772861977860916
episode: 112 training return: tensor(-4020103.5000, device='cuda:0')
episode: 113 training return: tensor(-3364489., device='cuda:0')
episode: 114 training return: tensor(-4561839.5000, device='cuda:0')
episode: 115 training return: tensor(-3514632.7500, device='cuda:0')
epoch: 29 test_true_pfm: 14.004293543139307
episode: 116 training return: tensor(-4020140.2500, device='cuda:0')
episode: 117 training return: tensor(-4575581., device='cuda:0')
episode: 118 training return: tensor(-3307403.2500, device='cuda:0')
episode: 119 training return: tensor(-5530569.5000, device='cuda:0')
epoch: 30 test_true_pfm: 12.93555191886451
episode: 120 training return: tensor(-3064632.2500, device='cuda:0')
episode: 121 training return: tensor(-2722507.7500, device='cuda:0')
episode: 122 training return: tensor(-3773649.5000, device='cuda:0')
episode: 123 training return: tensor(-3014044.2500, device='cuda:0')
epoch: 31 test_true_pfm: 17.007389149924855
episode: 124 training return: tensor(-3116264.7500, device='cuda:0')
episode: 125 training return: tensor(-2541452., device='cuda:0')
episode: 126 training return: tensor(-1507480.3750, device='cuda:0')
episode: 127 training return: tensor(-1267501.8750, device='cuda:0')
epoch: 32 test_true_pfm: 17.67027525743041
episode: 128 training return: tensor(-2199492., device='cuda:0')
episode: 129 training return: tensor(-1805900.5000, device='cuda:0')
episode: 130 training return: tensor(-1826635.1250, device='cuda:0')
episode: 131 training return: tensor(-426057.2812, device='cuda:0')
epoch: 33 test_true_pfm: 6.541625052107972
episode: 132 training return: tensor(-283116.3125, device='cuda:0')
episode: 133 training return: tensor(-60712.9492, device='cuda:0')
episode: 134 training return: tensor(-13278.2295, device='cuda:0')
episode: 135 training return: tensor(-44340.3945, device='cuda:0')
epoch: 34 test_true_pfm: 19.746035083499727
episode: 136 training return: tensor(-12157.8193, device='cuda:0')
episode: 137 training return: tensor(-15436.4258, device='cuda:0')
episode: 138 training return: tensor(-19475.5938, device='cuda:0')
episode: 139 training return: tensor(-6305813., device='cuda:0')
epoch: 35 test_true_pfm: 14.951996783297508
episode: 140 training return: tensor(-70457.0547, device='cuda:0')
episode: 141 training return: tensor(-73114.8828, device='cuda:0')
episode: 142 training return: tensor(-328671.9062, device='cuda:0')
episode: 143 training return: tensor(-455952.2188, device='cuda:0')
epoch: 36 test_true_pfm: 38.34964240312844
episode: 144 training return: tensor(-2.3378e+10, device='cuda:0')
episode: 145 training return: tensor(-3.8049e+11, device='cuda:0')
episode: 146 training return: tensor(-2.0480e+12, device='cuda:0')
episode: 147 training return: tensor(-1.3922e+08, device='cuda:0')
epoch: 37 test_true_pfm: 61.94082222834684
episode: 148 training return: tensor(-5.9749e+08, device='cuda:0')
episode: 149 training return: tensor(-9.9610e+09, device='cuda:0')
episode: 150 training return: tensor(-6.4781e+09, device='cuda:0')
episode: 151 training return: tensor(-1.0107e+10, device='cuda:0')
epoch: 38 test_true_pfm: 19.901123277409834
episode: 152 training return: tensor(-2.6167e+08, device='cuda:0')
episode: 153 training return: tensor(-2.7797e+08, device='cuda:0')
episode: 154 training return: tensor(-17669.2734, device='cuda:0')
episode: 155 training return: tensor(-16648.9062, device='cuda:0')
epoch: 39 test_true_pfm: 12.566082175724603
episode: 156 training return: tensor(-29328.3379, device='cuda:0')
episode: 157 training return: tensor(-7.3819e+08, device='cuda:0')
episode: 158 training return: tensor(-1554960.3750, device='cuda:0')
episode: 159 training return: tensor(-1.7878e+09, device='cuda:0')
epoch: 40 test_true_pfm: 10.92345490256043
episode: 160 training return: tensor(-2483296.5000, device='cuda:0')
episode: 161 training return: tensor(-316304.6250, device='cuda:0')
episode: 162 training return: tensor(-3151533.5000, device='cuda:0')
episode: 163 training return: tensor(-24836.4375, device='cuda:0')
epoch: 41 test_true_pfm: 8.602324182074305
episode: 164 training return: tensor(-1288231.8750, device='cuda:0')
episode: 165 training return: tensor(-18960.5527, device='cuda:0')
episode: 166 training return: tensor(-20318.5918, device='cuda:0')
episode: 167 training return: tensor(-61349.3477, device='cuda:0')
epoch: 42 test_true_pfm: 11.528099259862008
episode: 168 training return: tensor(-46687.3438, device='cuda:0')
episode: 169 training return: tensor(-18908.4180, device='cuda:0')
episode: 170 training return: tensor(-17339.4336, device='cuda:0')
episode: 171 training return: tensor(-17232.5684, device='cuda:0')
epoch: 43 test_true_pfm: 6.986730340775152
episode: 172 training return: tensor(-13249.4785, device='cuda:0')
episode: 173 training return: tensor(-817.0676, device='cuda:0')
episode: 174 training return: tensor(-756.3282, device='cuda:0')
episode: 175 training return: tensor(-755.5380, device='cuda:0')
epoch: 44 test_true_pfm: -19.07747090449851
episode: 176 training return: tensor(-761.1817, device='cuda:0')
episode: 177 training return: tensor(-768.1474, device='cuda:0')
episode: 178 training return: tensor(-750.6132, device='cuda:0')
episode: 179 training return: tensor(-764.8910, device='cuda:0')
epoch: 45 test_true_pfm: -19.327958333636918
episode: 180 training return: tensor(-774.1665, device='cuda:0')
episode: 181 training return: tensor(-1271738., device='cuda:0')
episode: 182 training return: tensor(-753.2471, device='cuda:0')
episode: 183 training return: tensor(-764.7530, device='cuda:0')
epoch: 46 test_true_pfm: -23.479819838865183
episode: 184 training return: tensor(-16381.5469, device='cuda:0')
episode: 185 training return: tensor(-859.5411, device='cuda:0')
episode: 186 training return: tensor(-1464.2606, device='cuda:0')
episode: 187 training return: tensor(-5877.0762, device='cuda:0')
epoch: 47 test_true_pfm: -11.240471645306128
episode: 188 training return: tensor(-2873.5032, device='cuda:0')
episode: 189 training return: tensor(-3885.3062, device='cuda:0')
episode: 190 training return: tensor(-36102.5586, device='cuda:0')
episode: 191 training return: tensor(-4260.1426, device='cuda:0')
epoch: 48 test_true_pfm: 16.276317369512245
episode: 192 training return: tensor(-18266.7344, device='cuda:0')
episode: 193 training return: tensor(-34917., device='cuda:0')
episode: 194 training return: tensor(-2937.3704, device='cuda:0')
episode: 195 training return: tensor(-1602.0342, device='cuda:0')
epoch: 49 test_true_pfm: 6.501508539290157
episode: 196 training return: tensor(-1394.9418, device='cuda:0')
episode: 197 training return: tensor(-3626.4543, device='cuda:0')
episode: 198 training return: tensor(-31356.9277, device='cuda:0')
episode: 199 training return: tensor(-7778.6294, device='cuda:0')
epoch: 50 test_true_pfm: 12.903681150582063
episode: 200 training return: tensor(-2693.9612, device='cuda:0')
episode: 201 training return: tensor(-1397.6250, device='cuda:0')
episode: 202 training return: tensor(-1980.0601, device='cuda:0')
episode: 203 training return: tensor(-2463.3555, device='cuda:0')
epoch: 51 test_true_pfm: 15.70196068307589
episode: 204 training return: tensor(-2406.5239, device='cuda:0')
episode: 205 training return: tensor(-1005.6102, device='cuda:0')
episode: 206 training return: tensor(-3060.9187, device='cuda:0')
episode: 207 training return: tensor(-40245.2617, device='cuda:0')
epoch: 52 test_true_pfm: 12.82821657252585
episode: 208 training return: tensor(-13327.1738, device='cuda:0')
episode: 209 training return: tensor(-2486.4280, device='cuda:0')
episode: 210 training return: tensor(-2607.1841, device='cuda:0')
episode: 211 training return: tensor(-2678.0693, device='cuda:0')
epoch: 53 test_true_pfm: 15.0251900953578
episode: 212 training return: tensor(-3048.7998, device='cuda:0')
episode: 213 training return: tensor(-2358.9475, device='cuda:0')
episode: 214 training return: tensor(-1778.6038, device='cuda:0')
episode: 215 training return: tensor(-17496.5957, device='cuda:0')
epoch: 54 test_true_pfm: 8.867266442465489
episode: 216 training return: tensor(-2926.4827, device='cuda:0')
episode: 217 training return: tensor(-3417.5808, device='cuda:0')
episode: 218 training return: tensor(-911.8221, device='cuda:0')
episode: 219 training return: tensor(-2478.8843, device='cuda:0')
epoch: 55 test_true_pfm: 21.085795137555046
episode: 220 training return: tensor(-13221.8467, device='cuda:0')
episode: 221 training return: tensor(-3109.0303, device='cuda:0')
episode: 222 training return: tensor(-3061.2046, device='cuda:0')
episode: 223 training return: tensor(-1878.6107, device='cuda:0')
epoch: 56 test_true_pfm: 8.207584435612423
episode: 224 training return: tensor(-3049.7383, device='cuda:0')
episode: 225 training return: tensor(-26843.9629, device='cuda:0')
episode: 226 training return: tensor(-2497.6948, device='cuda:0')
episode: 227 training return: tensor(-2836.7495, device='cuda:0')
epoch: 57 test_true_pfm: 1.4201765506386372
episode: 228 training return: tensor(-2611.8020, device='cuda:0')
episode: 229 training return: tensor(-2857.6863, device='cuda:0')
episode: 230 training return: tensor(-914.4661, device='cuda:0')
episode: 231 training return: tensor(-4277.1929, device='cuda:0')
epoch: 58 test_true_pfm: -17.577446822259247
episode: 232 training return: tensor(-1800.7006, device='cuda:0')
episode: 233 training return: tensor(-1250.7177, device='cuda:0')
episode: 234 training return: tensor(-3021.4001, device='cuda:0')
episode: 235 training return: tensor(-1364.6658, device='cuda:0')
epoch: 59 test_true_pfm: -14.938914253404093
episode: 236 training return: tensor(-834.3645, device='cuda:0')
episode: 237 training return: tensor(-764.3854, device='cuda:0')
episode: 238 training return: tensor(-2689.3225, device='cuda:0')
episode: 239 training return: tensor(-1830.7551, device='cuda:0')
epoch: 60 test_true_pfm: -0.9349318213278359
episode: 240 training return: tensor(-34242.6641, device='cuda:0')
episode: 241 training return: tensor(-829.3743, device='cuda:0')
episode: 242 training return: tensor(-2473.1221, device='cuda:0')
episode: 243 training return: tensor(-850.1982, device='cuda:0')
epoch: 61 test_true_pfm: -23.449746285641957
episode: 244 training return: tensor(-1505.4375, device='cuda:0')
episode: 245 training return: tensor(-782.7163, device='cuda:0')
episode: 246 training return: tensor(-779.0834, device='cuda:0')
episode: 247 training return: tensor(-778.8693, device='cuda:0')
epoch: 62 test_true_pfm: -12.530679155658692
episode: 248 training return: tensor(-1166.2870, device='cuda:0')
episode: 249 training return: tensor(-768.8895, device='cuda:0')
episode: 250 training return: tensor(-757.4929, device='cuda:0')
episode: 251 training return: tensor(-2393.5081, device='cuda:0')
epoch: 63 test_true_pfm: -22.30482069233087
episode: 252 training return: tensor(-908.7191, device='cuda:0')
episode: 253 training return: tensor(-769.3329, device='cuda:0')
episode: 254 training return: tensor(-925.8745, device='cuda:0')
episode: 255 training return: tensor(-836.4324, device='cuda:0')
epoch: 64 test_true_pfm: -17.48888568973192
episode: 256 training return: tensor(-8944.0488, device='cuda:0')
episode: 257 training return: tensor(-853.9243, device='cuda:0')
episode: 258 training return: tensor(-1194.8232, device='cuda:0')
episode: 259 training return: tensor(-1182.8025, device='cuda:0')
epoch: 65 test_true_pfm: -13.784867849194862
episode: 260 training return: tensor(-2538.9160, device='cuda:0')
episode: 261 training return: tensor(-758.9503, device='cuda:0')
episode: 262 training return: tensor(-751.5751, device='cuda:0')
episode: 263 training return: tensor(-776.8817, device='cuda:0')
epoch: 66 test_true_pfm: -20.168590995882496
episode: 264 training return: tensor(-866.3162, device='cuda:0')
episode: 265 training return: tensor(-842.1989, device='cuda:0')
episode: 266 training return: tensor(-783.6970, device='cuda:0')
episode: 267 training return: tensor(-763.3286, device='cuda:0')
epoch: 67 test_true_pfm: -21.625375065539554
episode: 268 training return: tensor(-810.1628, device='cuda:0')
episode: 269 training return: tensor(-728.1826, device='cuda:0')
episode: 270 training return: tensor(-859.1186, device='cuda:0')
episode: 271 training return: tensor(-789.3350, device='cuda:0')
epoch: 68 test_true_pfm: -21.10863157271699
episode: 272 training return: tensor(-933.6973, device='cuda:0')
episode: 273 training return: tensor(-793.7873, device='cuda:0')
episode: 274 training return: tensor(-790.2382, device='cuda:0')
episode: 275 training return: tensor(-786.1276, device='cuda:0')
epoch: 69 test_true_pfm: -21.434049050010294
episode: 276 training return: tensor(-746.0607, device='cuda:0')
episode: 277 training return: tensor(-774.0593, device='cuda:0')
episode: 278 training return: tensor(-1042.5239, device='cuda:0')
episode: 279 training return: tensor(-899.9047, device='cuda:0')
epoch: 70 test_true_pfm: -21.56046922184824
episode: 280 training return: tensor(-831.5750, device='cuda:0')
episode: 281 training return: tensor(-811.1306, device='cuda:0')
episode: 282 training return: tensor(-954.4177, device='cuda:0')
episode: 283 training return: tensor(-987.3629, device='cuda:0')
epoch: 71 test_true_pfm: -22.015273189467663
episode: 284 training return: tensor(-927.4648, device='cuda:0')
episode: 285 training return: tensor(-752.9144, device='cuda:0')
episode: 286 training return: tensor(-801.7588, device='cuda:0')
episode: 287 training return: tensor(-843.6160, device='cuda:0')
epoch: 72 test_true_pfm: -20.70221000576905
episode: 288 training return: tensor(-1036.4039, device='cuda:0')
episode: 289 training return: tensor(-775.8137, device='cuda:0')
episode: 290 training return: tensor(-822.0452, device='cuda:0')
episode: 291 training return: tensor(-907.2504, device='cuda:0')
epoch: 73 test_true_pfm: -20.12651375174217
episode: 292 training return: tensor(-6798.0093, device='cuda:0')
episode: 293 training return: tensor(-926.9815, device='cuda:0')
episode: 294 training return: tensor(-766.3816, device='cuda:0')
episode: 295 training return: tensor(-780.8557, device='cuda:0')
epoch: 74 test_true_pfm: -22.320087734907112
episode: 296 training return: tensor(-786.0369, device='cuda:0')
episode: 297 training return: tensor(-787.1310, device='cuda:0')
episode: 298 training return: tensor(-1006.7170, device='cuda:0')
episode: 299 training return: tensor(-899.4775, device='cuda:0')
epoch: 75 test_true_pfm: -20.940299981634418
episode: 300 training return: tensor(-768.5953, device='cuda:0')
episode: 301 training return: tensor(-810.8837, device='cuda:0')
episode: 302 training return: tensor(-783.5545, device='cuda:0')
episode: 303 training return: tensor(-810.6487, device='cuda:0')
epoch: 76 test_true_pfm: -11.701536618617743
episode: 304 training return: tensor(-1137.6377, device='cuda:0')
episode: 305 training return: tensor(-970.8817, device='cuda:0')
episode: 306 training return: tensor(-775.0134, device='cuda:0')
episode: 307 training return: tensor(-669.8887, device='cuda:0')
epoch: 77 test_true_pfm: -23.326344544551265
episode: 308 training return: tensor(-932.0895, device='cuda:0')
episode: 309 training return: tensor(-791.1433, device='cuda:0')
episode: 310 training return: tensor(-820.3158, device='cuda:0')
episode: 311 training return: tensor(-753.3616, device='cuda:0')
epoch: 78 test_true_pfm: -25.596668458883546
episode: 312 training return: tensor(-677.6606, device='cuda:0')
episode: 313 training return: tensor(-788.4372, device='cuda:0')
episode: 314 training return: tensor(-1036.1279, device='cuda:0')
episode: 315 training return: tensor(-4035.7219, device='cuda:0')
epoch: 79 test_true_pfm: -6.318105129634807
episode: 316 training return: tensor(-3572.3721, device='cuda:0')
episode: 317 training return: tensor(-2979.9509, device='cuda:0')
episode: 318 training return: tensor(-3526.4685, device='cuda:0')
episode: 319 training return: tensor(-3100.2246, device='cuda:0')
epoch: 80 test_true_pfm: -27.900320280208717
episode: 320 training return: tensor(-3446.6736, device='cuda:0')
episode: 321 training return: tensor(-3612.3982, device='cuda:0')
episode: 322 training return: tensor(-712.2598, device='cuda:0')
episode: 323 training return: tensor(-3587.2788, device='cuda:0')
epoch: 81 test_true_pfm: -28.170549340540582
episode: 324 training return: tensor(-4550.0142, device='cuda:0')
episode: 325 training return: tensor(-5052.1699, device='cuda:0')
episode: 326 training return: tensor(-3026.5833, device='cuda:0')
episode: 327 training return: tensor(-6439.8926, device='cuda:0')
epoch: 82 test_true_pfm: -28.20199985353031
episode: 328 training return: tensor(-3696.2256, device='cuda:0')
episode: 329 training return: tensor(-696.1879, device='cuda:0')
episode: 330 training return: tensor(-765.2385, device='cuda:0')
episode: 331 training return: tensor(-803.4609, device='cuda:0')
epoch: 83 test_true_pfm: -28.5851976482069
episode: 332 training return: tensor(-865.2433, device='cuda:0')
episode: 333 training return: tensor(-879.0389, device='cuda:0')
episode: 334 training return: tensor(-838.9290, device='cuda:0')
episode: 335 training return: tensor(-775.8190, device='cuda:0')
epoch: 84 test_true_pfm: -27.47186020371753
episode: 336 training return: tensor(-840.9384, device='cuda:0')
episode: 337 training return: tensor(-782.1818, device='cuda:0')
episode: 338 training return: tensor(-880.4480, device='cuda:0')
episode: 339 training return: tensor(-777.5957, device='cuda:0')
epoch: 85 test_true_pfm: -28.198925158570596
episode: 340 training return: tensor(-823.5746, device='cuda:0')
episode: 341 training return: tensor(-873.4860, device='cuda:0')
episode: 342 training return: tensor(-951.4590, device='cuda:0')
episode: 343 training return: tensor(-848.5497, device='cuda:0')
epoch: 86 test_true_pfm: -27.094256064671306
episode: 344 training return: tensor(-816.0625, device='cuda:0')
episode: 345 training return: tensor(-908.6643, device='cuda:0')
episode: 346 training return: tensor(-805.7287, device='cuda:0')
episode: 347 training return: tensor(-901.5538, device='cuda:0')
epoch: 87 test_true_pfm: -28.06142334701831
episode: 348 training return: tensor(-842.6840, device='cuda:0')
episode: 349 training return: tensor(-809.0122, device='cuda:0')
episode: 350 training return: tensor(-765.7661, device='cuda:0')
episode: 351 training return: tensor(-863.0164, device='cuda:0')
epoch: 88 test_true_pfm: -27.49992725478926
episode: 352 training return: tensor(-853.4594, device='cuda:0')
episode: 353 training return: tensor(-846.4661, device='cuda:0')
episode: 354 training return: tensor(-916.8411, device='cuda:0')
episode: 355 training return: tensor(-809.0363, device='cuda:0')
epoch: 89 test_true_pfm: -27.03092516591038
episode: 356 training return: tensor(-855.1284, device='cuda:0')
episode: 357 training return: tensor(-834.2477, device='cuda:0')
episode: 358 training return: tensor(-856.7751, device='cuda:0')
episode: 359 training return: tensor(-863.6509, device='cuda:0')
epoch: 90 test_true_pfm: -26.999862614588572
episode: 360 training return: tensor(-772.4269, device='cuda:0')
episode: 361 training return: tensor(-877.6661, device='cuda:0')
episode: 362 training return: tensor(-799.4478, device='cuda:0')
episode: 363 training return: tensor(-818.8177, device='cuda:0')
epoch: 91 test_true_pfm: -27.674111293777536
episode: 364 training return: tensor(-808.9476, device='cuda:0')
episode: 365 training return: tensor(-857.1022, device='cuda:0')
episode: 366 training return: tensor(-792.2368, device='cuda:0')
episode: 367 training return: tensor(-876.4860, device='cuda:0')
epoch: 92 test_true_pfm: -28.758752812016333
episode: 368 training return: tensor(-809.5985, device='cuda:0')
episode: 369 training return: tensor(-767.7961, device='cuda:0')
episode: 370 training return: tensor(-853.9487, device='cuda:0')
episode: 371 training return: tensor(-809.0799, device='cuda:0')
epoch: 93 test_true_pfm: -27.62017393512279
episode: 372 training return: tensor(-891.0532, device='cuda:0')
episode: 373 training return: tensor(-910.4139, device='cuda:0')
episode: 374 training return: tensor(-927.3011, device='cuda:0')
episode: 375 training return: tensor(-872.9151, device='cuda:0')
epoch: 94 test_true_pfm: -27.60902648010886
episode: 376 training return: tensor(-832.9177, device='cuda:0')
episode: 377 training return: tensor(-816.1231, device='cuda:0')
episode: 378 training return: tensor(-834.2347, device='cuda:0')
episode: 379 training return: tensor(-820.2259, device='cuda:0')
epoch: 95 test_true_pfm: -28.371522818755274
episode: 380 training return: tensor(-853.7026, device='cuda:0')
episode: 381 training return: tensor(-889.7189, device='cuda:0')
episode: 382 training return: tensor(-844.0770, device='cuda:0')
episode: 383 training return: tensor(-795.0189, device='cuda:0')
epoch: 96 test_true_pfm: -28.224672308269383
episode: 384 training return: tensor(-850.3261, device='cuda:0')
episode: 385 training return: tensor(-836.8340, device='cuda:0')
episode: 386 training return: tensor(-785.4546, device='cuda:0')
episode: 387 training return: tensor(-827.2866, device='cuda:0')
epoch: 97 test_true_pfm: -27.965325758515256
episode: 388 training return: tensor(-853.2871, device='cuda:0')
episode: 389 training return: tensor(-829.9566, device='cuda:0')
episode: 390 training return: tensor(-851.7820, device='cuda:0')
episode: 391 training return: tensor(-880.9656, device='cuda:0')
epoch: 98 test_true_pfm: -27.55254756597567
episode: 392 training return: tensor(-868.6973, device='cuda:0')
episode: 393 training return: tensor(-834.8067, device='cuda:0')
episode: 394 training return: tensor(-832.7283, device='cuda:0')
episode: 395 training return: tensor(-836.1152, device='cuda:0')
epoch: 99 test_true_pfm: -26.766016189283267
episode: 396 training return: tensor(-894.6934, device='cuda:0')
episode: 397 training return: tensor(-801.9541, device='cuda:0')
episode: 398 training return: tensor(-934.6359, device='cuda:0')
episode: 399 training return: tensor(-862.5580, device='cuda:0')
epoch: 100 test_true_pfm: -28.239927436211246
episode: 400 training return: tensor(-879.4554, device='cuda:0')
episode: 401 training return: tensor(-884.8155, device='cuda:0')
episode: 402 training return: tensor(-925.3779, device='cuda:0')
episode: 403 training return: tensor(-908.2553, device='cuda:0')
epoch: 101 test_true_pfm: -26.978553313335492
episode: 404 training return: tensor(-886.0302, device='cuda:0')
episode: 405 training return: tensor(-872.1141, device='cuda:0')
episode: 406 training return: tensor(-878.6662, device='cuda:0')
episode: 407 training return: tensor(-887.1988, device='cuda:0')
epoch: 102 test_true_pfm: -29.30392261147057
episode: 408 training return: tensor(-852.1485, device='cuda:0')
episode: 409 training return: tensor(-861.5384, device='cuda:0')
episode: 410 training return: tensor(-908.3459, device='cuda:0')
episode: 411 training return: tensor(-830.8718, device='cuda:0')
epoch: 103 test_true_pfm: -29.32476401302183
episode: 412 training return: tensor(-792.7036, device='cuda:0')
episode: 413 training return: tensor(-960.2578, device='cuda:0')
episode: 414 training return: tensor(-903.1044, device='cuda:0')
episode: 415 training return: tensor(-883.2151, device='cuda:0')
epoch: 104 test_true_pfm: -22.820787766490337
episode: 416 training return: tensor(-1652.1027, device='cuda:0')
episode: 417 training return: tensor(-972.5436, device='cuda:0')
episode: 418 training return: tensor(-931.1241, device='cuda:0')
episode: 419 training return: tensor(-905.6343, device='cuda:0')
epoch: 105 test_true_pfm: -23.7334261094284
episode: 420 training return: tensor(-2633.9487, device='cuda:0')
episode: 421 training return: tensor(-896.1214, device='cuda:0')
episode: 422 training return: tensor(-815.1891, device='cuda:0')
episode: 423 training return: tensor(-928.2114, device='cuda:0')
epoch: 106 test_true_pfm: -22.074316944511473
episode: 424 training return: tensor(-4293.9131, device='cuda:0')
episode: 425 training return: tensor(-2497.9172, device='cuda:0')
episode: 426 training return: tensor(-4666.2441, device='cuda:0')
episode: 427 training return: tensor(-873.1813, device='cuda:0')
epoch: 107 test_true_pfm: -22.26907935944917
episode: 428 training return: tensor(-903.3403, device='cuda:0')
episode: 429 training return: tensor(-5041.5591, device='cuda:0')
episode: 430 training return: tensor(-909.4359, device='cuda:0')
episode: 431 training return: tensor(-4177.7563, device='cuda:0')
epoch: 108 test_true_pfm: -20.720782435231808
episode: 432 training return: tensor(-3635.8357, device='cuda:0')
episode: 433 training return: tensor(-4372.1240, device='cuda:0')
episode: 434 training return: tensor(-4623.2729, device='cuda:0')
episode: 435 training return: tensor(-5495.4619, device='cuda:0')
epoch: 109 test_true_pfm: -23.603711934727784
episode: 436 training return: tensor(-5175.2178, device='cuda:0')
episode: 437 training return: tensor(-4290.3896, device='cuda:0')
episode: 438 training return: tensor(-4547.8169, device='cuda:0')
episode: 439 training return: tensor(-3660.9648, device='cuda:0')
epoch: 110 test_true_pfm: -23.31825082927315
episode: 440 training return: tensor(-4707.9595, device='cuda:0')
episode: 441 training return: tensor(-5054.5840, device='cuda:0')
episode: 442 training return: tensor(-5407.7124, device='cuda:0')
episode: 443 training return: tensor(-4482.7183, device='cuda:0')
epoch: 111 test_true_pfm: -22.853135531326604
episode: 444 training return: tensor(-3494.3682, device='cuda:0')
episode: 445 training return: tensor(-4096.9014, device='cuda:0')
episode: 446 training return: tensor(-3042.5039, device='cuda:0')
episode: 447 training return: tensor(-4267.3223, device='cuda:0')
epoch: 112 test_true_pfm: -20.563455630995055
episode: 448 training return: tensor(-2730.9392, device='cuda:0')
episode: 449 training return: tensor(-3825.2019, device='cuda:0')
episode: 450 training return: tensor(-3944.0330, device='cuda:0')
episode: 451 training return: tensor(-4315.0117, device='cuda:0')
epoch: 113 test_true_pfm: -22.660200088191345
episode: 452 training return: tensor(-2345.3347, device='cuda:0')
episode: 453 training return: tensor(-1508.9197, device='cuda:0')
episode: 454 training return: tensor(-3653.2764, device='cuda:0')
episode: 455 training return: tensor(-4276.8442, device='cuda:0')
epoch: 114 test_true_pfm: -21.609741241970575
episode: 456 training return: tensor(-1763.8438, device='cuda:0')
episode: 457 training return: tensor(-2765.7883, device='cuda:0')
episode: 458 training return: tensor(-2185.3992, device='cuda:0')
episode: 459 training return: tensor(-2794.1995, device='cuda:0')
epoch: 115 test_true_pfm: -18.553664521380075
episode: 460 training return: tensor(-1616.8966, device='cuda:0')
episode: 461 training return: tensor(-1499.7283, device='cuda:0')
episode: 462 training return: tensor(-2515.2842, device='cuda:0')
episode: 463 training return: tensor(-3161.2368, device='cuda:0')
epoch: 116 test_true_pfm: -18.309855312685393
episode: 464 training return: tensor(-1894.7369, device='cuda:0')
episode: 465 training return: tensor(-4177.0884, device='cuda:0')
episode: 466 training return: tensor(-3487.3201, device='cuda:0')
episode: 467 training return: tensor(-3851.7410, device='cuda:0')
epoch: 117 test_true_pfm: -20.407121046354582
episode: 468 training return: tensor(-3755.1968, device='cuda:0')
episode: 469 training return: tensor(-1735.8945, device='cuda:0')
episode: 470 training return: tensor(-3059.3289, device='cuda:0')
episode: 471 training return: tensor(-3109.4365, device='cuda:0')
epoch: 118 test_true_pfm: -21.074324519568062
episode: 472 training return: tensor(-3053.6418, device='cuda:0')
episode: 473 training return: tensor(-3333.2690, device='cuda:0')
episode: 474 training return: tensor(-4382.7847, device='cuda:0')
episode: 475 training return: tensor(-2713.9524, device='cuda:0')
epoch: 119 test_true_pfm: -18.952517418366934
episode: 476 training return: tensor(-1992.5438, device='cuda:0')
episode: 477 training return: tensor(-2317.4072, device='cuda:0')
episode: 478 training return: tensor(-1850.9558, device='cuda:0')
episode: 479 training return: tensor(-3360.2681, device='cuda:0')
epoch: 120 test_true_pfm: -19.238191826476726
episode: 480 training return: tensor(-1950.0109, device='cuda:0')
episode: 481 training return: tensor(-2462.9629, device='cuda:0')
episode: 482 training return: tensor(-2535.4248, device='cuda:0')
episode: 483 training return: tensor(-1881.8141, device='cuda:0')
epoch: 121 test_true_pfm: -18.49382252568186
episode: 484 training return: tensor(-1540.2526, device='cuda:0')
episode: 485 training return: tensor(-1867.0189, device='cuda:0')
episode: 486 training return: tensor(-1795.0024, device='cuda:0')
episode: 487 training return: tensor(-1552.3036, device='cuda:0')
epoch: 122 test_true_pfm: -19.38029454808032
episode: 488 training return: tensor(-1959.7827, device='cuda:0')
episode: 489 training return: tensor(-2503.1179, device='cuda:0')
episode: 490 training return: tensor(-2645.9756, device='cuda:0')
episode: 491 training return: tensor(-2228.2385, device='cuda:0')
epoch: 123 test_true_pfm: -20.06074947704311
episode: 492 training return: tensor(-2434.8896, device='cuda:0')
episode: 493 training return: tensor(-4655.6064, device='cuda:0')
episode: 494 training return: tensor(-3559.5042, device='cuda:0')
episode: 495 training return: tensor(-1931.2941, device='cuda:0')
epoch: 124 test_true_pfm: -20.112150441089774
episode: 496 training return: tensor(-1800.7867, device='cuda:0')
episode: 497 training return: tensor(-1574.6823, device='cuda:0')
episode: 498 training return: tensor(-2827.1724, device='cuda:0')
episode: 499 training return: tensor(-1591.9930, device='cuda:0')
epoch: 125 test_true_pfm: -19.593658702836287
episode: 500 training return: tensor(-1765.0541, device='cuda:0')
episode: 501 training return: tensor(-1779.7491, device='cuda:0')
episode: 502 training return: tensor(-2153.2427, device='cuda:0')
episode: 503 training return: tensor(-1524.6577, device='cuda:0')
epoch: 126 test_true_pfm: -16.028618833292526
episode: 504 training return: tensor(-2070.6450, device='cuda:0')
episode: 505 training return: tensor(-1391.1116, device='cuda:0')
episode: 506 training return: tensor(-1825.0411, device='cuda:0')
episode: 507 training return: tensor(-2291.1833, device='cuda:0')
epoch: 127 test_true_pfm: -15.768721962986666
episode: 508 training return: tensor(-2433.6768, device='cuda:0')
episode: 509 training return: tensor(-1423.8433, device='cuda:0')
episode: 510 training return: tensor(-1457.6749, device='cuda:0')
episode: 511 training return: tensor(-1282.2828, device='cuda:0')
epoch: 128 test_true_pfm: -13.209538041160169
episode: 512 training return: tensor(-1721.0713, device='cuda:0')
episode: 513 training return: tensor(-1375.0913, device='cuda:0')
episode: 514 training return: tensor(-1446.6169, device='cuda:0')
episode: 515 training return: tensor(-1279.8785, device='cuda:0')
epoch: 129 test_true_pfm: -12.977897046532338
episode: 516 training return: tensor(-1244.4612, device='cuda:0')
episode: 517 training return: tensor(-1340.4727, device='cuda:0')
episode: 518 training return: tensor(-1327.3260, device='cuda:0')
episode: 519 training return: tensor(-1136.8872, device='cuda:0')
epoch: 130 test_true_pfm: -14.074542602067158
episode: 520 training return: tensor(-1159.1586, device='cuda:0')
episode: 521 training return: tensor(-1185.9159, device='cuda:0')
episode: 522 training return: tensor(-1594.1355, device='cuda:0')
episode: 523 training return: tensor(-1590.1821, device='cuda:0')
epoch: 131 test_true_pfm: -14.751446332105939
episode: 524 training return: tensor(-1358.5012, device='cuda:0')
episode: 525 training return: tensor(-1213.9541, device='cuda:0')
episode: 526 training return: tensor(-1141.7938, device='cuda:0')
episode: 527 training return: tensor(-1225.7173, device='cuda:0')
epoch: 132 test_true_pfm: -13.21516450734203
episode: 528 training return: tensor(-1105.1309, device='cuda:0')
episode: 529 training return: tensor(-1146.1288, device='cuda:0')
episode: 530 training return: tensor(-1204.3920, device='cuda:0')
episode: 531 training return: tensor(-1249.0558, device='cuda:0')
epoch: 133 test_true_pfm: -10.518535069427177
episode: 532 training return: tensor(-1166.5312, device='cuda:0')
episode: 533 training return: tensor(-1198.2041, device='cuda:0')
episode: 534 training return: tensor(-1214.5660, device='cuda:0')
episode: 535 training return: tensor(-1367.7986, device='cuda:0')
epoch: 134 test_true_pfm: -12.296798868250935
episode: 536 training return: tensor(-1132.8083, device='cuda:0')
episode: 537 training return: tensor(-1324.7864, device='cuda:0')
episode: 538 training return: tensor(-1341.3577, device='cuda:0')
episode: 539 training return: tensor(-1681.8932, device='cuda:0')
epoch: 135 test_true_pfm: -10.265228161486695
episode: 540 training return: tensor(-1202.3529, device='cuda:0')
episode: 541 training return: tensor(-1138.0052, device='cuda:0')
episode: 542 training return: tensor(-1175.2854, device='cuda:0')
episode: 543 training return: tensor(-1127.2002, device='cuda:0')
epoch: 136 test_true_pfm: -9.91955525315544
episode: 544 training return: tensor(-1314.9718, device='cuda:0')
episode: 545 training return: tensor(-1180.3541, device='cuda:0')
episode: 546 training return: tensor(-1182.3831, device='cuda:0')
episode: 547 training return: tensor(-1207.6067, device='cuda:0')
epoch: 137 test_true_pfm: -15.012460702932648
episode: 548 training return: tensor(-1245.5802, device='cuda:0')
episode: 549 training return: tensor(-1268.8475, device='cuda:0')
episode: 550 training return: tensor(-1281.1742, device='cuda:0')
episode: 551 training return: tensor(-1358.1018, device='cuda:0')
epoch: 138 test_true_pfm: -6.53850160689687
episode: 552 training return: tensor(-1780.7885, device='cuda:0')
episode: 553 training return: tensor(-2103.1572, device='cuda:0')
episode: 554 training return: tensor(-2131.2463, device='cuda:0')
episode: 555 training return: tensor(-2494.5613, device='cuda:0')
epoch: 139 test_true_pfm: -4.510907406351984
episode: 556 training return: tensor(-2170.6743, device='cuda:0')
episode: 557 training return: tensor(-2246.2068, device='cuda:0')
episode: 558 training return: tensor(-2128.9558, device='cuda:0')
episode: 559 training return: tensor(-2371.0356, device='cuda:0')
epoch: 140 test_true_pfm: 11.841282687766244
episode: 560 training return: tensor(-2509.2476, device='cuda:0')
episode: 561 training return: tensor(-2191.9705, device='cuda:0')
episode: 562 training return: tensor(-2358.2549, device='cuda:0')
episode: 563 training return: tensor(-2163.9963, device='cuda:0')
epoch: 141 test_true_pfm: -16.219887975079455
episode: 564 training return: tensor(-2959.8713, device='cuda:0')
episode: 565 training return: tensor(-2760.9302, device='cuda:0')
episode: 566 training return: tensor(-1844.0275, device='cuda:0')
episode: 567 training return: tensor(-1821.1964, device='cuda:0')
epoch: 142 test_true_pfm: 12.45869577089458
episode: 568 training return: tensor(-2493.6494, device='cuda:0')
episode: 569 training return: tensor(-2639.3562, device='cuda:0')
episode: 570 training return: tensor(-2264.3149, device='cuda:0')
episode: 571 training return: tensor(-2373.4595, device='cuda:0')
epoch: 143 test_true_pfm: -16.952447522771315
episode: 572 training return: tensor(-2275.4734, device='cuda:0')
episode: 573 training return: tensor(-2361.0181, device='cuda:0')
episode: 574 training return: tensor(-940.5759, device='cuda:0')
episode: 575 training return: tensor(-1808.5842, device='cuda:0')
epoch: 144 test_true_pfm: -18.21745540634593
episode: 576 training return: tensor(-2421.2170, device='cuda:0')
episode: 577 training return: tensor(-2367.4426, device='cuda:0')
episode: 578 training return: tensor(-2409.9192, device='cuda:0')
episode: 579 training return: tensor(-2434.1738, device='cuda:0')
epoch: 145 test_true_pfm: -16.728640382699272
episode: 580 training return: tensor(-1889.4062, device='cuda:0')
episode: 581 training return: tensor(-2030.8486, device='cuda:0')
episode: 582 training return: tensor(-2720.9023, device='cuda:0')
episode: 583 training return: tensor(-2391.0825, device='cuda:0')
epoch: 146 test_true_pfm: -18.52267024722635
episode: 584 training return: tensor(-2855.8386, device='cuda:0')
episode: 585 training return: tensor(-2423.9182, device='cuda:0')
episode: 586 training return: tensor(-2188.7456, device='cuda:0')
episode: 587 training return: tensor(-2378.0806, device='cuda:0')
epoch: 147 test_true_pfm: -16.691423897048704
episode: 588 training return: tensor(-1980.4637, device='cuda:0')
episode: 589 training return: tensor(-1846.7526, device='cuda:0')
episode: 590 training return: tensor(-2487.8132, device='cuda:0')
episode: 591 training return: tensor(-2238.6919, device='cuda:0')
epoch: 148 test_true_pfm: -19.52009455968917
episode: 592 training return: tensor(-2363.2056, device='cuda:0')
episode: 593 training return: tensor(-2253.5105, device='cuda:0')
episode: 594 training return: tensor(-1111.2089, device='cuda:0')
episode: 595 training return: tensor(-1985.9135, device='cuda:0')
epoch: 149 test_true_pfm: -16.024370024736864
episode: 596 training return: tensor(-938.1276, device='cuda:0')
episode: 597 training return: tensor(-2287.7817, device='cuda:0')
episode: 598 training return: tensor(-2180.1560, device='cuda:0')
episode: 599 training return: tensor(-793.0284, device='cuda:0')
epoch: 150 test_true_pfm: -17.417746506015263
