['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'brac', '--traj', 'medium', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 4.2922273135185245 test_loss: 3.0106389999389647
epoch: 1 training_loss 2.363718082904816 test_loss: 1.8199653625488281
epoch: 2 training_loss 1.422613252401352 test_loss: 0.9512990951538086
epoch: 3 training_loss 0.8267294256389142 test_loss: 0.5157157897949218
epoch: 4 training_loss 0.35122853410895916 test_loss: 0.16570768356323243
epoch: 5 training_loss 0.0904664352023974 test_loss: -0.1484628438949585
epoch: 6 training_loss -0.18046197490300983 test_loss: -0.34487483501434324
epoch: 7 training_loss -0.4471993410307914 test_loss: -0.5322072029113769
epoch: 8 training_loss -0.6058302508294582 test_loss: -0.8826794624328613
epoch: 9 training_loss -0.8652608427405357 test_loss: -1.0159947395324707
epoch: 10 training_loss -1.014326704442501 test_loss: -1.084474754333496
epoch: 11 training_loss -1.125291449725628 test_loss: -1.1883537292480468
epoch: 12 training_loss -1.2567151921987534 test_loss: -1.363936996459961
epoch: 13 training_loss -1.3594649112969637 test_loss: -1.3695049285888672
epoch: 14 training_loss -1.5208038419485093 test_loss: -1.5547926902770997
epoch: 15 training_loss -1.5888312047719955 test_loss: -1.565479564666748
epoch: 16 training_loss -1.6721039819717407 test_loss: -1.8747451782226563
epoch: 17 training_loss -1.6962757202982903 test_loss: -1.7813129425048828
epoch: 18 training_loss -1.8532668590545653 test_loss: -1.9491447448730468
epoch: 19 training_loss -1.9217518025636673 test_loss: -2.024247741699219
epoch: 20 training_loss -2.073850781917572 test_loss: -2.077440643310547
epoch: 21 training_loss -2.114832036495209 test_loss: -2.0813104629516603
epoch: 22 training_loss -2.211853972673416 test_loss: -2.3899215698242187
epoch: 23 training_loss -2.2414831101894377 test_loss: -2.2322696685791015
epoch: 24 training_loss -2.2350501215457914 test_loss: -2.4231845855712892
epoch: 25 training_loss -2.3521027970314026 test_loss: -2.2141069412231444
epoch: 26 training_loss -2.44093635559082 test_loss: -2.4424980163574217
epoch: 27 training_loss -2.475824165344238 test_loss: -2.6258827209472657
epoch: 28 training_loss -2.5415762877464294 test_loss: -2.57669677734375
epoch: 29 training_loss -2.5086080133914948 test_loss: -2.54451847076416
epoch: 30 training_loss -2.619681007862091 test_loss: -2.64023494720459
epoch: 31 training_loss -2.6438248908519744 test_loss: -2.654007911682129
epoch: 32 training_loss -2.6831311917304994 test_loss: -2.7396703720092774
epoch: 33 training_loss -2.712008099555969 test_loss: -2.8072410583496095
epoch: 34 training_loss -2.7856163585186007 test_loss: -2.7904565811157225
epoch: 35 training_loss -2.82674245595932 test_loss: -2.9665855407714843
epoch: 36 training_loss -2.8773539090156555 test_loss: -2.987160301208496
epoch: 37 training_loss -2.8939854192733763 test_loss: -2.926305961608887
epoch: 38 training_loss -2.8959349036216735 test_loss: -2.8954980850219725
epoch: 39 training_loss -2.881290678977966 test_loss: -3.0319604873657227
epoch: 40 training_loss -2.9564721465110777 test_loss: -3.060993957519531
epoch: 41 training_loss -3.0162360644340516 test_loss: -3.0793357849121095
epoch: 42 training_loss -2.978130338191986 test_loss: -3.0137765884399412
epoch: 43 training_loss -2.9943871688842774 test_loss: -3.1170095443725585
epoch: 44 training_loss -3.0079163408279417 test_loss: -3.0152942657470705
epoch: 45 training_loss -3.0844202995300294 test_loss: -2.940459632873535
epoch: 46 training_loss -3.1346316599845885 test_loss: -3.2047195434570312
epoch: 47 training_loss -3.1479907989501954 test_loss: -3.188455009460449
epoch: 48 training_loss -3.1637785983085633 test_loss: -3.1707916259765625
epoch: 49 training_loss -3.178667676448822 test_loss: -3.2252643585205076
epoch: 50 training_loss -3.2338161849975586 test_loss: -3.2696685791015625
epoch: 51 training_loss -3.2426619863510133 test_loss: -3.0799091339111326
epoch: 52 training_loss -3.146188542842865 test_loss: -3.2282993316650392
epoch: 53 training_loss -3.286636474132538 test_loss: -3.2784912109375
epoch: 54 training_loss -3.348804306983948 test_loss: -3.24891357421875
epoch: 55 training_loss -3.3887634587287905 test_loss: -3.325847625732422
epoch: 56 training_loss -3.3647974061965944 test_loss: -3.436265563964844
epoch: 57 training_loss -3.382330205440521 test_loss: -3.4185554504394533
epoch: 58 training_loss -3.462021734714508 test_loss: -3.382187271118164
epoch: 59 training_loss -3.45367769241333 test_loss: -3.526558685302734
epoch: 60 training_loss -3.5133454179763794 test_loss: -3.6368568420410154
epoch: 61 training_loss -3.5785401129722594 test_loss: -3.6108734130859377
epoch: 62 training_loss -3.5749293756484986 test_loss: -3.5764957427978517
epoch: 63 training_loss -3.5376536560058596 test_loss: -3.6004825592041017
epoch: 64 training_loss -3.591460471153259 test_loss: -3.584820556640625
epoch: 65 training_loss -3.5199406218528746 test_loss: -3.6101608276367188
epoch: 66 training_loss -3.6353048372268675 test_loss: -3.709669494628906
epoch: 67 training_loss -3.6430239272117615 test_loss: -3.6376365661621093
epoch: 68 training_loss -3.6714428853988648 test_loss: -3.8235565185546876
epoch: 69 training_loss -3.7469524216651915 test_loss: -3.823591613769531
epoch: 70 training_loss -3.7547243857383727 test_loss: -3.705652618408203
epoch: 71 training_loss -3.7899579071998595 test_loss: -3.756755065917969
epoch: 72 training_loss -3.794265294075012 test_loss: -3.8744842529296877
epoch: 73 training_loss -3.782038311958313 test_loss: -3.776647186279297
epoch: 74 training_loss -3.8479051280021666 test_loss: -3.809690475463867
epoch: 75 training_loss -3.8568593072891235 test_loss: -3.9596633911132812
epoch: 76 training_loss -3.9360694766044615 test_loss: -3.9397823333740236
epoch: 77 training_loss -3.8789108490943907 test_loss: -3.8346771240234374
epoch: 78 training_loss -3.9872710299491883 test_loss: -3.7971542358398436
epoch: 79 training_loss -3.9517035341262816 test_loss: -3.8582611083984375
epoch: 80 training_loss -3.9708384466171265 test_loss: -3.957277297973633
epoch: 81 training_loss -4.0291912484169 test_loss: -3.9865581512451174
epoch: 82 training_loss -4.082159993648529 test_loss: -4.032746887207031
epoch: 83 training_loss -4.107031328678131 test_loss: -4.017856597900391
epoch: 84 training_loss -4.011963651180268 test_loss: -4.158740234375
epoch: 85 training_loss -4.127127912044525 test_loss: -4.084326171875
epoch: 86 training_loss -4.157591619491577 test_loss: -4.162240982055664
epoch: 87 training_loss -4.148017013072968 test_loss: -4.204634475708008
epoch: 88 training_loss -4.230723156929016 test_loss: -4.137945175170898
epoch: 89 training_loss -4.1734418249130245 test_loss: -4.246268463134766
epoch: 90 training_loss -4.150579421520233 test_loss: -4.280296707153321
epoch: 91 training_loss -4.190243003368377 test_loss: -4.054664611816406
epoch: 92 training_loss -4.202327914237976 test_loss: -4.303619766235352
epoch: 93 training_loss -4.2299485421180725 test_loss: -4.330341720581055
epoch: 94 training_loss -4.243626570701599 test_loss: -4.300850677490234
epoch: 95 training_loss -4.215706429481506 test_loss: -4.393764495849609
epoch: 96 training_loss -4.275804409980774 test_loss: -4.391406631469726
epoch: 97 training_loss -4.294521210193634 test_loss: -4.229341888427735
epoch: 98 training_loss -4.382183454036713 test_loss: -4.020468902587891
epoch: 99 training_loss -4.362633893489837 test_loss: -4.445872497558594
epoch: 100 training_loss -4.386110007762909 test_loss: -4.440428924560547
epoch: 101 training_loss -4.386514642238617 test_loss: -4.4492145538330075
epoch: 102 training_loss -4.45371780872345 test_loss: -4.410615158081055
epoch: 103 training_loss -4.394119691848755 test_loss: -4.268473434448242
epoch: 104 training_loss -4.484426805973053 test_loss: -4.455160522460938
epoch: 105 training_loss -4.482236361503601 test_loss: -4.481048965454102
epoch: 106 training_loss -4.472895925045013 test_loss: -4.448173141479492
epoch: 107 training_loss -4.499846706390381 test_loss: -4.55577507019043
epoch: 108 training_loss -4.478600468635559 test_loss: -4.60246467590332
epoch: 109 training_loss -4.514497876167297 test_loss: -4.476539611816406
epoch: 110 training_loss -4.540551347732544 test_loss: -4.56651496887207
epoch: 111 training_loss -4.584930462837219 test_loss: -4.647776031494141
epoch: 112 training_loss -4.574690768718719 test_loss: -4.510715484619141
epoch: 113 training_loss -4.607172441482544 test_loss: -4.771023941040039
epoch: 114 training_loss -4.5925468158721925 test_loss: -4.615034484863282
epoch: 115 training_loss -4.634644432067871 test_loss: -4.3908344268798825
epoch: 116 training_loss -4.667381987571717 test_loss: -4.7066600799560545
epoch: 117 training_loss -4.629678087234497 test_loss: -4.607949066162109
epoch: 118 training_loss -4.661640386581421 test_loss: -4.806973266601562
epoch: 119 training_loss -4.676750502586365 test_loss: -4.69951057434082
epoch: 120 training_loss -4.688137121200562 test_loss: -4.75416145324707
epoch: 121 training_loss -4.713397521972656 test_loss: -4.775869750976563
epoch: 122 training_loss -4.718177003860474 test_loss: -4.48009147644043
epoch: 123 training_loss -4.69373327255249 test_loss: -4.833065032958984
epoch: 124 training_loss -4.745082120895386 test_loss: -4.797422409057617
epoch: 125 training_loss -4.762668266296386 test_loss: -4.734047698974609
epoch: 126 training_loss -4.775373311042785 test_loss: -4.876242446899414
epoch: 127 training_loss -4.797985305786133 test_loss: -4.759194946289062
epoch: 128 training_loss -4.777786722183228 test_loss: -4.630421829223633
epoch: 129 training_loss -4.77972891330719 test_loss: -4.890025329589844
epoch: 130 training_loss -4.8444124031066895 test_loss: -4.841460037231445
epoch: 131 training_loss -4.879574875831604 test_loss: -4.920329284667969
epoch: 132 training_loss -4.8438116812706 test_loss: -4.895245742797852
epoch: 133 training_loss -4.922734336853027 test_loss: -4.957189559936523
epoch: 134 training_loss -4.918967471122742 test_loss: -4.8473762512207035
epoch: 135 training_loss -4.9367075490951535 test_loss: -4.9167743682861325
epoch: 136 training_loss -4.913701477050782 test_loss: -4.9809307098388675
epoch: 137 training_loss -4.934601984024048 test_loss: -4.849641418457031
epoch: 138 training_loss -4.945958948135376 test_loss: -4.9867706298828125
epoch: 139 training_loss -5.060384926795959 test_loss: -5.001888656616211
epoch: 140 training_loss -5.005881252288819 test_loss: -4.857550048828125
epoch: 141 training_loss -5.016420497894287 test_loss: -4.836038589477539
epoch: 142 training_loss -4.969723601341247 test_loss: -5.033938217163086
epoch: 143 training_loss -4.998162522315979 test_loss: -5.018717956542969
epoch: 144 training_loss -4.997577953338623 test_loss: -5.0962677001953125
epoch: 145 training_loss -5.071662230491638 test_loss: -5.151168441772461
epoch: 146 training_loss -5.0517924118041995 test_loss: -5.068017578125
epoch: 147 training_loss -5.101124510765076 test_loss: -4.958815383911133
epoch: 148 training_loss -5.126903781890869 test_loss: -5.124937057495117
epoch: 149 training_loss -5.104088706970215 test_loss: -5.187507629394531
13.523055012548468
episode: 0 training return: tensor(-3.0578e+10, device='cuda:0')
episode: 1 training return: tensor(-5.8357e+10, device='cuda:0')
episode: 2 training return: tensor(-3.7211e+10, device='cuda:0')
episode: 3 training return: tensor(-4.6304e+12, device='cuda:0')
epoch: 1 test_true_pfm: -36.79234443714561
episode: 4 training return: tensor(-3.3403e+09, device='cuda:0')
episode: 5 training return: tensor(-3.1869e+11, device='cuda:0')
episode: 6 training return: tensor(-1.4782e+09, device='cuda:0')
episode: 7 training return: tensor(-1.4124e+08, device='cuda:0')
epoch: 2 test_true_pfm: -21.960493547206113
episode: 8 training return: tensor(-8.8599e+09, device='cuda:0')
episode: 9 training return: tensor(-1.1518e+12, device='cuda:0')
episode: 10 training return: tensor(-1270529.3750, device='cuda:0')
episode: 11 training return: tensor(-748910.3750, device='cuda:0')
epoch: 3 test_true_pfm: -29.033900759011622
episode: 12 training return: tensor(-335653.2812, device='cuda:0')
episode: 13 training return: tensor(-299385.4688, device='cuda:0')
episode: 14 training return: tensor(-207032.1094, device='cuda:0')
episode: 15 training return: tensor(-921284.3125, device='cuda:0')
epoch: 4 test_true_pfm: -30.05919433369464
episode: 16 training return: tensor(-39053.5195, device='cuda:0')
episode: 17 training return: tensor(-27865.2715, device='cuda:0')
episode: 18 training return: tensor(-36765.8906, device='cuda:0')
episode: 19 training return: tensor(-61763.8242, device='cuda:0')
epoch: 5 test_true_pfm: -26.51719944493457
episode: 20 training return: tensor(-7980.0137, device='cuda:0')
episode: 21 training return: tensor(-7085.1055, device='cuda:0')
episode: 22 training return: tensor(-9057.6709, device='cuda:0')
episode: 23 training return: tensor(-10652.8906, device='cuda:0')
epoch: 6 test_true_pfm: -18.40317757585334
episode: 24 training return: tensor(-17733.3008, device='cuda:0')
episode: 25 training return: tensor(-7991.9985, device='cuda:0')
episode: 26 training return: tensor(-16805.6094, device='cuda:0')
episode: 27 training return: tensor(-11775.3457, device='cuda:0')
epoch: 7 test_true_pfm: -14.761916397098735
episode: 28 training return: tensor(-11830.2676, device='cuda:0')
episode: 29 training return: tensor(-9049.8369, device='cuda:0')
episode: 30 training return: tensor(-9211.8027, device='cuda:0')
episode: 31 training return: tensor(-6061.5444, device='cuda:0')
epoch: 8 test_true_pfm: -2.964587433969903
episode: 32 training return: tensor(-17132.8418, device='cuda:0')
episode: 33 training return: tensor(-12076.7100, device='cuda:0')
episode: 34 training return: tensor(-9338.7832, device='cuda:0')
episode: 35 training return: tensor(-19432.8320, device='cuda:0')
epoch: 9 test_true_pfm: -2.3293310012137525
episode: 36 training return: tensor(-1613.2146, device='cuda:0')
episode: 37 training return: tensor(-8764.7441, device='cuda:0')
episode: 38 training return: tensor(-1759.0726, device='cuda:0')
episode: 39 training return: tensor(-5300.6294, device='cuda:0')
epoch: 10 test_true_pfm: 2.957143058496139
episode: 40 training return: tensor(-6434.7207, device='cuda:0')
episode: 41 training return: tensor(-1510.5201, device='cuda:0')
episode: 42 training return: tensor(-1525.9390, device='cuda:0')
episode: 43 training return: tensor(-1523.9479, device='cuda:0')
epoch: 11 test_true_pfm: 6.578891126552243
episode: 44 training return: tensor(-1560.7328, device='cuda:0')
episode: 45 training return: tensor(-1616.2690, device='cuda:0')
episode: 46 training return: tensor(-40209.3828, device='cuda:0')
episode: 47 training return: tensor(-1422.7482, device='cuda:0')
epoch: 12 test_true_pfm: 7.45550723280572
episode: 48 training return: tensor(-1334.4004, device='cuda:0')
episode: 49 training return: tensor(-27933.9297, device='cuda:0')
episode: 50 training return: tensor(-49647.7031, device='cuda:0')
episode: 51 training return: tensor(-50661.9961, device='cuda:0')
epoch: 13 test_true_pfm: 14.29315799882489
episode: 52 training return: tensor(-27210.5703, device='cuda:0')
episode: 53 training return: tensor(-42516.5859, device='cuda:0')
episode: 54 training return: tensor(-13238.9834, device='cuda:0')
episode: 55 training return: tensor(-20219.0801, device='cuda:0')
epoch: 14 test_true_pfm: 0.41640162840794337
episode: 56 training return: tensor(-47490.6250, device='cuda:0')
episode: 57 training return: tensor(-20117.0938, device='cuda:0')
episode: 58 training return: tensor(-28289.0293, device='cuda:0')
episode: 59 training return: tensor(-2888.0176, device='cuda:0')
epoch: 15 test_true_pfm: -1.144409608723796
episode: 60 training return: tensor(-3725.2234, device='cuda:0')
episode: 61 training return: tensor(-65695.7891, device='cuda:0')
episode: 62 training return: tensor(-53401.0938, device='cuda:0')
episode: 63 training return: tensor(-9189.9932, device='cuda:0')
epoch: 16 test_true_pfm: -1.3325794498687666
episode: 64 training return: tensor(-42728.4492, device='cuda:0')
episode: 65 training return: tensor(-2396.7852, device='cuda:0')
episode: 66 training return: tensor(-3681.0620, device='cuda:0')
episode: 67 training return: tensor(-29527.3203, device='cuda:0')
epoch: 17 test_true_pfm: -1.339830082927921
episode: 68 training return: tensor(-12012.2236, device='cuda:0')
episode: 69 training return: tensor(-2477.8237, device='cuda:0')
episode: 70 training return: tensor(-3384.7837, device='cuda:0')
episode: 71 training return: tensor(-7716.0054, device='cuda:0')
epoch: 18 test_true_pfm: -11.737107378711325
episode: 72 training return: tensor(-4549.0620, device='cuda:0')
episode: 73 training return: tensor(-25243.6270, device='cuda:0')
episode: 74 training return: tensor(-7393.4619, device='cuda:0')
episode: 75 training return: tensor(-8997.2959, device='cuda:0')
epoch: 19 test_true_pfm: -5.8576536429113535
episode: 76 training return: tensor(-75668.5938, device='cuda:0')
episode: 77 training return: tensor(-52875.0039, device='cuda:0')
episode: 78 training return: tensor(-100775.9062, device='cuda:0')
episode: 79 training return: tensor(-22372.2676, device='cuda:0')
epoch: 20 test_true_pfm: -4.227436378061624
episode: 80 training return: tensor(-9106.6367, device='cuda:0')
episode: 81 training return: tensor(-48085.7656, device='cuda:0')
episode: 82 training return: tensor(-1926.4601, device='cuda:0')
episode: 83 training return: tensor(-19851.2441, device='cuda:0')
epoch: 21 test_true_pfm: -16.04498042606088
episode: 84 training return: tensor(-49945.1914, device='cuda:0')
episode: 85 training return: tensor(-1432.6089, device='cuda:0')
episode: 86 training return: tensor(-17825.7207, device='cuda:0')
episode: 87 training return: tensor(-42759.6406, device='cuda:0')
epoch: 22 test_true_pfm: -14.59400356828857
episode: 88 training return: tensor(-78881.5000, device='cuda:0')
episode: 89 training return: tensor(-40770.4180, device='cuda:0')
episode: 90 training return: tensor(-61174.7148, device='cuda:0')
episode: 91 training return: tensor(-17721.6426, device='cuda:0')
epoch: 23 test_true_pfm: -13.004601432355596
episode: 92 training return: tensor(-10712.2324, device='cuda:0')
episode: 93 training return: tensor(-1506.2863, device='cuda:0')
episode: 94 training return: tensor(-9208.5469, device='cuda:0')
episode: 95 training return: tensor(-18820.2559, device='cuda:0')
epoch: 24 test_true_pfm: -13.80130198515692
episode: 96 training return: tensor(-26724.6641, device='cuda:0')
episode: 97 training return: tensor(-52063.2266, device='cuda:0')
episode: 98 training return: tensor(-64885.7344, device='cuda:0')
episode: 99 training return: tensor(-2115.5850, device='cuda:0')
epoch: 25 test_true_pfm: -12.160041094190703
episode: 100 training return: tensor(-96107.7266, device='cuda:0')
episode: 101 training return: tensor(-136716.3750, device='cuda:0')
episode: 102 training return: tensor(-50460.4062, device='cuda:0')
episode: 103 training return: tensor(-17302.1699, device='cuda:0')
epoch: 26 test_true_pfm: -14.0065047890053
episode: 104 training return: tensor(-13250.0605, device='cuda:0')
episode: 105 training return: tensor(-116893.6719, device='cuda:0')
episode: 106 training return: tensor(-17726.3301, device='cuda:0')
episode: 107 training return: tensor(-2124.7278, device='cuda:0')
epoch: 27 test_true_pfm: -16.33230014700643
episode: 108 training return: tensor(-19369.6543, device='cuda:0')
episode: 109 training return: tensor(-43274.4727, device='cuda:0')
episode: 110 training return: tensor(-14878.3242, device='cuda:0')
episode: 111 training return: tensor(-4663.0303, device='cuda:0')
epoch: 28 test_true_pfm: -11.128898041009823
episode: 112 training return: tensor(-74681.7969, device='cuda:0')
episode: 113 training return: tensor(-15196.3076, device='cuda:0')
episode: 114 training return: tensor(-23043.5605, device='cuda:0')
episode: 115 training return: tensor(-10911.1787, device='cuda:0')
epoch: 29 test_true_pfm: -11.86877239869107
episode: 116 training return: tensor(-11804.7109, device='cuda:0')
episode: 117 training return: tensor(-236972.7188, device='cuda:0')
episode: 118 training return: tensor(-8217.7490, device='cuda:0')
episode: 119 training return: tensor(-84039.3203, device='cuda:0')
epoch: 30 test_true_pfm: -14.024832362850677
episode: 120 training return: tensor(-38055.2305, device='cuda:0')
episode: 121 training return: tensor(-19250.0273, device='cuda:0')
episode: 122 training return: tensor(-74896.9062, device='cuda:0')
episode: 123 training return: tensor(-52503.6016, device='cuda:0')
epoch: 31 test_true_pfm: -13.71066164899031
episode: 124 training return: tensor(-77444.4531, device='cuda:0')
episode: 125 training return: tensor(-1488.0479, device='cuda:0')
episode: 126 training return: tensor(-73841.0703, device='cuda:0')
episode: 127 training return: tensor(-1210.8956, device='cuda:0')
epoch: 32 test_true_pfm: -15.718680589476074
episode: 128 training return: tensor(-14326.5635, device='cuda:0')
episode: 129 training return: tensor(-8659.9893, device='cuda:0')
episode: 130 training return: tensor(-20196.0488, device='cuda:0')
episode: 131 training return: tensor(-55189.3516, device='cuda:0')
epoch: 33 test_true_pfm: -14.617170657507803
episode: 132 training return: tensor(-10803.2354, device='cuda:0')
episode: 133 training return: tensor(-37502.3945, device='cuda:0')
episode: 134 training return: tensor(-22720.9590, device='cuda:0')
episode: 135 training return: tensor(-15841.6328, device='cuda:0')
epoch: 34 test_true_pfm: -13.256816339467267
episode: 136 training return: tensor(-15195.4150, device='cuda:0')
episode: 137 training return: tensor(-14416.2920, device='cuda:0')
episode: 138 training return: tensor(-35711.9805, device='cuda:0')
episode: 139 training return: tensor(-19231.6270, device='cuda:0')
epoch: 35 test_true_pfm: -17.200716621527
episode: 140 training return: tensor(-63523.9961, device='cuda:0')
episode: 141 training return: tensor(-59924.7617, device='cuda:0')
episode: 142 training return: tensor(-12987.5000, device='cuda:0')
episode: 143 training return: tensor(-1426.3623, device='cuda:0')
epoch: 36 test_true_pfm: -16.129628355234377
episode: 144 training return: tensor(-156927.4531, device='cuda:0')
episode: 145 training return: tensor(-57859.6406, device='cuda:0')
episode: 146 training return: tensor(-36419.7266, device='cuda:0')
episode: 147 training return: tensor(-63227.6289, device='cuda:0')
epoch: 37 test_true_pfm: -12.11056823887433
episode: 148 training return: tensor(-24035.0664, device='cuda:0')
episode: 149 training return: tensor(-27494.0098, device='cuda:0')
episode: 150 training return: tensor(-90013.7188, device='cuda:0')
episode: 151 training return: tensor(-8422.4463, device='cuda:0')
epoch: 38 test_true_pfm: -15.18409154871496
episode: 152 training return: tensor(-89801.1172, device='cuda:0')
episode: 153 training return: tensor(-18882.3262, device='cuda:0')
episode: 154 training return: tensor(-25802.9570, device='cuda:0')
episode: 155 training return: tensor(-74473.4453, device='cuda:0')
epoch: 39 test_true_pfm: -18.105522126802242
episode: 156 training return: tensor(-89107.3516, device='cuda:0')
episode: 157 training return: tensor(-132789.2812, device='cuda:0')
episode: 158 training return: tensor(-19368.5586, device='cuda:0')
episode: 159 training return: tensor(-26171.1250, device='cuda:0')
epoch: 40 test_true_pfm: -16.003762608618718
episode: 160 training return: tensor(-64085.1445, device='cuda:0')
episode: 161 training return: tensor(-22801.2090, device='cuda:0')
episode: 162 training return: tensor(-28885.3926, device='cuda:0')
episode: 163 training return: tensor(-51680.9805, device='cuda:0')
epoch: 41 test_true_pfm: -14.91779719815734
episode: 164 training return: tensor(-38611.1953, device='cuda:0')
episode: 165 training return: tensor(-2504.6128, device='cuda:0')
episode: 166 training return: tensor(-5115.8096, device='cuda:0')
episode: 167 training return: tensor(-55570.4414, device='cuda:0')
epoch: 42 test_true_pfm: -15.812254630870148
episode: 168 training return: tensor(-174159.3750, device='cuda:0')
episode: 169 training return: tensor(-15609.8018, device='cuda:0')
episode: 170 training return: tensor(-17249.1914, device='cuda:0')
episode: 171 training return: tensor(-31380.7227, device='cuda:0')
epoch: 43 test_true_pfm: -15.93932411556283
episode: 172 training return: tensor(-28755.5703, device='cuda:0')
episode: 173 training return: tensor(-66676.0781, device='cuda:0')
episode: 174 training return: tensor(-19183.5957, device='cuda:0')
episode: 175 training return: tensor(-56317.0781, device='cuda:0')
epoch: 44 test_true_pfm: -13.313114603094158
episode: 176 training return: tensor(-71613.5625, device='cuda:0')
episode: 177 training return: tensor(-43459.0039, device='cuda:0')
episode: 178 training return: tensor(-27224.6426, device='cuda:0')
episode: 179 training return: tensor(-59096.8711, device='cuda:0')
epoch: 45 test_true_pfm: -16.550743120553918
episode: 180 training return: tensor(-5018.5430, device='cuda:0')
episode: 181 training return: tensor(-55238.0625, device='cuda:0')
episode: 182 training return: tensor(-39873.3750, device='cuda:0')
episode: 183 training return: tensor(-168632.1562, device='cuda:0')
epoch: 46 test_true_pfm: -15.100849271617681
episode: 184 training return: tensor(-120409.1172, device='cuda:0')
episode: 185 training return: tensor(-5472.1846, device='cuda:0')
episode: 186 training return: tensor(-17289.6680, device='cuda:0')
episode: 187 training return: tensor(-10006.2480, device='cuda:0')
epoch: 47 test_true_pfm: -19.941182493704538
episode: 188 training return: tensor(-19791.2930, device='cuda:0')
episode: 189 training return: tensor(-161454.9062, device='cuda:0')
episode: 190 training return: tensor(-23661.3125, device='cuda:0')
episode: 191 training return: tensor(-20156.2793, device='cuda:0')
epoch: 48 test_true_pfm: -16.268284577746872
episode: 192 training return: tensor(-8230.2002, device='cuda:0')
episode: 193 training return: tensor(-84004.9219, device='cuda:0')
episode: 194 training return: tensor(-16230.3545, device='cuda:0')
episode: 195 training return: tensor(-48462.3828, device='cuda:0')
epoch: 49 test_true_pfm: -14.146036987612671
episode: 196 training return: tensor(-24876.0410, device='cuda:0')
episode: 197 training return: tensor(-7037.5532, device='cuda:0')
episode: 198 training return: tensor(-8865.9102, device='cuda:0')
episode: 199 training return: tensor(-32222.8945, device='cuda:0')
epoch: 50 test_true_pfm: -14.952694664181944
episode: 200 training return: tensor(-55631.2539, device='cuda:0')
episode: 201 training return: tensor(-78421.2266, device='cuda:0')
episode: 202 training return: tensor(-141303.4375, device='cuda:0')
episode: 203 training return: tensor(-23453.9824, device='cuda:0')
epoch: 51 test_true_pfm: -16.468926472655987
episode: 204 training return: tensor(-112702.8828, device='cuda:0')
episode: 205 training return: tensor(-33147.3008, device='cuda:0')
episode: 206 training return: tensor(-37158.2227, device='cuda:0')
episode: 207 training return: tensor(-129545.7500, device='cuda:0')
epoch: 52 test_true_pfm: -18.4939693341636
episode: 208 training return: tensor(-88071.6953, device='cuda:0')
episode: 209 training return: tensor(-168111.7188, device='cuda:0')
episode: 210 training return: tensor(-191029.1875, device='cuda:0')
episode: 211 training return: tensor(-31775.0625, device='cuda:0')
epoch: 53 test_true_pfm: -18.190302892300373
episode: 212 training return: tensor(-90753.6562, device='cuda:0')
episode: 213 training return: tensor(-44624.0977, device='cuda:0')
episode: 214 training return: tensor(-80997.6953, device='cuda:0')
episode: 215 training return: tensor(-24648.5254, device='cuda:0')
epoch: 54 test_true_pfm: -17.563456852540646
episode: 216 training return: tensor(-13812.9668, device='cuda:0')
episode: 217 training return: tensor(-1922.6388, device='cuda:0')
episode: 218 training return: tensor(-1156.3832, device='cuda:0')
episode: 219 training return: tensor(-24343.2520, device='cuda:0')
epoch: 55 test_true_pfm: -15.645054509365599
episode: 220 training return: tensor(-38403.2305, device='cuda:0')
episode: 221 training return: tensor(-35984.6094, device='cuda:0')
episode: 222 training return: tensor(-149895.5000, device='cuda:0')
episode: 223 training return: tensor(-27321.3184, device='cuda:0')
epoch: 56 test_true_pfm: -17.93242965139918
episode: 224 training return: tensor(-94046.7031, device='cuda:0')
episode: 225 training return: tensor(-58150.0859, device='cuda:0')
episode: 226 training return: tensor(-48229.2617, device='cuda:0')
episode: 227 training return: tensor(-101792.3125, device='cuda:0')
epoch: 57 test_true_pfm: -16.30930003191413
episode: 228 training return: tensor(-104881.0625, device='cuda:0')
episode: 229 training return: tensor(-183084.0625, device='cuda:0')
episode: 230 training return: tensor(-2396.5286, device='cuda:0')
episode: 231 training return: tensor(-16013.7490, device='cuda:0')
epoch: 58 test_true_pfm: -14.389908019037438
episode: 232 training return: tensor(-3417.5444, device='cuda:0')
episode: 233 training return: tensor(-12742.2920, device='cuda:0')
episode: 234 training return: tensor(-86352.9453, device='cuda:0')
episode: 235 training return: tensor(-14011.8340, device='cuda:0')
epoch: 59 test_true_pfm: -16.09744907285824
episode: 236 training return: tensor(-21727.7520, device='cuda:0')
episode: 237 training return: tensor(-9545.1377, device='cuda:0')
episode: 238 training return: tensor(-69147.3203, device='cuda:0')
episode: 239 training return: tensor(-71011.3672, device='cuda:0')
epoch: 60 test_true_pfm: -14.672565459318122
episode: 240 training return: tensor(-9587.7646, device='cuda:0')
episode: 241 training return: tensor(-53868.2227, device='cuda:0')
episode: 242 training return: tensor(-26591.3242, device='cuda:0')
episode: 243 training return: tensor(-43835.5938, device='cuda:0')
epoch: 61 test_true_pfm: -17.472905974761584
episode: 244 training return: tensor(-30292.8242, device='cuda:0')
episode: 245 training return: tensor(-9978.0859, device='cuda:0')
episode: 246 training return: tensor(-6978.6367, device='cuda:0')
episode: 247 training return: tensor(-24722.7559, device='cuda:0')
epoch: 62 test_true_pfm: -11.453092373897917
episode: 248 training return: tensor(-123361.2891, device='cuda:0')
episode: 249 training return: tensor(-152712.6250, device='cuda:0')
episode: 250 training return: tensor(-22357.8066, device='cuda:0')
episode: 251 training return: tensor(-24047.8770, device='cuda:0')
epoch: 63 test_true_pfm: -15.717171103151667
episode: 252 training return: tensor(-14614.0986, device='cuda:0')
episode: 253 training return: tensor(-113364.1562, device='cuda:0')
episode: 254 training return: tensor(-98795.4922, device='cuda:0')
episode: 255 training return: tensor(-112015.7188, device='cuda:0')
epoch: 64 test_true_pfm: -16.18212962123689
episode: 256 training return: tensor(-30069.2715, device='cuda:0')
episode: 257 training return: tensor(-1612.5886, device='cuda:0')
episode: 258 training return: tensor(-28619.2109, device='cuda:0')
episode: 259 training return: tensor(-3691.6174, device='cuda:0')
epoch: 65 test_true_pfm: -16.461276717780816
episode: 260 training return: tensor(-18088.1367, device='cuda:0')
episode: 261 training return: tensor(-21634.5840, device='cuda:0')
episode: 262 training return: tensor(-2021.1473, device='cuda:0')
episode: 263 training return: tensor(-50149.4805, device='cuda:0')
epoch: 66 test_true_pfm: -15.170763987749774
episode: 264 training return: tensor(-78354.2500, device='cuda:0')
episode: 265 training return: tensor(-26209.3320, device='cuda:0')
episode: 266 training return: tensor(-20603.0430, device='cuda:0')
episode: 267 training return: tensor(-9511.0488, device='cuda:0')
epoch: 67 test_true_pfm: -14.646520849408507
episode: 268 training return: tensor(-39155.8047, device='cuda:0')
episode: 269 training return: tensor(-58966.9180, device='cuda:0')
episode: 270 training return: tensor(-2863.8901, device='cuda:0')
episode: 271 training return: tensor(-98054.1484, device='cuda:0')
epoch: 68 test_true_pfm: -17.32266018866561
episode: 272 training return: tensor(-22597.3301, device='cuda:0')
episode: 273 training return: tensor(-73312.4062, device='cuda:0')
episode: 274 training return: tensor(-4589.9194, device='cuda:0')
episode: 275 training return: tensor(-2186.4055, device='cuda:0')
epoch: 69 test_true_pfm: -15.250416007934524
episode: 276 training return: tensor(-42659.2930, device='cuda:0')
episode: 277 training return: tensor(-122526.2344, device='cuda:0')
episode: 278 training return: tensor(-75305.3672, device='cuda:0')
episode: 279 training return: tensor(-39220.2383, device='cuda:0')
epoch: 70 test_true_pfm: -19.12209270423588
episode: 280 training return: tensor(-8494.4580, device='cuda:0')
episode: 281 training return: tensor(-28083.8770, device='cuda:0')
episode: 282 training return: tensor(-125848.1562, device='cuda:0')
episode: 283 training return: tensor(-9336.0195, device='cuda:0')
epoch: 71 test_true_pfm: -16.95979768669796
episode: 284 training return: tensor(-21182.4258, device='cuda:0')
episode: 285 training return: tensor(-93771.0859, device='cuda:0')
episode: 286 training return: tensor(-20825.5762, device='cuda:0')
episode: 287 training return: tensor(-10112.4727, device='cuda:0')
epoch: 72 test_true_pfm: -16.95458260344651
episode: 288 training return: tensor(-74646.9453, device='cuda:0')
episode: 289 training return: tensor(-27780.2070, device='cuda:0')
episode: 290 training return: tensor(-181501.5625, device='cuda:0')
episode: 291 training return: tensor(-26207.4980, device='cuda:0')
epoch: 73 test_true_pfm: -15.223052078217808
episode: 292 training return: tensor(-34940.8984, device='cuda:0')
episode: 293 training return: tensor(-28588.6699, device='cuda:0')
episode: 294 training return: tensor(-182359.6562, device='cuda:0')
episode: 295 training return: tensor(-21674.1133, device='cuda:0')
epoch: 74 test_true_pfm: -13.112426848684104
episode: 296 training return: tensor(-72733.5703, device='cuda:0')
episode: 297 training return: tensor(-29186.6484, device='cuda:0')
episode: 298 training return: tensor(-31973.7461, device='cuda:0')
episode: 299 training return: tensor(-5355.5049, device='cuda:0')
epoch: 75 test_true_pfm: -16.487294656541597
episode: 300 training return: tensor(-45310.1172, device='cuda:0')
episode: 301 training return: tensor(-8053.0449, device='cuda:0')
episode: 302 training return: tensor(-129721.4922, device='cuda:0')
episode: 303 training return: tensor(-5701.9360, device='cuda:0')
epoch: 76 test_true_pfm: -17.50452183667506
episode: 304 training return: tensor(-125002.3516, device='cuda:0')
episode: 305 training return: tensor(-23093.1094, device='cuda:0')
episode: 306 training return: tensor(-162518.2812, device='cuda:0')
episode: 307 training return: tensor(-34336.7344, device='cuda:0')
epoch: 77 test_true_pfm: -18.841321560179658
episode: 308 training return: tensor(-28875.7266, device='cuda:0')
episode: 309 training return: tensor(-115165.5547, device='cuda:0')
episode: 310 training return: tensor(-45997.0078, device='cuda:0')
episode: 311 training return: tensor(-4702.5361, device='cuda:0')
epoch: 78 test_true_pfm: -18.93154073496948
episode: 312 training return: tensor(-132350.3281, device='cuda:0')
episode: 313 training return: tensor(-44060.0703, device='cuda:0')
episode: 314 training return: tensor(-21027.5195, device='cuda:0')
episode: 315 training return: tensor(-6572.3833, device='cuda:0')
epoch: 79 test_true_pfm: -16.535554302161124
episode: 316 training return: tensor(-84906.1406, device='cuda:0')
episode: 317 training return: tensor(-40153.8906, device='cuda:0')
episode: 318 training return: tensor(-151056.2344, device='cuda:0')
episode: 319 training return: tensor(-81835.0625, device='cuda:0')
epoch: 80 test_true_pfm: -17.66585201496263
episode: 320 training return: tensor(-233040.6875, device='cuda:0')
episode: 321 training return: tensor(-46871.5820, device='cuda:0')
episode: 322 training return: tensor(-54813.7695, device='cuda:0')
episode: 323 training return: tensor(-8473.3799, device='cuda:0')
epoch: 81 test_true_pfm: -18.860030166366393
episode: 324 training return: tensor(-41271.6680, device='cuda:0')
episode: 325 training return: tensor(-45926.8164, device='cuda:0')
episode: 326 training return: tensor(-50124.8359, device='cuda:0')
episode: 327 training return: tensor(-35816.9102, device='cuda:0')
epoch: 82 test_true_pfm: -14.430185630844306
episode: 328 training return: tensor(-140274.6250, device='cuda:0')
episode: 329 training return: tensor(-1441.8906, device='cuda:0')
episode: 330 training return: tensor(-42294.7891, device='cuda:0')
episode: 331 training return: tensor(-44658.1133, device='cuda:0')
epoch: 83 test_true_pfm: -18.259924213112747
episode: 332 training return: tensor(-150733.3750, device='cuda:0')
episode: 333 training return: tensor(-84162.2422, device='cuda:0')
episode: 334 training return: tensor(-35515.3398, device='cuda:0')
episode: 335 training return: tensor(-71861.2578, device='cuda:0')
epoch: 84 test_true_pfm: -18.588162937565055
episode: 336 training return: tensor(-36351.7383, device='cuda:0')
episode: 337 training return: tensor(-77835.5469, device='cuda:0')
episode: 338 training return: tensor(-11467.8525, device='cuda:0')
episode: 339 training return: tensor(-52222.8945, device='cuda:0')
epoch: 85 test_true_pfm: -16.01156706547209
episode: 340 training return: tensor(-68480.4219, device='cuda:0')
episode: 341 training return: tensor(-118611.8672, device='cuda:0')
episode: 342 training return: tensor(-100494.8047, device='cuda:0')
episode: 343 training return: tensor(-93523.3516, device='cuda:0')
epoch: 86 test_true_pfm: -16.694158963127542
episode: 344 training return: tensor(-14050.4453, device='cuda:0')
episode: 345 training return: tensor(-26557.1523, device='cuda:0')
episode: 346 training return: tensor(-30400.2344, device='cuda:0')
episode: 347 training return: tensor(-100242.4375, device='cuda:0')
epoch: 87 test_true_pfm: -16.90250664239128
episode: 348 training return: tensor(-30293.6602, device='cuda:0')
episode: 349 training return: tensor(-55927.8008, device='cuda:0')
episode: 350 training return: tensor(-75936.1094, device='cuda:0')
episode: 351 training return: tensor(-13009.3438, device='cuda:0')
epoch: 88 test_true_pfm: -16.27986679194358
episode: 352 training return: tensor(-60585.4883, device='cuda:0')
episode: 353 training return: tensor(-21545.7070, device='cuda:0')
episode: 354 training return: tensor(-20352.8867, device='cuda:0')
episode: 355 training return: tensor(-83354.8281, device='cuda:0')
epoch: 89 test_true_pfm: -15.936191151229838
episode: 356 training return: tensor(-223440.9844, device='cuda:0')
episode: 357 training return: tensor(-97066.0078, device='cuda:0')
episode: 358 training return: tensor(-17430.6074, device='cuda:0')
episode: 359 training return: tensor(-24348.3848, device='cuda:0')
epoch: 90 test_true_pfm: -15.65464107272983
episode: 360 training return: tensor(-20546.4375, device='cuda:0')
episode: 361 training return: tensor(-83756.1797, device='cuda:0')
episode: 362 training return: tensor(-3461.0642, device='cuda:0')
episode: 363 training return: tensor(-21115.2715, device='cuda:0')
epoch: 91 test_true_pfm: -17.033635666027322
episode: 364 training return: tensor(-61482.4141, device='cuda:0')
episode: 365 training return: tensor(-94536.4609, device='cuda:0')
episode: 366 training return: tensor(-16183.6182, device='cuda:0')
episode: 367 training return: tensor(-60472.4492, device='cuda:0')
epoch: 92 test_true_pfm: -17.809021641076924
episode: 368 training return: tensor(-51200.1836, device='cuda:0')
episode: 369 training return: tensor(-54945.1055, device='cuda:0')
episode: 370 training return: tensor(-31071.1465, device='cuda:0')
episode: 371 training return: tensor(-27171.1699, device='cuda:0')
epoch: 93 test_true_pfm: -14.067892816868024
episode: 372 training return: tensor(-21647.4062, device='cuda:0')
episode: 373 training return: tensor(-11715.4902, device='cuda:0')
episode: 374 training return: tensor(-20223.9551, device='cuda:0')
episode: 375 training return: tensor(-62969.6836, device='cuda:0')
epoch: 94 test_true_pfm: -17.4193011130468
episode: 376 training return: tensor(-47839.6641, device='cuda:0')
episode: 377 training return: tensor(-26585.0918, device='cuda:0')
episode: 378 training return: tensor(-167425.5156, device='cuda:0')
episode: 379 training return: tensor(-70136.5156, device='cuda:0')
epoch: 95 test_true_pfm: -18.168359122663794
episode: 380 training return: tensor(-34302.3320, device='cuda:0')
episode: 381 training return: tensor(-25862., device='cuda:0')
episode: 382 training return: tensor(-115330.3203, device='cuda:0')
episode: 383 training return: tensor(-117617.9141, device='cuda:0')
epoch: 96 test_true_pfm: -20.29218485858058
episode: 384 training return: tensor(-77001.6250, device='cuda:0')
episode: 385 training return: tensor(-132112.9844, device='cuda:0')
episode: 386 training return: tensor(-43615.3984, device='cuda:0')
episode: 387 training return: tensor(-20166.7168, device='cuda:0')
epoch: 97 test_true_pfm: -18.81953181956477
episode: 388 training return: tensor(-190152.9375, device='cuda:0')
episode: 389 training return: tensor(-17053.4238, device='cuda:0')
episode: 390 training return: tensor(-149295.8438, device='cuda:0')
episode: 391 training return: tensor(-40224.3867, device='cuda:0')
epoch: 98 test_true_pfm: -13.605241856220498
episode: 392 training return: tensor(-45025.7656, device='cuda:0')
episode: 393 training return: tensor(-21277.0508, device='cuda:0')
episode: 394 training return: tensor(-17518.7637, device='cuda:0')
episode: 395 training return: tensor(-25731.4160, device='cuda:0')
epoch: 99 test_true_pfm: -14.779522601659897
episode: 396 training return: tensor(-135148.1875, device='cuda:0')
episode: 397 training return: tensor(-100832.2266, device='cuda:0')
episode: 398 training return: tensor(-122318.6797, device='cuda:0')
episode: 399 training return: tensor(-10728.5342, device='cuda:0')
epoch: 100 test_true_pfm: -18.983231199251424
episode: 400 training return: tensor(-122220.5781, device='cuda:0')
episode: 401 training return: tensor(-63301.0117, device='cuda:0')
episode: 402 training return: tensor(-177959.9531, device='cuda:0')
episode: 403 training return: tensor(-42817.4297, device='cuda:0')
epoch: 101 test_true_pfm: -18.121073511565776
episode: 404 training return: tensor(-27137.5059, device='cuda:0')
episode: 405 training return: tensor(-18139.7676, device='cuda:0')
episode: 406 training return: tensor(-49205.2188, device='cuda:0')
episode: 407 training return: tensor(-31947.1152, device='cuda:0')
epoch: 102 test_true_pfm: -18.747425530517642
episode: 408 training return: tensor(-98948.3047, device='cuda:0')
episode: 409 training return: tensor(-45441.8750, device='cuda:0')
episode: 410 training return: tensor(-71506.5469, device='cuda:0')
episode: 411 training return: tensor(-52579.6992, device='cuda:0')
epoch: 103 test_true_pfm: -18.869907495216438
episode: 412 training return: tensor(-46160.1055, device='cuda:0')
episode: 413 training return: tensor(-19298.1211, device='cuda:0')
episode: 414 training return: tensor(-45413.8516, device='cuda:0')
episode: 415 training return: tensor(-22076.1426, device='cuda:0')
epoch: 104 test_true_pfm: -18.30497601113317
episode: 416 training return: tensor(-9234.5059, device='cuda:0')
episode: 417 training return: tensor(-3821.9124, device='cuda:0')
episode: 418 training return: tensor(-62068.1172, device='cuda:0')
episode: 419 training return: tensor(-72621.3359, device='cuda:0')
epoch: 105 test_true_pfm: -17.861509462865445
episode: 420 training return: tensor(-53302.4922, device='cuda:0')
episode: 421 training return: tensor(-58277.9141, device='cuda:0')
episode: 422 training return: tensor(-22611.1602, device='cuda:0')
episode: 423 training return: tensor(-75212.1094, device='cuda:0')
epoch: 106 test_true_pfm: -14.391396364451177
episode: 424 training return: tensor(-153181.9531, device='cuda:0')
episode: 425 training return: tensor(-14529.2236, device='cuda:0')
episode: 426 training return: tensor(-24309.4277, device='cuda:0')
episode: 427 training return: tensor(-20940.2891, device='cuda:0')
epoch: 107 test_true_pfm: -17.98045233400667
episode: 428 training return: tensor(-114580.4609, device='cuda:0')
episode: 429 training return: tensor(-26798.7578, device='cuda:0')
episode: 430 training return: tensor(-27530.2578, device='cuda:0')
episode: 431 training return: tensor(-37961.5117, device='cuda:0')
epoch: 108 test_true_pfm: -17.910598934335262
episode: 432 training return: tensor(-51338.6133, device='cuda:0')
episode: 433 training return: tensor(-103056.5859, device='cuda:0')
episode: 434 training return: tensor(-42869.5938, device='cuda:0')
episode: 435 training return: tensor(-19570.3691, device='cuda:0')
epoch: 109 test_true_pfm: -20.686394935412835
episode: 436 training return: tensor(-20590.3809, device='cuda:0')
episode: 437 training return: tensor(-23624.9805, device='cuda:0')
episode: 438 training return: tensor(-23719.5332, device='cuda:0')
episode: 439 training return: tensor(-29029.1973, device='cuda:0')
epoch: 110 test_true_pfm: -17.18248353339955
episode: 440 training return: tensor(-11260.6660, device='cuda:0')
episode: 441 training return: tensor(-7095.4233, device='cuda:0')
episode: 442 training return: tensor(-21308.7188, device='cuda:0')
episode: 443 training return: tensor(-105902.3281, device='cuda:0')
epoch: 111 test_true_pfm: -19.482572439787603
episode: 444 training return: tensor(-48507.6758, device='cuda:0')
episode: 445 training return: tensor(-33288.9648, device='cuda:0')
episode: 446 training return: tensor(-34291.0977, device='cuda:0')
episode: 447 training return: tensor(-55910.6172, device='cuda:0')
epoch: 112 test_true_pfm: -18.60085006589048
episode: 448 training return: tensor(-38275.3086, device='cuda:0')
episode: 449 training return: tensor(-38557.5117, device='cuda:0')
episode: 450 training return: tensor(-65012.4258, device='cuda:0')
episode: 451 training return: tensor(-78767.5938, device='cuda:0')
epoch: 113 test_true_pfm: -18.05449739500264
episode: 452 training return: tensor(-25531.8262, device='cuda:0')
episode: 453 training return: tensor(-24189.7305, device='cuda:0')
episode: 454 training return: tensor(-43489.1797, device='cuda:0')
episode: 455 training return: tensor(-94806.8047, device='cuda:0')
epoch: 114 test_true_pfm: -18.06113391281981
episode: 456 training return: tensor(-38145.1680, device='cuda:0')
episode: 457 training return: tensor(-37487.0820, device='cuda:0')
episode: 458 training return: tensor(-18938.5215, device='cuda:0')
episode: 459 training return: tensor(-11112.3135, device='cuda:0')
epoch: 115 test_true_pfm: -15.422141715458144
episode: 460 training return: tensor(-39681.9102, device='cuda:0')
episode: 461 training return: tensor(-9018.9893, device='cuda:0')
episode: 462 training return: tensor(-211234.0156, device='cuda:0')
episode: 463 training return: tensor(-12175.2617, device='cuda:0')
epoch: 116 test_true_pfm: -15.961965371315358
episode: 464 training return: tensor(-31997.8984, device='cuda:0')
episode: 465 training return: tensor(-173397.1250, device='cuda:0')
episode: 466 training return: tensor(-19585.2168, device='cuda:0')
episode: 467 training return: tensor(-37817.7773, device='cuda:0')
epoch: 117 test_true_pfm: -16.934106115970035
episode: 468 training return: tensor(-96982.4688, device='cuda:0')
episode: 469 training return: tensor(-132704.9531, device='cuda:0')
episode: 470 training return: tensor(-10396.8750, device='cuda:0')
episode: 471 training return: tensor(-15586.0576, device='cuda:0')
epoch: 118 test_true_pfm: -17.170361880528617
episode: 472 training return: tensor(-15843.4004, device='cuda:0')
episode: 473 training return: tensor(-75977.7422, device='cuda:0')
episode: 474 training return: tensor(-89018.9297, device='cuda:0')
episode: 475 training return: tensor(-102793.6172, device='cuda:0')
epoch: 119 test_true_pfm: -18.82493691889594
episode: 476 training return: tensor(-118947.8750, device='cuda:0')
episode: 477 training return: tensor(-10173.2686, device='cuda:0')
episode: 478 training return: tensor(-27883.5840, device='cuda:0')
episode: 479 training return: tensor(-41448.8281, device='cuda:0')
epoch: 120 test_true_pfm: -15.025286075739222
episode: 480 training return: tensor(-159795.3281, device='cuda:0')
episode: 481 training return: tensor(-146622., device='cuda:0')
episode: 482 training return: tensor(-19775.7168, device='cuda:0')
episode: 483 training return: tensor(-169622.2656, device='cuda:0')
epoch: 121 test_true_pfm: -20.28242483891291
episode: 484 training return: tensor(-122459.6484, device='cuda:0')
episode: 485 training return: tensor(-72519.5234, device='cuda:0')
episode: 486 training return: tensor(-36938.4258, device='cuda:0')
episode: 487 training return: tensor(-65752.8828, device='cuda:0')
epoch: 122 test_true_pfm: -19.220038486216033
episode: 488 training return: tensor(-22091.3125, device='cuda:0')
episode: 489 training return: tensor(-55963.4375, device='cuda:0')
episode: 490 training return: tensor(-129541.0625, device='cuda:0')
episode: 491 training return: tensor(-146113.4219, device='cuda:0')
epoch: 123 test_true_pfm: -16.895396817721043
episode: 492 training return: tensor(-18671.8125, device='cuda:0')
episode: 493 training return: tensor(-57462.8867, device='cuda:0')
episode: 494 training return: tensor(-30440.3477, device='cuda:0')
episode: 495 training return: tensor(-60427.5039, device='cuda:0')
epoch: 124 test_true_pfm: -16.571955050979874
episode: 496 training return: tensor(-52951.6992, device='cuda:0')
episode: 497 training return: tensor(-86638.8672, device='cuda:0')
episode: 498 training return: tensor(-257241.3125, device='cuda:0')
episode: 499 training return: tensor(-24675.8984, device='cuda:0')
epoch: 125 test_true_pfm: -21.451721479845208
episode: 500 training return: tensor(-86218.5938, device='cuda:0')
episode: 501 training return: tensor(-97836.0469, device='cuda:0')
episode: 502 training return: tensor(-34584.5898, device='cuda:0')
episode: 503 training return: tensor(-65156.3477, device='cuda:0')
epoch: 126 test_true_pfm: -19.074375566286143
episode: 504 training return: tensor(-52680.7461, device='cuda:0')
episode: 505 training return: tensor(-78746.2422, device='cuda:0')
episode: 506 training return: tensor(-28821.2754, device='cuda:0')
episode: 507 training return: tensor(-106518.1406, device='cuda:0')
epoch: 127 test_true_pfm: -19.61668930123319
episode: 508 training return: tensor(-30994.4414, device='cuda:0')
episode: 509 training return: tensor(-42051.4844, device='cuda:0')
episode: 510 training return: tensor(-124497.3750, device='cuda:0')
episode: 511 training return: tensor(-139179.4062, device='cuda:0')
epoch: 128 test_true_pfm: -19.941156885002933
episode: 512 training return: tensor(-254110.5156, device='cuda:0')
episode: 513 training return: tensor(-64247.6484, device='cuda:0')
episode: 514 training return: tensor(-31253.9883, device='cuda:0')
episode: 515 training return: tensor(-89523.9453, device='cuda:0')
epoch: 129 test_true_pfm: -18.119377065463986
episode: 516 training return: tensor(-27087.9375, device='cuda:0')
episode: 517 training return: tensor(-20427.2188, device='cuda:0')
episode: 518 training return: tensor(-31810.3535, device='cuda:0')
episode: 519 training return: tensor(-56621.1875, device='cuda:0')
epoch: 130 test_true_pfm: -18.69436287888371
episode: 520 training return: tensor(-78631.8750, device='cuda:0')
episode: 521 training return: tensor(-61467.7227, device='cuda:0')
episode: 522 training return: tensor(-15009.0742, device='cuda:0')
episode: 523 training return: tensor(-114454.2812, device='cuda:0')
epoch: 131 test_true_pfm: -18.96159226010522
episode: 524 training return: tensor(-113435.5703, device='cuda:0')
episode: 525 training return: tensor(-31275.8965, device='cuda:0')
episode: 526 training return: tensor(-9174.1045, device='cuda:0')
episode: 527 training return: tensor(-2725787.5000, device='cuda:0')
epoch: 132 test_true_pfm: -16.788256694149403
episode: 528 training return: tensor(-18698.1309, device='cuda:0')
episode: 529 training return: tensor(-28781.5801, device='cuda:0')
episode: 530 training return: tensor(-114754.6484, device='cuda:0')
episode: 531 training return: tensor(-31178.3477, device='cuda:0')
epoch: 133 test_true_pfm: -17.423982964487262
episode: 532 training return: tensor(-23139.4258, device='cuda:0')
episode: 533 training return: tensor(-18659.1270, device='cuda:0')
episode: 534 training return: tensor(-21692.2734, device='cuda:0')
episode: 535 training return: tensor(-27823.1367, device='cuda:0')
epoch: 134 test_true_pfm: -20.091897426569442
episode: 536 training return: tensor(-30966.4980, device='cuda:0')
episode: 537 training return: tensor(-29609.9238, device='cuda:0')
episode: 538 training return: tensor(-22034.3906, device='cuda:0')
episode: 539 training return: tensor(-91775.0078, device='cuda:0')
epoch: 135 test_true_pfm: -18.408941810616717
episode: 540 training return: tensor(-23018.7852, device='cuda:0')
episode: 541 training return: tensor(-40266.8242, device='cuda:0')
episode: 542 training return: tensor(-79063.2812, device='cuda:0')
episode: 543 training return: tensor(-140443.5156, device='cuda:0')
epoch: 136 test_true_pfm: -18.434803969441056
episode: 544 training return: tensor(-24588.4746, device='cuda:0')
episode: 545 training return: tensor(-31113.4121, device='cuda:0')
episode: 546 training return: tensor(-49541.4961, device='cuda:0')
episode: 547 training return: tensor(-43909.6953, device='cuda:0')
epoch: 137 test_true_pfm: -12.044872485865756
episode: 548 training return: tensor(-14458.2832, device='cuda:0')
episode: 549 training return: tensor(-108357.2422, device='cuda:0')
episode: 550 training return: tensor(-29530.1953, device='cuda:0')
episode: 551 training return: tensor(-389659.6250, device='cuda:0')
epoch: 138 test_true_pfm: -17.98918116534562
episode: 552 training return: tensor(-54833.2969, device='cuda:0')
episode: 553 training return: tensor(-14131.2197, device='cuda:0')
episode: 554 training return: tensor(-81116.3906, device='cuda:0')
episode: 555 training return: tensor(-156653.2969, device='cuda:0')
epoch: 139 test_true_pfm: -20.219619429102266
episode: 556 training return: tensor(-43774.8672, device='cuda:0')
episode: 557 training return: tensor(-100552.3203, device='cuda:0')
episode: 558 training return: tensor(-23218930., device='cuda:0')
episode: 559 training return: tensor(-14871.7090, device='cuda:0')
epoch: 140 test_true_pfm: -10.830234659158696
episode: 560 training return: tensor(-51356.1406, device='cuda:0')
episode: 561 training return: tensor(-62988.3867, device='cuda:0')
episode: 562 training return: tensor(-182375.2031, device='cuda:0')
episode: 563 training return: tensor(-157090.6094, device='cuda:0')
epoch: 141 test_true_pfm: -20.152596812276737
episode: 564 training return: tensor(-97254.4062, device='cuda:0')
episode: 565 training return: tensor(-49408.6133, device='cuda:0')
episode: 566 training return: tensor(-73856.9375, device='cuda:0')
episode: 567 training return: tensor(-56366.8359, device='cuda:0')
epoch: 142 test_true_pfm: -17.989860605575437
episode: 568 training return: tensor(-98230.2656, device='cuda:0')
episode: 569 training return: tensor(-46229.3008, device='cuda:0')
episode: 570 training return: tensor(-73027.4141, device='cuda:0')
episode: 571 training return: tensor(-44845.4258, device='cuda:0')
epoch: 143 test_true_pfm: -16.430148346235306
episode: 572 training return: tensor(-17550.9727, device='cuda:0')
episode: 573 training return: tensor(-46567.8164, device='cuda:0')
episode: 574 training return: tensor(-72316.0312, device='cuda:0')
episode: 575 training return: tensor(-160633.4688, device='cuda:0')
epoch: 144 test_true_pfm: -18.896238028997693
episode: 576 training return: tensor(-251682.6250, device='cuda:0')
episode: 577 training return: tensor(-194565.3125, device='cuda:0')
episode: 578 training return: tensor(-98719.0781, device='cuda:0')
episode: 579 training return: tensor(-10683.2197, device='cuda:0')
epoch: 145 test_true_pfm: -13.36668611126324
episode: 580 training return: tensor(-10566.9150, device='cuda:0')
episode: 581 training return: tensor(-96407.8594, device='cuda:0')
episode: 582 training return: tensor(-282542.1562, device='cuda:0')
episode: 583 training return: tensor(-4861658.5000, device='cuda:0')
epoch: 146 test_true_pfm: -13.019336954330138
episode: 584 training return: tensor(-89862.4766, device='cuda:0')
episode: 585 training return: tensor(-3377614.7500, device='cuda:0')
episode: 586 training return: tensor(-41474.8984, device='cuda:0')
episode: 587 training return: tensor(-36812.2930, device='cuda:0')
epoch: 147 test_true_pfm: -16.068393267135477
episode: 588 training return: tensor(-42478.1602, device='cuda:0')
episode: 589 training return: tensor(-2589883., device='cuda:0')
episode: 590 training return: tensor(-211632.0938, device='cuda:0')
episode: 591 training return: tensor(-24886.4375, device='cuda:0')
epoch: 148 test_true_pfm: -20.204983447149043
episode: 592 training return: tensor(-43042.4219, device='cuda:0')
episode: 593 training return: tensor(-54816.2070, device='cuda:0')
episode: 594 training return: tensor(-18710.6211, device='cuda:0')
episode: 595 training return: tensor(-78738.6953, device='cuda:0')
epoch: 149 test_true_pfm: -12.18128825822706
episode: 596 training return: tensor(-55616.5547, device='cuda:0')
episode: 597 training return: tensor(-31378.4551, device='cuda:0')
episode: 598 training return: tensor(-105191.1484, device='cuda:0')
episode: 599 training return: tensor(-35700.1758, device='cuda:0')
epoch: 150 test_true_pfm: -11.907894116874994
