['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'brac', '--traj', 'expert', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 4.522505516037345 test_loss: -0.042923504114151
epoch: 1 training_loss -1.484318223912269 test_loss: -2.3623687744140627
epoch: 2 training_loss -3.2794971668720247 test_loss: -3.995884323120117
epoch: 3 training_loss -4.160109903812408 test_loss: -4.540119171142578
epoch: 4 training_loss -5.200511355400085 test_loss: -5.62475814819336
epoch: 5 training_loss -5.869714474678039 test_loss: -6.182232284545899
epoch: 6 training_loss -6.279751524925232 test_loss: -6.511294555664063
epoch: 7 training_loss -6.96940505027771 test_loss: -7.175630187988281
epoch: 8 training_loss -7.39253104686737 test_loss: -7.641867828369141
epoch: 9 training_loss -7.734570236206054 test_loss: -7.815635681152344
epoch: 10 training_loss -8.0930513048172 test_loss: -8.420148468017578
epoch: 11 training_loss -8.425941367149353 test_loss: -8.542213439941406
epoch: 12 training_loss -8.697284789085389 test_loss: -8.598441314697265
epoch: 13 training_loss -8.972367916107178 test_loss: -9.081761932373047
epoch: 14 training_loss -9.227649030685425 test_loss: -9.415589141845704
epoch: 15 training_loss -9.490591650009156 test_loss: -9.722806549072265
epoch: 16 training_loss -9.661657161712647 test_loss: -9.674853515625
epoch: 17 training_loss -9.844074726104736 test_loss: -9.911541748046876
epoch: 18 training_loss -10.062853965759277 test_loss: -10.093077850341796
epoch: 19 training_loss -10.318054666519165 test_loss: -10.399072265625
epoch: 20 training_loss -10.394887762069702 test_loss: -10.349150848388671
epoch: 21 training_loss -10.548165483474731 test_loss: -10.495987701416016
epoch: 22 training_loss -10.603048944473267 test_loss: -10.688656616210938
epoch: 23 training_loss -10.87449179649353 test_loss: -10.925833129882813
epoch: 24 training_loss -10.890836267471313 test_loss: -10.834055328369141
epoch: 25 training_loss -11.100229063034057 test_loss: -11.102323150634765
epoch: 26 training_loss -11.220675611495972 test_loss: -11.34664306640625
epoch: 27 training_loss -11.381204652786256 test_loss: -11.41277847290039
epoch: 28 training_loss -11.419421901702881 test_loss: -11.618827056884765
epoch: 29 training_loss -11.576072406768798 test_loss: -11.750496673583985
epoch: 30 training_loss -11.771612854003907 test_loss: -11.526216888427735
epoch: 31 training_loss -11.809916973114014 test_loss: -11.764640045166015
epoch: 32 training_loss -11.823475465774536 test_loss: -11.825091552734374
epoch: 33 training_loss -12.011631345748901 test_loss: -11.898728179931641
epoch: 34 training_loss -12.054348373413086 test_loss: -12.053090667724609
epoch: 35 training_loss -12.221165170669556 test_loss: -12.116260528564453
epoch: 36 training_loss -12.262110052108765 test_loss: -12.135958099365235
epoch: 37 training_loss -12.252053050994872 test_loss: -12.400113677978515
epoch: 38 training_loss -12.45551362991333 test_loss: -12.559218597412109
epoch: 39 training_loss -12.513514156341552 test_loss: -12.491836547851562
epoch: 40 training_loss -12.593163690567017 test_loss: -12.539899444580078
epoch: 41 training_loss -12.659323968887328 test_loss: -12.510186767578125
epoch: 42 training_loss -12.735143795013428 test_loss: -12.845046997070312
epoch: 43 training_loss -12.795410137176514 test_loss: -12.679662322998047
epoch: 44 training_loss -12.964952688217164 test_loss: -12.816275024414063
epoch: 45 training_loss -12.867046899795533 test_loss: -13.050877380371094
epoch: 46 training_loss -13.051378631591797 test_loss: -12.913095092773437
epoch: 47 training_loss -13.031697883605958 test_loss: -13.012136840820313
epoch: 48 training_loss -13.158556060791016 test_loss: -13.144085693359376
epoch: 49 training_loss -13.18984432220459 test_loss: -13.268251037597656
epoch: 50 training_loss -13.302122392654418 test_loss: -13.32861785888672
epoch: 51 training_loss -13.213838672637939 test_loss: -13.222509765625
epoch: 52 training_loss -13.342921657562256 test_loss: -13.348486328125
epoch: 53 training_loss -13.337107954025269 test_loss: -13.334375
epoch: 54 training_loss -13.462455825805664 test_loss: -13.333477783203126
epoch: 55 training_loss -13.453942594528199 test_loss: -13.520857238769532
epoch: 56 training_loss -13.640525274276733 test_loss: -13.482260131835938
epoch: 57 training_loss -13.606415157318116 test_loss: -13.410189819335937
epoch: 58 training_loss -13.655065746307374 test_loss: -13.719511413574219
epoch: 59 training_loss -13.61628116607666 test_loss: -13.711283874511718
epoch: 60 training_loss -13.775565071105957 test_loss: -13.634858703613281
epoch: 61 training_loss -13.740998086929322 test_loss: -13.764971923828124
epoch: 62 training_loss -13.725552253723144 test_loss: -13.789775085449218
epoch: 63 training_loss -13.79901647567749 test_loss: -13.758201599121094
epoch: 64 training_loss -13.851018381118774 test_loss: -13.826475524902344
epoch: 65 training_loss -13.898944339752198 test_loss: -13.782417297363281
epoch: 66 training_loss -13.85088719367981 test_loss: -14.048129272460937
epoch: 67 training_loss -13.947781505584716 test_loss: -14.100479125976562
epoch: 68 training_loss -13.897060403823852 test_loss: -13.918017578125
epoch: 69 training_loss -13.917085275650024 test_loss: -14.15550537109375
epoch: 70 training_loss -13.970369100570679 test_loss: -13.9073486328125
epoch: 71 training_loss -14.048002939224244 test_loss: -14.098959350585938
epoch: 72 training_loss -14.140541725158691 test_loss: -14.260215759277344
epoch: 73 training_loss -14.105331115722656 test_loss: -14.230805969238281
epoch: 74 training_loss -14.159402751922608 test_loss: -14.104769897460937
epoch: 75 training_loss -14.141656131744385 test_loss: -14.023786926269532
epoch: 76 training_loss -14.203238592147827 test_loss: -14.187222290039063
epoch: 77 training_loss -14.247482500076295 test_loss: -14.304801940917969
epoch: 78 training_loss -14.23129487991333 test_loss: -14.181558227539062
epoch: 79 training_loss -14.297564563751221 test_loss: -14.292861938476562
epoch: 80 training_loss -14.298721055984497 test_loss: -14.255599975585938
epoch: 81 training_loss -14.302242460250854 test_loss: -14.202206420898438
epoch: 82 training_loss -14.331236171722413 test_loss: -14.312959289550781
epoch: 83 training_loss -14.352957000732422 test_loss: -14.432437133789062
epoch: 84 training_loss -14.373346309661866 test_loss: -14.390910339355468
epoch: 85 training_loss -14.420642652511596 test_loss: -14.391128540039062
epoch: 86 training_loss -14.4027969455719 test_loss: -14.397775268554687
epoch: 87 training_loss -14.50175901412964 test_loss: -14.452328491210938
epoch: 88 training_loss -14.500710306167603 test_loss: -14.458615112304688
epoch: 89 training_loss -14.476304092407226 test_loss: -14.47716827392578
epoch: 90 training_loss -14.565663232803345 test_loss: -14.464625549316406
epoch: 91 training_loss -14.523287706375122 test_loss: -14.70598907470703
epoch: 92 training_loss -14.615866537094115 test_loss: -14.71319122314453
epoch: 93 training_loss -14.602297506332398 test_loss: -14.700901794433594
epoch: 94 training_loss -14.626912164688111 test_loss: -14.58858642578125
epoch: 95 training_loss -14.671220874786377 test_loss: -14.636473083496094
epoch: 96 training_loss -14.626250228881837 test_loss: -14.650067138671876
epoch: 97 training_loss -14.741855707168579 test_loss: -14.55948486328125
epoch: 98 training_loss -14.707802152633667 test_loss: -14.700398254394532
epoch: 99 training_loss -14.644177141189575 test_loss: -14.74488525390625
epoch: 100 training_loss -14.734034013748168 test_loss: -14.735989379882813
epoch: 101 training_loss -14.74877311706543 test_loss: -14.722908020019531
epoch: 102 training_loss -14.7759951877594 test_loss: -14.714277648925782
epoch: 103 training_loss -14.814943447113038 test_loss: -14.743971252441407
epoch: 104 training_loss -14.860495948791504 test_loss: -14.707365417480469
epoch: 105 training_loss -14.897793712615966 test_loss: -14.638163757324218
epoch: 106 training_loss -14.816969747543334 test_loss: -14.811286926269531
epoch: 107 training_loss -14.85008783340454 test_loss: -14.87747802734375
epoch: 108 training_loss -14.873535223007202 test_loss: -14.916567993164062
epoch: 109 training_loss -14.9703178691864 test_loss: -14.947773742675782
epoch: 110 training_loss -14.946184663772582 test_loss: -14.900672912597656
epoch: 111 training_loss -14.916099042892457 test_loss: -14.902703857421875
epoch: 112 training_loss -14.921311302185059 test_loss: -15.015248107910157
epoch: 113 training_loss -14.913945932388305 test_loss: -14.849844360351563
epoch: 114 training_loss -14.89976273536682 test_loss: -15.042033386230468
epoch: 115 training_loss -15.003362188339233 test_loss: -15.097889709472657
epoch: 116 training_loss -15.065794563293457 test_loss: -14.952961730957032
epoch: 117 training_loss -14.996531734466553 test_loss: -15.040579223632813
epoch: 118 training_loss -15.022183103561401 test_loss: -15.142637634277344
epoch: 119 training_loss -15.003652238845826 test_loss: -15.143096923828125
epoch: 120 training_loss -15.020536966323853 test_loss: -15.001068115234375
epoch: 121 training_loss -15.03230266571045 test_loss: -14.916676330566407
epoch: 122 training_loss -15.061824846267701 test_loss: -15.055508422851563
epoch: 123 training_loss -15.13902618408203 test_loss: -15.216116333007813
epoch: 124 training_loss -15.14742018699646 test_loss: -15.152890014648438
epoch: 125 training_loss -15.180404329299927 test_loss: -15.176300048828125
epoch: 126 training_loss -15.207001800537109 test_loss: -15.15514678955078
epoch: 127 training_loss -15.19192445755005 test_loss: -15.129302978515625
epoch: 128 training_loss -15.218206396102905 test_loss: -15.075735473632813
epoch: 129 training_loss -15.193032026290894 test_loss: -15.100776672363281
epoch: 130 training_loss -15.254273796081543 test_loss: -15.214459228515626
epoch: 131 training_loss -15.295415525436402 test_loss: -15.253147888183594
epoch: 132 training_loss -15.28401997566223 test_loss: -15.27978057861328
epoch: 133 training_loss -15.26975947380066 test_loss: -15.286671447753907
epoch: 134 training_loss -15.264511365890503 test_loss: -15.275039672851562
epoch: 135 training_loss -15.28249376296997 test_loss: -15.200686645507812
epoch: 136 training_loss -15.278467626571656 test_loss: -15.335884094238281
epoch: 137 training_loss -15.315327739715576 test_loss: -15.395074462890625
epoch: 138 training_loss -15.28641872406006 test_loss: -15.272573852539063
epoch: 139 training_loss -15.34129737854004 test_loss: -15.418357849121094
epoch: 140 training_loss -15.353337535858154 test_loss: -15.298060607910156
epoch: 141 training_loss -15.376110191345214 test_loss: -15.34814453125
epoch: 142 training_loss -15.429220390319824 test_loss: -15.388053894042969
epoch: 143 training_loss -15.38075306892395 test_loss: -15.255494689941406
epoch: 144 training_loss -15.367105894088745 test_loss: -15.411210632324218
epoch: 145 training_loss -15.403395309448243 test_loss: -15.573295593261719
epoch: 146 training_loss -15.438863048553467 test_loss: -15.516957092285157
epoch: 147 training_loss -15.432231884002686 test_loss: -15.404917907714843
epoch: 148 training_loss -15.433784379959107 test_loss: -15.552188110351562
epoch: 149 training_loss -15.430641765594482 test_loss: -15.396224975585938
2071.0486896567004
episode: 0 training return: tensor(-5071.5732, device='cuda:0')
episode: 1 training return: tensor(-4267.1753, device='cuda:0')
episode: 2 training return: tensor(-4856.7607, device='cuda:0')
episode: 3 training return: tensor(-5472.7485, device='cuda:0')
epoch: 1 test_true_pfm: 347.9657117221631
episode: 4 training return: tensor(-3576.5852, device='cuda:0')
episode: 5 training return: tensor(-5131.7539, device='cuda:0')
episode: 6 training return: tensor(-5764.9019, device='cuda:0')
episode: 7 training return: tensor(-4889.8115, device='cuda:0')
epoch: 2 test_true_pfm: 199.01215814924674
episode: 8 training return: tensor(-5323.0503, device='cuda:0')
episode: 9 training return: tensor(-4991.1367, device='cuda:0')
episode: 10 training return: tensor(-2387.8108, device='cuda:0')
episode: 11 training return: tensor(-2201.4041, device='cuda:0')
epoch: 3 test_true_pfm: 141.91343871468698
episode: 12 training return: tensor(-2152.4922, device='cuda:0')
episode: 13 training return: tensor(-2024.7772, device='cuda:0')
episode: 14 training return: tensor(-2011.2050, device='cuda:0')
episode: 15 training return: tensor(-1971.3228, device='cuda:0')
epoch: 4 test_true_pfm: 92.75416508490277
episode: 16 training return: tensor(-1957.1509, device='cuda:0')
episode: 17 training return: tensor(-1920.6547, device='cuda:0')
episode: 18 training return: tensor(-1798.2507, device='cuda:0')
episode: 19 training return: tensor(-1712.8405, device='cuda:0')
epoch: 5 test_true_pfm: -205.82937895140424
episode: 20 training return: tensor(-1741.8875, device='cuda:0')
episode: 21 training return: tensor(-1702.6865, device='cuda:0')
episode: 22 training return: tensor(-1699.8158, device='cuda:0')
episode: 23 training return: tensor(-1639.5394, device='cuda:0')
epoch: 6 test_true_pfm: -195.144920097025
episode: 24 training return: tensor(-1620.5623, device='cuda:0')
episode: 25 training return: tensor(-1583.8645, device='cuda:0')
episode: 26 training return: tensor(-1552.1962, device='cuda:0')
episode: 27 training return: tensor(-1566.4364, device='cuda:0')
epoch: 7 test_true_pfm: -181.72913723755718
episode: 28 training return: tensor(-1563.9563, device='cuda:0')
episode: 29 training return: tensor(-1536.7593, device='cuda:0')
episode: 30 training return: tensor(-1527.8370, device='cuda:0')
episode: 31 training return: tensor(-1571.4579, device='cuda:0')
epoch: 8 test_true_pfm: -188.1189194434659
episode: 32 training return: tensor(-1554.8693, device='cuda:0')
episode: 33 training return: tensor(-1680.4946, device='cuda:0')
episode: 34 training return: tensor(-1783.9392, device='cuda:0')
episode: 35 training return: tensor(-2297.8582, device='cuda:0')
epoch: 9 test_true_pfm: -143.00294666662748
episode: 36 training return: tensor(-2402.9165, device='cuda:0')
episode: 37 training return: tensor(-1725.3280, device='cuda:0')
episode: 38 training return: tensor(-1933.2059, device='cuda:0')
episode: 39 training return: tensor(-1596.4482, device='cuda:0')
epoch: 10 test_true_pfm: -134.56372952358228
episode: 40 training return: tensor(-1833.1752, device='cuda:0')
episode: 41 training return: tensor(-1571.4777, device='cuda:0')
episode: 42 training return: tensor(-1568.6608, device='cuda:0')
episode: 43 training return: tensor(-1544.1133, device='cuda:0')
epoch: 11 test_true_pfm: -130.19237428765405
episode: 44 training return: tensor(-1555.1395, device='cuda:0')
episode: 45 training return: tensor(-1560.0110, device='cuda:0')
episode: 46 training return: tensor(-1553.4674, device='cuda:0')
episode: 47 training return: tensor(-1524.0363, device='cuda:0')
epoch: 12 test_true_pfm: -130.8968118911673
episode: 48 training return: tensor(-1531.9424, device='cuda:0')
episode: 49 training return: tensor(-1530.6963, device='cuda:0')
episode: 50 training return: tensor(-1570.0623, device='cuda:0')
episode: 51 training return: tensor(-1525.3207, device='cuda:0')
epoch: 13 test_true_pfm: -139.63357279538735
episode: 52 training return: tensor(-1533.8019, device='cuda:0')
episode: 53 training return: tensor(-1533.0361, device='cuda:0')
episode: 54 training return: tensor(-1482.1743, device='cuda:0')
episode: 55 training return: tensor(-1486.5853, device='cuda:0')
epoch: 14 test_true_pfm: -131.81219936394845
episode: 56 training return: tensor(-1493.4025, device='cuda:0')
episode: 57 training return: tensor(-1522.7969, device='cuda:0')
episode: 58 training return: tensor(-1503.9763, device='cuda:0')
episode: 59 training return: tensor(-1527.2251, device='cuda:0')
epoch: 15 test_true_pfm: -125.9592729098916
episode: 60 training return: tensor(-1485.2300, device='cuda:0')
episode: 61 training return: tensor(-1491.1833, device='cuda:0')
episode: 62 training return: tensor(-1515.9469, device='cuda:0')
episode: 63 training return: tensor(-1519.8640, device='cuda:0')
epoch: 16 test_true_pfm: -127.82669507886983
episode: 64 training return: tensor(-1494.2745, device='cuda:0')
episode: 65 training return: tensor(-1521.0824, device='cuda:0')
episode: 66 training return: tensor(-1469.5348, device='cuda:0')
episode: 67 training return: tensor(-1515.5670, device='cuda:0')
epoch: 17 test_true_pfm: -118.55173870982954
episode: 68 training return: tensor(-1502.1417, device='cuda:0')
episode: 69 training return: tensor(-1526.5482, device='cuda:0')
episode: 70 training return: tensor(-1514.6890, device='cuda:0')
episode: 71 training return: tensor(-1502.8136, device='cuda:0')
epoch: 18 test_true_pfm: -115.24591310479467
episode: 72 training return: tensor(-1497.5901, device='cuda:0')
episode: 73 training return: tensor(-1535.3307, device='cuda:0')
episode: 74 training return: tensor(-1472.4329, device='cuda:0')
episode: 75 training return: tensor(-1512.5723, device='cuda:0')
epoch: 19 test_true_pfm: -126.11230038191779
episode: 76 training return: tensor(-1522.8969, device='cuda:0')
episode: 77 training return: tensor(-1521.4915, device='cuda:0')
episode: 78 training return: tensor(-1493.8040, device='cuda:0')
episode: 79 training return: tensor(-1503.1093, device='cuda:0')
epoch: 20 test_true_pfm: -133.6666794649001
episode: 80 training return: tensor(-1522.5986, device='cuda:0')
episode: 81 training return: tensor(-1514.8645, device='cuda:0')
episode: 82 training return: tensor(-1520.8163, device='cuda:0')
episode: 83 training return: tensor(-1539.8693, device='cuda:0')
epoch: 21 test_true_pfm: -121.6862971478389
episode: 84 training return: tensor(-1550.6412, device='cuda:0')
episode: 85 training return: tensor(-1523.4880, device='cuda:0')
episode: 86 training return: tensor(-1514.8303, device='cuda:0')
episode: 87 training return: tensor(-1539.4492, device='cuda:0')
epoch: 22 test_true_pfm: -107.67075896013255
episode: 88 training return: tensor(-1521.4950, device='cuda:0')
episode: 89 training return: tensor(-1522.4309, device='cuda:0')
episode: 90 training return: tensor(-1530.5031, device='cuda:0')
episode: 91 training return: tensor(-1525.0251, device='cuda:0')
epoch: 23 test_true_pfm: -124.50427019571994
episode: 92 training return: tensor(-1507.9080, device='cuda:0')
episode: 93 training return: tensor(-1508.1747, device='cuda:0')
episode: 94 training return: tensor(-1542.5481, device='cuda:0')
episode: 95 training return: tensor(-1549.4958, device='cuda:0')
epoch: 24 test_true_pfm: -127.46273634565144
episode: 96 training return: tensor(-1536.4614, device='cuda:0')
episode: 97 training return: tensor(-1506.3800, device='cuda:0')
episode: 98 training return: tensor(-1565.9229, device='cuda:0')
episode: 99 training return: tensor(-1524.5448, device='cuda:0')
epoch: 25 test_true_pfm: -118.5335341332318
episode: 100 training return: tensor(-1531.8601, device='cuda:0')
episode: 101 training return: tensor(-1510.3279, device='cuda:0')
episode: 102 training return: tensor(-1505.7952, device='cuda:0')
episode: 103 training return: tensor(-1571.5408, device='cuda:0')
epoch: 26 test_true_pfm: -128.8893908015836
episode: 104 training return: tensor(-1533.2560, device='cuda:0')
episode: 105 training return: tensor(-1511.7378, device='cuda:0')
episode: 106 training return: tensor(-1574.5669, device='cuda:0')
episode: 107 training return: tensor(-1509.9484, device='cuda:0')
epoch: 27 test_true_pfm: -131.9430112287889
episode: 108 training return: tensor(-1531.1216, device='cuda:0')
episode: 109 training return: tensor(-1541.2738, device='cuda:0')
episode: 110 training return: tensor(-1506.6984, device='cuda:0')
episode: 111 training return: tensor(-1564.9664, device='cuda:0')
epoch: 28 test_true_pfm: -139.27295531975253
episode: 112 training return: tensor(-1552.1753, device='cuda:0')
episode: 113 training return: tensor(-1520.7891, device='cuda:0')
episode: 114 training return: tensor(-1494.8855, device='cuda:0')
episode: 115 training return: tensor(-1534.9816, device='cuda:0')
epoch: 29 test_true_pfm: -116.47366419936282
episode: 116 training return: tensor(-1534.6591, device='cuda:0')
episode: 117 training return: tensor(-1530.8411, device='cuda:0')
episode: 118 training return: tensor(-1557.3817, device='cuda:0')
episode: 119 training return: tensor(-1547.2399, device='cuda:0')
epoch: 30 test_true_pfm: -107.1249998267652
episode: 120 training return: tensor(-1535.5800, device='cuda:0')
episode: 121 training return: tensor(-1519.8020, device='cuda:0')
episode: 122 training return: tensor(-1561.0197, device='cuda:0')
episode: 123 training return: tensor(-1534.6763, device='cuda:0')
epoch: 31 test_true_pfm: -106.79037665877821
episode: 124 training return: tensor(-1517.1591, device='cuda:0')
episode: 125 training return: tensor(-1534.4215, device='cuda:0')
episode: 126 training return: tensor(-1544.9678, device='cuda:0')
episode: 127 training return: tensor(-1513.1244, device='cuda:0')
epoch: 32 test_true_pfm: -122.27699119200622
episode: 128 training return: tensor(-1522.5980, device='cuda:0')
episode: 129 training return: tensor(-1509.7581, device='cuda:0')
episode: 130 training return: tensor(-1536.3508, device='cuda:0')
episode: 131 training return: tensor(-1543.5096, device='cuda:0')
epoch: 33 test_true_pfm: -110.2298307203722
episode: 132 training return: tensor(-1514.6880, device='cuda:0')
episode: 133 training return: tensor(-1542.5745, device='cuda:0')
episode: 134 training return: tensor(-1519.2698, device='cuda:0')
episode: 135 training return: tensor(-1505.9680, device='cuda:0')
epoch: 34 test_true_pfm: -111.0992912014533
episode: 136 training return: tensor(-1541.0428, device='cuda:0')
episode: 137 training return: tensor(-1531.8292, device='cuda:0')
episode: 138 training return: tensor(-1528.2089, device='cuda:0')
episode: 139 training return: tensor(-1562.6078, device='cuda:0')
epoch: 35 test_true_pfm: -104.93770804688931
episode: 140 training return: tensor(-1523.0905, device='cuda:0')
episode: 141 training return: tensor(-1557.7491, device='cuda:0')
episode: 142 training return: tensor(-1531.5121, device='cuda:0')
episode: 143 training return: tensor(-1537.2137, device='cuda:0')
epoch: 36 test_true_pfm: -105.67036361601288
episode: 144 training return: tensor(-1536.7238, device='cuda:0')
episode: 145 training return: tensor(-1523.1375, device='cuda:0')
episode: 146 training return: tensor(-1516.0510, device='cuda:0')
episode: 147 training return: tensor(-1541.7683, device='cuda:0')
epoch: 37 test_true_pfm: -118.34383531043407
episode: 148 training return: tensor(-1505.2003, device='cuda:0')
episode: 149 training return: tensor(-1510.9758, device='cuda:0')
episode: 150 training return: tensor(-1554.9570, device='cuda:0')
episode: 151 training return: tensor(-1533.8756, device='cuda:0')
epoch: 38 test_true_pfm: -100.95673002782087
episode: 152 training return: tensor(-1554.0798, device='cuda:0')
episode: 153 training return: tensor(-1529.6887, device='cuda:0')
episode: 154 training return: tensor(-1516.8253, device='cuda:0')
episode: 155 training return: tensor(-1523.7539, device='cuda:0')
epoch: 39 test_true_pfm: -110.27069914818998
episode: 156 training return: tensor(-1547.2687, device='cuda:0')
episode: 157 training return: tensor(-1532.4320, device='cuda:0')
episode: 158 training return: tensor(-1522.5560, device='cuda:0')
episode: 159 training return: tensor(-1544.5103, device='cuda:0')
epoch: 40 test_true_pfm: -106.2052529446222
episode: 160 training return: tensor(-1528.2296, device='cuda:0')
episode: 161 training return: tensor(-1526.1605, device='cuda:0')
episode: 162 training return: tensor(-1553.7137, device='cuda:0')
episode: 163 training return: tensor(-1535.2029, device='cuda:0')
epoch: 41 test_true_pfm: -107.8052854837752
episode: 164 training return: tensor(-1558.4719, device='cuda:0')
episode: 165 training return: tensor(-1528.3636, device='cuda:0')
episode: 166 training return: tensor(-1515.4474, device='cuda:0')
episode: 167 training return: tensor(-1534.7744, device='cuda:0')
epoch: 42 test_true_pfm: -84.93748119356106
episode: 168 training return: tensor(-1517.4111, device='cuda:0')
episode: 169 training return: tensor(-1514.4923, device='cuda:0')
episode: 170 training return: tensor(-1536.6501, device='cuda:0')
episode: 171 training return: tensor(-1533.5168, device='cuda:0')
epoch: 43 test_true_pfm: -117.24456156891345
episode: 172 training return: tensor(-1546.5168, device='cuda:0')
episode: 173 training return: tensor(-1534.1613, device='cuda:0')
episode: 174 training return: tensor(-1520.5684, device='cuda:0')
episode: 175 training return: tensor(-1514.8368, device='cuda:0')
epoch: 44 test_true_pfm: -90.50970570779208
episode: 176 training return: tensor(-1544.0861, device='cuda:0')
episode: 177 training return: tensor(-1544.9924, device='cuda:0')
episode: 178 training return: tensor(-1548.7860, device='cuda:0')
episode: 179 training return: tensor(-1524.5023, device='cuda:0')
epoch: 45 test_true_pfm: -107.47276495839009
episode: 180 training return: tensor(-1517.2526, device='cuda:0')
episode: 181 training return: tensor(-1528.5398, device='cuda:0')
episode: 182 training return: tensor(-1515.9722, device='cuda:0')
episode: 183 training return: tensor(-1529.1752, device='cuda:0')
epoch: 46 test_true_pfm: -89.90730734990284
episode: 184 training return: tensor(-1519.3751, device='cuda:0')
episode: 185 training return: tensor(-1558.1689, device='cuda:0')
episode: 186 training return: tensor(-1502.0750, device='cuda:0')
episode: 187 training return: tensor(-1519.2627, device='cuda:0')
epoch: 47 test_true_pfm: -103.43742785400423
episode: 188 training return: tensor(-1535.2876, device='cuda:0')
episode: 189 training return: tensor(-1547.9323, device='cuda:0')
episode: 190 training return: tensor(-1507.5542, device='cuda:0')
episode: 191 training return: tensor(-1505.2694, device='cuda:0')
epoch: 48 test_true_pfm: -104.19749267119535
episode: 192 training return: tensor(-1515.9091, device='cuda:0')
episode: 193 training return: tensor(-1506.3876, device='cuda:0')
episode: 194 training return: tensor(-1510.2052, device='cuda:0')
episode: 195 training return: tensor(-1528.4644, device='cuda:0')
epoch: 49 test_true_pfm: -106.56579923391483
episode: 196 training return: tensor(-1541.6846, device='cuda:0')
episode: 197 training return: tensor(-1545.1903, device='cuda:0')
episode: 198 training return: tensor(-1562.6151, device='cuda:0')
episode: 199 training return: tensor(-1540.0284, device='cuda:0')
epoch: 50 test_true_pfm: -99.9749117284685
episode: 200 training return: tensor(-1520.5560, device='cuda:0')
episode: 201 training return: tensor(-1546.2018, device='cuda:0')
episode: 202 training return: tensor(-1520.8293, device='cuda:0')
episode: 203 training return: tensor(-1508.5681, device='cuda:0')
epoch: 51 test_true_pfm: -99.32774060513474
episode: 204 training return: tensor(-1543.7452, device='cuda:0')
episode: 205 training return: tensor(-1522.2983, device='cuda:0')
episode: 206 training return: tensor(-1517.9503, device='cuda:0')
episode: 207 training return: tensor(-1501.5483, device='cuda:0')
epoch: 52 test_true_pfm: -92.05976315967364
episode: 208 training return: tensor(-1502.4197, device='cuda:0')
episode: 209 training return: tensor(-1546.3163, device='cuda:0')
episode: 210 training return: tensor(-1537.8571, device='cuda:0')
episode: 211 training return: tensor(-1543.8835, device='cuda:0')
epoch: 53 test_true_pfm: -88.1673721393397
episode: 212 training return: tensor(-1528.9286, device='cuda:0')
episode: 213 training return: tensor(-1551.4858, device='cuda:0')
episode: 214 training return: tensor(-1534.9762, device='cuda:0')
episode: 215 training return: tensor(-1530.6765, device='cuda:0')
epoch: 54 test_true_pfm: -90.19923115103938
episode: 216 training return: tensor(-1492.4252, device='cuda:0')
episode: 217 training return: tensor(-1504.8359, device='cuda:0')
episode: 218 training return: tensor(-1530.8319, device='cuda:0')
episode: 219 training return: tensor(-1530.4083, device='cuda:0')
epoch: 55 test_true_pfm: -94.50101051691622
episode: 220 training return: tensor(-1500.5829, device='cuda:0')
episode: 221 training return: tensor(-1503.7443, device='cuda:0')
episode: 222 training return: tensor(-1537.5597, device='cuda:0')
episode: 223 training return: tensor(-1560.4446, device='cuda:0')
epoch: 56 test_true_pfm: -96.63175198949165
episode: 224 training return: tensor(-1504.3168, device='cuda:0')
episode: 225 training return: tensor(-1552.2272, device='cuda:0')
episode: 226 training return: tensor(-1560.7776, device='cuda:0')
episode: 227 training return: tensor(-1542.5775, device='cuda:0')
epoch: 57 test_true_pfm: -114.6478953828208
episode: 228 training return: tensor(-1503.3031, device='cuda:0')
episode: 229 training return: tensor(-1536.2316, device='cuda:0')
episode: 230 training return: tensor(-1542.6646, device='cuda:0')
episode: 231 training return: tensor(-1515.4767, device='cuda:0')
epoch: 58 test_true_pfm: -102.13504352608743
episode: 232 training return: tensor(-1521.3750, device='cuda:0')
episode: 233 training return: tensor(-1544.0154, device='cuda:0')
episode: 234 training return: tensor(-1521.6495, device='cuda:0')
episode: 235 training return: tensor(-1522.2119, device='cuda:0')
epoch: 59 test_true_pfm: -104.32264883327109
episode: 236 training return: tensor(-1535.9639, device='cuda:0')
episode: 237 training return: tensor(-1580.8567, device='cuda:0')
episode: 238 training return: tensor(-1537.3256, device='cuda:0')
episode: 239 training return: tensor(-1514.6049, device='cuda:0')
epoch: 60 test_true_pfm: -101.921647361056
episode: 240 training return: tensor(-1526.5640, device='cuda:0')
episode: 241 training return: tensor(-1577.8873, device='cuda:0')
episode: 242 training return: tensor(-1562.4803, device='cuda:0')
episode: 243 training return: tensor(-1513.7065, device='cuda:0')
epoch: 61 test_true_pfm: -100.85773730460647
episode: 244 training return: tensor(-1562.6129, device='cuda:0')
episode: 245 training return: tensor(-1555.0396, device='cuda:0')
episode: 246 training return: tensor(-1530.6514, device='cuda:0')
episode: 247 training return: tensor(-1510.7939, device='cuda:0')
epoch: 62 test_true_pfm: -86.41463729323561
episode: 248 training return: tensor(-1539.8134, device='cuda:0')
episode: 249 training return: tensor(-1531.1079, device='cuda:0')
episode: 250 training return: tensor(-1536.2842, device='cuda:0')
episode: 251 training return: tensor(-1540.0894, device='cuda:0')
epoch: 63 test_true_pfm: -103.28359875877506
episode: 252 training return: tensor(-1542.5566, device='cuda:0')
episode: 253 training return: tensor(-1498.7399, device='cuda:0')
episode: 254 training return: tensor(-1521.3575, device='cuda:0')
episode: 255 training return: tensor(-1519.8804, device='cuda:0')
epoch: 64 test_true_pfm: -94.40678562653034
episode: 256 training return: tensor(-1540.9863, device='cuda:0')
episode: 257 training return: tensor(-1535.3875, device='cuda:0')
episode: 258 training return: tensor(-1531.9117, device='cuda:0')
episode: 259 training return: tensor(-1529.0396, device='cuda:0')
epoch: 65 test_true_pfm: -92.81320832971987
episode: 260 training return: tensor(-1501.3982, device='cuda:0')
episode: 261 training return: tensor(-1517.0762, device='cuda:0')
episode: 262 training return: tensor(-1527.8051, device='cuda:0')
episode: 263 training return: tensor(-1518.7174, device='cuda:0')
epoch: 66 test_true_pfm: -95.62679459605086
episode: 264 training return: tensor(-1522.6840, device='cuda:0')
episode: 265 training return: tensor(-1519.0681, device='cuda:0')
episode: 266 training return: tensor(-1550.2809, device='cuda:0')
episode: 267 training return: tensor(-1519.2274, device='cuda:0')
epoch: 67 test_true_pfm: -96.75482765606404
episode: 268 training return: tensor(-1539.5071, device='cuda:0')
episode: 269 training return: tensor(-1533.5748, device='cuda:0')
episode: 270 training return: tensor(-1548.6333, device='cuda:0')
episode: 271 training return: tensor(-1540.8728, device='cuda:0')
epoch: 68 test_true_pfm: -96.78566069045418
episode: 272 training return: tensor(-1498.6124, device='cuda:0')
episode: 273 training return: tensor(-1525.6881, device='cuda:0')
episode: 274 training return: tensor(-1530.9821, device='cuda:0')
episode: 275 training return: tensor(-1546.5312, device='cuda:0')
epoch: 69 test_true_pfm: -87.3635589425027
episode: 276 training return: tensor(-1526.0801, device='cuda:0')
episode: 277 training return: tensor(-1535.6938, device='cuda:0')
episode: 278 training return: tensor(-1533.2467, device='cuda:0')
episode: 279 training return: tensor(-1521.6287, device='cuda:0')
epoch: 70 test_true_pfm: -84.21937279308163
episode: 280 training return: tensor(-1522.5780, device='cuda:0')
episode: 281 training return: tensor(-1549.5902, device='cuda:0')
episode: 282 training return: tensor(-1521.0308, device='cuda:0')
episode: 283 training return: tensor(-1538.9503, device='cuda:0')
epoch: 71 test_true_pfm: -97.7433694702176
episode: 284 training return: tensor(-1517.7209, device='cuda:0')
episode: 285 training return: tensor(-1533.3324, device='cuda:0')
episode: 286 training return: tensor(-1531.1079, device='cuda:0')
episode: 287 training return: tensor(-1535.8436, device='cuda:0')
epoch: 72 test_true_pfm: -101.0407836087969
episode: 288 training return: tensor(-1539.2113, device='cuda:0')
episode: 289 training return: tensor(-1505.2203, device='cuda:0')
episode: 290 training return: tensor(-1498.6422, device='cuda:0')
episode: 291 training return: tensor(-1540.7411, device='cuda:0')
epoch: 73 test_true_pfm: -88.58698312317654
episode: 292 training return: tensor(-1499.6118, device='cuda:0')
episode: 293 training return: tensor(-1501.6289, device='cuda:0')
episode: 294 training return: tensor(-1483.9114, device='cuda:0')
episode: 295 training return: tensor(-1511.1707, device='cuda:0')
epoch: 74 test_true_pfm: -82.88008988888122
episode: 296 training return: tensor(-1533.5476, device='cuda:0')
episode: 297 training return: tensor(-1515.6180, device='cuda:0')
episode: 298 training return: tensor(-1516.7408, device='cuda:0')
episode: 299 training return: tensor(-1515.4816, device='cuda:0')
epoch: 75 test_true_pfm: -100.53942863628255
episode: 300 training return: tensor(-1545.6881, device='cuda:0')
episode: 301 training return: tensor(-1520.4429, device='cuda:0')
episode: 302 training return: tensor(-1518.4750, device='cuda:0')
episode: 303 training return: tensor(-1504.0607, device='cuda:0')
epoch: 76 test_true_pfm: -94.60217615440838
episode: 304 training return: tensor(-1506.6340, device='cuda:0')
episode: 305 training return: tensor(-1508.2593, device='cuda:0')
episode: 306 training return: tensor(-1536.5216, device='cuda:0')
episode: 307 training return: tensor(-1533.6392, device='cuda:0')
epoch: 77 test_true_pfm: -93.51265529398852
episode: 308 training return: tensor(-1513.3208, device='cuda:0')
episode: 309 training return: tensor(-1532.5338, device='cuda:0')
episode: 310 training return: tensor(-1578.6666, device='cuda:0')
episode: 311 training return: tensor(-1512.8462, device='cuda:0')
epoch: 78 test_true_pfm: -91.28172147447765
episode: 312 training return: tensor(-1563.6901, device='cuda:0')
episode: 313 training return: tensor(-1524.0225, device='cuda:0')
episode: 314 training return: tensor(-1540.5454, device='cuda:0')
episode: 315 training return: tensor(-1520.8990, device='cuda:0')
epoch: 79 test_true_pfm: -88.48822999977557
episode: 316 training return: tensor(-1519.5868, device='cuda:0')
episode: 317 training return: tensor(-1529.1293, device='cuda:0')
episode: 318 training return: tensor(-1512.6312, device='cuda:0')
episode: 319 training return: tensor(-1525.4158, device='cuda:0')
epoch: 80 test_true_pfm: -95.87586175125695
episode: 320 training return: tensor(-1520.2185, device='cuda:0')
episode: 321 training return: tensor(-1499.6323, device='cuda:0')
episode: 322 training return: tensor(-1497.6641, device='cuda:0')
episode: 323 training return: tensor(-1542.0276, device='cuda:0')
epoch: 81 test_true_pfm: -87.56483132148396
episode: 324 training return: tensor(-1560.9772, device='cuda:0')
episode: 325 training return: tensor(-1529.4121, device='cuda:0')
episode: 326 training return: tensor(-1561.3667, device='cuda:0')
episode: 327 training return: tensor(-1513.3314, device='cuda:0')
epoch: 82 test_true_pfm: -89.01660667474448
episode: 328 training return: tensor(-1515.8409, device='cuda:0')
episode: 329 training return: tensor(-1496.2717, device='cuda:0')
episode: 330 training return: tensor(-1492.8861, device='cuda:0')
episode: 331 training return: tensor(-1532.2029, device='cuda:0')
epoch: 83 test_true_pfm: -82.7979754575566
episode: 332 training return: tensor(-1565.1792, device='cuda:0')
episode: 333 training return: tensor(-1522.6927, device='cuda:0')
episode: 334 training return: tensor(-1504.5435, device='cuda:0')
episode: 335 training return: tensor(-1497.1692, device='cuda:0')
epoch: 84 test_true_pfm: -97.59367036667156
episode: 336 training return: tensor(-1520.3607, device='cuda:0')
episode: 337 training return: tensor(-1525.2821, device='cuda:0')
episode: 338 training return: tensor(-1514.1249, device='cuda:0')
episode: 339 training return: tensor(-1538.0414, device='cuda:0')
epoch: 85 test_true_pfm: -81.78206689691868
episode: 340 training return: tensor(-1530.6884, device='cuda:0')
episode: 341 training return: tensor(-1543.5181, device='cuda:0')
episode: 342 training return: tensor(-1539.7572, device='cuda:0')
episode: 343 training return: tensor(-1517.2227, device='cuda:0')
epoch: 86 test_true_pfm: -94.24831578237644
episode: 344 training return: tensor(-1523.7019, device='cuda:0')
episode: 345 training return: tensor(-1532.7842, device='cuda:0')
episode: 346 training return: tensor(-1532.6752, device='cuda:0')
episode: 347 training return: tensor(-1540.1794, device='cuda:0')
epoch: 87 test_true_pfm: -90.18094204725423
episode: 348 training return: tensor(-1506.0927, device='cuda:0')
episode: 349 training return: tensor(-1512.9866, device='cuda:0')
episode: 350 training return: tensor(-1509.5911, device='cuda:0')
episode: 351 training return: tensor(-1502.8680, device='cuda:0')
epoch: 88 test_true_pfm: -89.9419596828428
episode: 352 training return: tensor(-1495.9227, device='cuda:0')
episode: 353 training return: tensor(-1523.0331, device='cuda:0')
episode: 354 training return: tensor(-1519.4816, device='cuda:0')
episode: 355 training return: tensor(-1498.2997, device='cuda:0')
epoch: 89 test_true_pfm: -92.41866737954768
episode: 356 training return: tensor(-1504.7633, device='cuda:0')
episode: 357 training return: tensor(-1498.1075, device='cuda:0')
episode: 358 training return: tensor(-1525.4816, device='cuda:0')
episode: 359 training return: tensor(-1484.5239, device='cuda:0')
epoch: 90 test_true_pfm: -88.75566549160898
episode: 360 training return: tensor(-1536.6082, device='cuda:0')
episode: 361 training return: tensor(-1502.0006, device='cuda:0')
episode: 362 training return: tensor(-1500.3391, device='cuda:0')
episode: 363 training return: tensor(-1498.4432, device='cuda:0')
epoch: 91 test_true_pfm: -93.52270648214676
episode: 364 training return: tensor(-1516.6501, device='cuda:0')
episode: 365 training return: tensor(-1533.4985, device='cuda:0')
episode: 366 training return: tensor(-1519.4973, device='cuda:0')
episode: 367 training return: tensor(-1534.2811, device='cuda:0')
epoch: 92 test_true_pfm: -77.45736480687071
episode: 368 training return: tensor(-1537.3906, device='cuda:0')
episode: 369 training return: tensor(-1523.4335, device='cuda:0')
episode: 370 training return: tensor(-1528.8108, device='cuda:0')
episode: 371 training return: tensor(-1517.2415, device='cuda:0')
epoch: 93 test_true_pfm: -83.51812584876119
episode: 372 training return: tensor(-1509.7745, device='cuda:0')
episode: 373 training return: tensor(-1542.3796, device='cuda:0')
episode: 374 training return: tensor(-1510.7373, device='cuda:0')
episode: 375 training return: tensor(-1521.8141, device='cuda:0')
epoch: 94 test_true_pfm: -93.37333325682312
episode: 376 training return: tensor(-1538.0854, device='cuda:0')
episode: 377 training return: tensor(-1513.7557, device='cuda:0')
episode: 378 training return: tensor(-1496.9554, device='cuda:0')
episode: 379 training return: tensor(-1513.0034, device='cuda:0')
epoch: 95 test_true_pfm: -84.62668677164693
episode: 380 training return: tensor(-1512.0800, device='cuda:0')
episode: 381 training return: tensor(-1544.7502, device='cuda:0')
episode: 382 training return: tensor(-1518.3308, device='cuda:0')
episode: 383 training return: tensor(-1545.6331, device='cuda:0')
epoch: 96 test_true_pfm: -92.06547987424085
episode: 384 training return: tensor(-1524.3298, device='cuda:0')
episode: 385 training return: tensor(-1508.8528, device='cuda:0')
episode: 386 training return: tensor(-1540.0653, device='cuda:0')
episode: 387 training return: tensor(-1513.2467, device='cuda:0')
epoch: 97 test_true_pfm: -82.7889541325094
episode: 388 training return: tensor(-1568.5486, device='cuda:0')
episode: 389 training return: tensor(-1494.5939, device='cuda:0')
episode: 390 training return: tensor(-1516.3528, device='cuda:0')
episode: 391 training return: tensor(-1513.8612, device='cuda:0')
epoch: 98 test_true_pfm: -90.77071839391247
episode: 392 training return: tensor(-1508.2214, device='cuda:0')
episode: 393 training return: tensor(-1507.1238, device='cuda:0')
episode: 394 training return: tensor(-1540.7561, device='cuda:0')
episode: 395 training return: tensor(-1535.1285, device='cuda:0')
epoch: 99 test_true_pfm: -85.41004476310303
episode: 396 training return: tensor(-1486.9633, device='cuda:0')
episode: 397 training return: tensor(-1494.0887, device='cuda:0')
episode: 398 training return: tensor(-1524.2040, device='cuda:0')
episode: 399 training return: tensor(-1545.5845, device='cuda:0')
epoch: 100 test_true_pfm: -77.37119826909333
episode: 400 training return: tensor(-1535.2672, device='cuda:0')
episode: 401 training return: tensor(-1523.0515, device='cuda:0')
episode: 402 training return: tensor(-1562.4194, device='cuda:0')
episode: 403 training return: tensor(-1487.9103, device='cuda:0')
epoch: 101 test_true_pfm: -74.72030846907984
episode: 404 training return: tensor(-1518.5043, device='cuda:0')
episode: 405 training return: tensor(-1529.1465, device='cuda:0')
episode: 406 training return: tensor(-1488.6998, device='cuda:0')
episode: 407 training return: tensor(-1517.0608, device='cuda:0')
epoch: 102 test_true_pfm: -80.97506927813916
episode: 408 training return: tensor(-1496.6934, device='cuda:0')
episode: 409 training return: tensor(-1508.9025, device='cuda:0')
episode: 410 training return: tensor(-1511.3999, device='cuda:0')
episode: 411 training return: tensor(-1523.4670, device='cuda:0')
epoch: 103 test_true_pfm: -91.45958858940942
episode: 412 training return: tensor(-1502.4901, device='cuda:0')
episode: 413 training return: tensor(-1514.1056, device='cuda:0')
episode: 414 training return: tensor(-1544.6722, device='cuda:0')
episode: 415 training return: tensor(-1516.0349, device='cuda:0')
epoch: 104 test_true_pfm: -81.61969956350458
episode: 416 training return: tensor(-1489.8639, device='cuda:0')
episode: 417 training return: tensor(-1519.3534, device='cuda:0')
episode: 418 training return: tensor(-1520.8353, device='cuda:0')
episode: 419 training return: tensor(-1532.9679, device='cuda:0')
epoch: 105 test_true_pfm: -85.78722171508014
episode: 420 training return: tensor(-1516.5400, device='cuda:0')
episode: 421 training return: tensor(-1509.3804, device='cuda:0')
episode: 422 training return: tensor(-1494.8983, device='cuda:0')
episode: 423 training return: tensor(-1518.5424, device='cuda:0')
epoch: 106 test_true_pfm: -78.97909062178066
episode: 424 training return: tensor(-1512.1063, device='cuda:0')
episode: 425 training return: tensor(-1497.0874, device='cuda:0')
episode: 426 training return: tensor(-1496.9976, device='cuda:0')
episode: 427 training return: tensor(-1519.4594, device='cuda:0')
epoch: 107 test_true_pfm: -87.58101173359164
episode: 428 training return: tensor(-1521.3252, device='cuda:0')
episode: 429 training return: tensor(-1502.4152, device='cuda:0')
episode: 430 training return: tensor(-1493.3323, device='cuda:0')
episode: 431 training return: tensor(-1522.1747, device='cuda:0')
epoch: 108 test_true_pfm: -85.75337330765213
episode: 432 training return: tensor(-1499.0835, device='cuda:0')
episode: 433 training return: tensor(-1515.3507, device='cuda:0')
episode: 434 training return: tensor(-1519.4363, device='cuda:0')
episode: 435 training return: tensor(-1518.3408, device='cuda:0')
epoch: 109 test_true_pfm: -80.6251206127021
episode: 436 training return: tensor(-1517.0325, device='cuda:0')
episode: 437 training return: tensor(-1536.3929, device='cuda:0')
episode: 438 training return: tensor(-1526.7936, device='cuda:0')
episode: 439 training return: tensor(-1513.1727, device='cuda:0')
epoch: 110 test_true_pfm: -88.40741847693715
episode: 440 training return: tensor(-1517.6490, device='cuda:0')
episode: 441 training return: tensor(-1503.9819, device='cuda:0')
episode: 442 training return: tensor(-1521.1129, device='cuda:0')
episode: 443 training return: tensor(-1532.5614, device='cuda:0')
epoch: 111 test_true_pfm: -82.96460804863024
episode: 444 training return: tensor(-1493.1544, device='cuda:0')
episode: 445 training return: tensor(-1516.6531, device='cuda:0')
episode: 446 training return: tensor(-1537.6246, device='cuda:0')
episode: 447 training return: tensor(-1529.0077, device='cuda:0')
epoch: 112 test_true_pfm: -84.26628805860894
episode: 448 training return: tensor(-1482.4390, device='cuda:0')
episode: 449 training return: tensor(-1548.5809, device='cuda:0')
episode: 450 training return: tensor(-1530.6190, device='cuda:0')
episode: 451 training return: tensor(-1511.8690, device='cuda:0')
epoch: 113 test_true_pfm: -92.64366865585026
episode: 452 training return: tensor(-1505.9044, device='cuda:0')
episode: 453 training return: tensor(-1509.4796, device='cuda:0')
episode: 454 training return: tensor(-1538.6611, device='cuda:0')
episode: 455 training return: tensor(-1550.7391, device='cuda:0')
epoch: 114 test_true_pfm: -91.43838432754167
episode: 456 training return: tensor(-1525.7043, device='cuda:0')
episode: 457 training return: tensor(-1539.8400, device='cuda:0')
episode: 458 training return: tensor(-1478.4525, device='cuda:0')
episode: 459 training return: tensor(-1486.5564, device='cuda:0')
epoch: 115 test_true_pfm: -86.88633654591855
episode: 460 training return: tensor(-1510.1743, device='cuda:0')
episode: 461 training return: tensor(-1530.7720, device='cuda:0')
episode: 462 training return: tensor(-1520.1449, device='cuda:0')
episode: 463 training return: tensor(-1544.2776, device='cuda:0')
epoch: 116 test_true_pfm: -82.10542453768602
episode: 464 training return: tensor(-1505.6863, device='cuda:0')
episode: 465 training return: tensor(-1521.4398, device='cuda:0')
episode: 466 training return: tensor(-1493.5486, device='cuda:0')
episode: 467 training return: tensor(-1496.7961, device='cuda:0')
epoch: 117 test_true_pfm: -80.28948527417727
episode: 468 training return: tensor(-1521.4674, device='cuda:0')
episode: 469 training return: tensor(-1527.4114, device='cuda:0')
episode: 470 training return: tensor(-1547.8445, device='cuda:0')
episode: 471 training return: tensor(-1497.7698, device='cuda:0')
epoch: 118 test_true_pfm: -89.31343495452643
episode: 472 training return: tensor(-1517.2192, device='cuda:0')
episode: 473 training return: tensor(-1505.9209, device='cuda:0')
episode: 474 training return: tensor(-1504.0460, device='cuda:0')
episode: 475 training return: tensor(-1496.7603, device='cuda:0')
epoch: 119 test_true_pfm: -90.44037190341088
episode: 476 training return: tensor(-1503.8917, device='cuda:0')
episode: 477 training return: tensor(-1525.6687, device='cuda:0')
episode: 478 training return: tensor(-1558.1134, device='cuda:0')
episode: 479 training return: tensor(-1525.6161, device='cuda:0')
epoch: 120 test_true_pfm: -94.12241620735375
episode: 480 training return: tensor(-1504.0078, device='cuda:0')
episode: 481 training return: tensor(-1504.6044, device='cuda:0')
episode: 482 training return: tensor(-1522.4967, device='cuda:0')
episode: 483 training return: tensor(-1518.8287, device='cuda:0')
epoch: 121 test_true_pfm: -92.45803004733118
episode: 484 training return: tensor(-1496.4012, device='cuda:0')
episode: 485 training return: tensor(-1514.9209, device='cuda:0')
episode: 486 training return: tensor(-1520.1805, device='cuda:0')
episode: 487 training return: tensor(-1502.8583, device='cuda:0')
epoch: 122 test_true_pfm: -84.15342623732631
episode: 488 training return: tensor(-1525.3171, device='cuda:0')
episode: 489 training return: tensor(-1509.1088, device='cuda:0')
episode: 490 training return: tensor(-1501.2194, device='cuda:0')
episode: 491 training return: tensor(-1513.5376, device='cuda:0')
epoch: 123 test_true_pfm: -88.72432414859198
episode: 492 training return: tensor(-1512.1964, device='cuda:0')
episode: 493 training return: tensor(-1496.5750, device='cuda:0')
episode: 494 training return: tensor(-1525.0293, device='cuda:0')
episode: 495 training return: tensor(-1507.6288, device='cuda:0')
epoch: 124 test_true_pfm: -98.60665655861114
episode: 496 training return: tensor(-1531.9829, device='cuda:0')
episode: 497 training return: tensor(-1487.1755, device='cuda:0')
episode: 498 training return: tensor(-1527.6151, device='cuda:0')
episode: 499 training return: tensor(-1527.8262, device='cuda:0')
epoch: 125 test_true_pfm: -92.38206889692871
episode: 500 training return: tensor(-1535.1029, device='cuda:0')
episode: 501 training return: tensor(-1486.7689, device='cuda:0')
episode: 502 training return: tensor(-1535.6899, device='cuda:0')
episode: 503 training return: tensor(-1501.0281, device='cuda:0')
epoch: 126 test_true_pfm: -84.87804915634382
episode: 504 training return: tensor(-1524.9309, device='cuda:0')
episode: 505 training return: tensor(-1522.1702, device='cuda:0')
episode: 506 training return: tensor(-1494.6060, device='cuda:0')
episode: 507 training return: tensor(-1514.3433, device='cuda:0')
epoch: 127 test_true_pfm: -91.03686197010802
episode: 508 training return: tensor(-1532.5686, device='cuda:0')
episode: 509 training return: tensor(-1522.4521, device='cuda:0')
episode: 510 training return: tensor(-1536.8416, device='cuda:0')
episode: 511 training return: tensor(-1510.7546, device='cuda:0')
epoch: 128 test_true_pfm: -70.17066401940443
episode: 512 training return: tensor(-1516.4940, device='cuda:0')
episode: 513 training return: tensor(-1518.0201, device='cuda:0')
episode: 514 training return: tensor(-1539.5138, device='cuda:0')
episode: 515 training return: tensor(-1532.1991, device='cuda:0')
epoch: 129 test_true_pfm: -81.31762795902812
episode: 516 training return: tensor(-1518.5869, device='cuda:0')
episode: 517 training return: tensor(-1553.8922, device='cuda:0')
episode: 518 training return: tensor(-1530.7555, device='cuda:0')
episode: 519 training return: tensor(-1533.1244, device='cuda:0')
epoch: 130 test_true_pfm: -79.11878894791607
episode: 520 training return: tensor(-1485.6440, device='cuda:0')
episode: 521 training return: tensor(-1537.7648, device='cuda:0')
episode: 522 training return: tensor(-1488.3442, device='cuda:0')
episode: 523 training return: tensor(-1519.4597, device='cuda:0')
epoch: 131 test_true_pfm: -81.10395706710644
episode: 524 training return: tensor(-1492.8264, device='cuda:0')
episode: 525 training return: tensor(-1557.7333, device='cuda:0')
episode: 526 training return: tensor(-1507.1340, device='cuda:0')
episode: 527 training return: tensor(-1527.0427, device='cuda:0')
epoch: 132 test_true_pfm: -83.03005230383327
episode: 528 training return: tensor(-1536.6710, device='cuda:0')
episode: 529 training return: tensor(-1523.3618, device='cuda:0')
episode: 530 training return: tensor(-1534.4034, device='cuda:0')
episode: 531 training return: tensor(-1524.6943, device='cuda:0')
epoch: 133 test_true_pfm: -81.41711484730064
episode: 532 training return: tensor(-1521.8824, device='cuda:0')
episode: 533 training return: tensor(-1534.1558, device='cuda:0')
episode: 534 training return: tensor(-1541.4799, device='cuda:0')
episode: 535 training return: tensor(-1561.8625, device='cuda:0')
epoch: 134 test_true_pfm: -77.48799708190192
episode: 536 training return: tensor(-1509.9548, device='cuda:0')
episode: 537 training return: tensor(-1561.6886, device='cuda:0')
episode: 538 training return: tensor(-1532.8860, device='cuda:0')
episode: 539 training return: tensor(-1521.3143, device='cuda:0')
epoch: 135 test_true_pfm: -67.03128431505026
episode: 540 training return: tensor(-1577.5875, device='cuda:0')
episode: 541 training return: tensor(-1516.0520, device='cuda:0')
episode: 542 training return: tensor(-1553.0192, device='cuda:0')
episode: 543 training return: tensor(-1530.5239, device='cuda:0')
epoch: 136 test_true_pfm: -86.17895864197777
episode: 544 training return: tensor(-1518.3185, device='cuda:0')
episode: 545 training return: tensor(-1523.8873, device='cuda:0')
episode: 546 training return: tensor(-1530.4890, device='cuda:0')
episode: 547 training return: tensor(-1500.8539, device='cuda:0')
epoch: 137 test_true_pfm: -79.00099933983735
episode: 548 training return: tensor(-1525.9083, device='cuda:0')
episode: 549 training return: tensor(-1548.7532, device='cuda:0')
episode: 550 training return: tensor(-1536.9147, device='cuda:0')
episode: 551 training return: tensor(-1569.8633, device='cuda:0')
epoch: 138 test_true_pfm: -81.30239436913057
episode: 552 training return: tensor(-1474.3488, device='cuda:0')
episode: 553 training return: tensor(-1519.8843, device='cuda:0')
episode: 554 training return: tensor(-1502.8728, device='cuda:0')
episode: 555 training return: tensor(-1486.8586, device='cuda:0')
epoch: 139 test_true_pfm: -82.56044144153668
episode: 556 training return: tensor(-1518.3088, device='cuda:0')
episode: 557 training return: tensor(-1531.4380, device='cuda:0')
episode: 558 training return: tensor(-1522.9106, device='cuda:0')
episode: 559 training return: tensor(-1511.2716, device='cuda:0')
epoch: 140 test_true_pfm: -69.44087162397905
episode: 560 training return: tensor(-1498.2953, device='cuda:0')
episode: 561 training return: tensor(-1533.7853, device='cuda:0')
episode: 562 training return: tensor(-1516.2622, device='cuda:0')
episode: 563 training return: tensor(-1524.6842, device='cuda:0')
epoch: 141 test_true_pfm: -93.61228905323337
episode: 564 training return: tensor(-1495.5098, device='cuda:0')
episode: 565 training return: tensor(-1526.5441, device='cuda:0')
episode: 566 training return: tensor(-1471.3110, device='cuda:0')
episode: 567 training return: tensor(-1500.3734, device='cuda:0')
epoch: 142 test_true_pfm: -93.91741744344608
episode: 568 training return: tensor(-1527.5812, device='cuda:0')
episode: 569 training return: tensor(-1519.2275, device='cuda:0')
episode: 570 training return: tensor(-1533.0044, device='cuda:0')
episode: 571 training return: tensor(-1523.7800, device='cuda:0')
epoch: 143 test_true_pfm: -86.70443086023295
episode: 572 training return: tensor(-1502.9668, device='cuda:0')
episode: 573 training return: tensor(-1499.1373, device='cuda:0')
episode: 574 training return: tensor(-1503.0858, device='cuda:0')
episode: 575 training return: tensor(-1546.8302, device='cuda:0')
epoch: 144 test_true_pfm: -85.15280645415605
episode: 576 training return: tensor(-1548.4645, device='cuda:0')
episode: 577 training return: tensor(-1495.9855, device='cuda:0')
episode: 578 training return: tensor(-1494.8230, device='cuda:0')
episode: 579 training return: tensor(-1510.0507, device='cuda:0')
epoch: 145 test_true_pfm: -82.58292354689102
episode: 580 training return: tensor(-1520.3868, device='cuda:0')
episode: 581 training return: tensor(-1499.9213, device='cuda:0')
episode: 582 training return: tensor(-1526.2909, device='cuda:0')
episode: 583 training return: tensor(-1502.4191, device='cuda:0')
epoch: 146 test_true_pfm: -79.44948732031433
episode: 584 training return: tensor(-1498.3590, device='cuda:0')
episode: 585 training return: tensor(-1524.5803, device='cuda:0')
episode: 586 training return: tensor(-1502.4873, device='cuda:0')
episode: 587 training return: tensor(-1483.0536, device='cuda:0')
epoch: 147 test_true_pfm: -82.03834457394525
episode: 588 training return: tensor(-1521.4763, device='cuda:0')
episode: 589 training return: tensor(-1521.6188, device='cuda:0')
episode: 590 training return: tensor(-1515.0773, device='cuda:0')
episode: 591 training return: tensor(-1508.9996, device='cuda:0')
epoch: 148 test_true_pfm: -76.11662536027454
episode: 592 training return: tensor(-1516.0151, device='cuda:0')
episode: 593 training return: tensor(-1508.1953, device='cuda:0')
episode: 594 training return: tensor(-1502.3640, device='cuda:0')
episode: 595 training return: tensor(-1492.5380, device='cuda:0')
epoch: 149 test_true_pfm: -83.22686984908756
episode: 596 training return: tensor(-1524.6853, device='cuda:0')
episode: 597 training return: tensor(-1542.2704, device='cuda:0')
episode: 598 training return: tensor(-1530.2820, device='cuda:0')
episode: 599 training return: tensor(-1526.2443, device='cuda:0')
epoch: 150 test_true_pfm: -84.75547192171614
