['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'brac', '--traj', 'expert', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 4.916243605148047 test_loss: 0.02004207968711853
epoch: 1 training_loss -1.3327790040522813 test_loss: -2.2828765869140626
epoch: 2 training_loss -3.231838661432266 test_loss: -3.955613708496094
epoch: 3 training_loss -4.49722758769989 test_loss: -4.909618759155274
epoch: 4 training_loss -5.4155260276794435 test_loss: -5.826023864746094
epoch: 5 training_loss -6.287525048255921 test_loss: -6.775489044189453
epoch: 6 training_loss -6.904724698066712 test_loss: -7.127474212646485
epoch: 7 training_loss -7.296928191184998 test_loss: -7.293062591552735
epoch: 8 training_loss -7.797997760772705 test_loss: -7.94903335571289
epoch: 9 training_loss -8.054938998222351 test_loss: -8.323973846435546
epoch: 10 training_loss -8.41216383934021 test_loss: -8.634291076660157
epoch: 11 training_loss -8.754036436080932 test_loss: -8.766563415527344
epoch: 12 training_loss -8.997296161651612 test_loss: -9.229761505126953
epoch: 13 training_loss -9.240088748931885 test_loss: -9.583966064453126
epoch: 14 training_loss -9.522026977539063 test_loss: -9.527244567871094
epoch: 15 training_loss -9.715067863464355 test_loss: -9.769827270507813
epoch: 16 training_loss -9.91507381439209 test_loss: -9.949840545654297
epoch: 17 training_loss -10.10166251182556 test_loss: -10.33733901977539
epoch: 18 training_loss -10.359685878753663 test_loss: -10.395970153808594
epoch: 19 training_loss -10.44549973487854 test_loss: -10.353117370605469
epoch: 20 training_loss -10.538706531524658 test_loss: -10.719493103027343
epoch: 21 training_loss -10.779119634628296 test_loss: -10.715375518798828
epoch: 22 training_loss -10.862662258148193 test_loss: -11.007820892333985
epoch: 23 training_loss -11.098661832809448 test_loss: -11.122751617431641
epoch: 24 training_loss -11.238240003585815 test_loss: -11.389112091064453
epoch: 25 training_loss -11.310506534576415 test_loss: -11.487013244628907
epoch: 26 training_loss -11.420017404556274 test_loss: -11.595111083984374
epoch: 27 training_loss -11.550887002944947 test_loss: -11.511546325683593
epoch: 28 training_loss -11.706644744873048 test_loss: -11.892452239990234
epoch: 29 training_loss -11.829264640808105 test_loss: -11.914900970458984
epoch: 30 training_loss -11.911740913391114 test_loss: -11.90564422607422
epoch: 31 training_loss -12.082344570159911 test_loss: -12.123704528808593
epoch: 32 training_loss -12.093957214355468 test_loss: -12.336944580078125
epoch: 33 training_loss -12.37095594406128 test_loss: -12.069182586669921
epoch: 34 training_loss -12.320576448440551 test_loss: -12.494842529296875
epoch: 35 training_loss -12.396160192489624 test_loss: -12.392197418212891
epoch: 36 training_loss -12.556301174163819 test_loss: -12.496800994873047
epoch: 37 training_loss -12.663584661483764 test_loss: -12.6479248046875
epoch: 38 training_loss -12.735226745605468 test_loss: -12.85509033203125
epoch: 39 training_loss -12.880951013565063 test_loss: -12.797405242919922
epoch: 40 training_loss -12.868760414123535 test_loss: -12.776785278320313
epoch: 41 training_loss -12.967601385116577 test_loss: -12.944601440429688
epoch: 42 training_loss -13.050133962631225 test_loss: -13.102912902832031
epoch: 43 training_loss -13.031479768753051 test_loss: -12.982987976074218
epoch: 44 training_loss -13.192430610656737 test_loss: -13.038214111328125
epoch: 45 training_loss -13.114259233474732 test_loss: -13.1759033203125
epoch: 46 training_loss -13.253193302154541 test_loss: -13.444496154785156
epoch: 47 training_loss -13.320094623565673 test_loss: -13.410102844238281
epoch: 48 training_loss -13.38442901611328 test_loss: -13.36171875
epoch: 49 training_loss -13.49230450630188 test_loss: -13.306854248046875
epoch: 50 training_loss -13.466047563552856 test_loss: -13.5489013671875
epoch: 51 training_loss -13.52886516571045 test_loss: -13.437814331054687
epoch: 52 training_loss -13.552494287490845 test_loss: -13.641761779785156
epoch: 53 training_loss -13.67649290084839 test_loss: -13.807696533203124
epoch: 54 training_loss -13.742329578399659 test_loss: -13.579324340820312
epoch: 55 training_loss -13.66803071975708 test_loss: -13.824568176269532
epoch: 56 training_loss -13.714164581298828 test_loss: -13.762388610839844
epoch: 57 training_loss -13.795813789367676 test_loss: -14.013682556152343
epoch: 58 training_loss -13.840638151168823 test_loss: -13.61455841064453
epoch: 59 training_loss -13.824075298309326 test_loss: -13.50690155029297
epoch: 60 training_loss -13.960341787338256 test_loss: -14.023533630371094
epoch: 61 training_loss -13.951221551895141 test_loss: -13.780633544921875
epoch: 62 training_loss -14.02314190864563 test_loss: -13.794586181640625
epoch: 63 training_loss -14.027079477310181 test_loss: -13.950901794433594
epoch: 64 training_loss -14.049813413619995 test_loss: -14.19977264404297
epoch: 65 training_loss -14.104983282089233 test_loss: -14.094758605957031
epoch: 66 training_loss -14.116962156295777 test_loss: -14.060772705078126
epoch: 67 training_loss -14.161794300079345 test_loss: -14.204705810546875
epoch: 68 training_loss -14.174568796157837 test_loss: -14.154754638671875
epoch: 69 training_loss -14.152782201766968 test_loss: -14.4570556640625
epoch: 70 training_loss -14.220809993743897 test_loss: -14.0543212890625
epoch: 71 training_loss -14.321559028625488 test_loss: -14.464173889160156
epoch: 72 training_loss -14.297201051712037 test_loss: -14.402365112304688
epoch: 73 training_loss -14.377054386138916 test_loss: -14.298548889160156
epoch: 74 training_loss -14.345861158370973 test_loss: -14.436738586425781
epoch: 75 training_loss -14.314199352264405 test_loss: -14.489321899414062
epoch: 76 training_loss -14.524184131622315 test_loss: -14.362281799316406
epoch: 77 training_loss -14.352359018325807 test_loss: -14.276397705078125
epoch: 78 training_loss -14.444935541152955 test_loss: -14.390275573730468
epoch: 79 training_loss -14.482243986129761 test_loss: -14.490928649902344
epoch: 80 training_loss -14.500752849578857 test_loss: -14.538739013671876
epoch: 81 training_loss -14.513442993164062 test_loss: -14.579988098144531
epoch: 82 training_loss -14.57174898147583 test_loss: -14.356320190429688
epoch: 83 training_loss -14.600102500915527 test_loss: -14.623393249511718
epoch: 84 training_loss -14.592243547439574 test_loss: -14.495225524902343
epoch: 85 training_loss -14.516606845855712 test_loss: -14.671099853515624
epoch: 86 training_loss -14.576896867752076 test_loss: -14.809625244140625
epoch: 87 training_loss -14.723623762130737 test_loss: -14.696670532226562
epoch: 88 training_loss -14.704021368026734 test_loss: -14.79082794189453
epoch: 89 training_loss -14.755387287139893 test_loss: -14.617582702636719
epoch: 90 training_loss -14.754566745758057 test_loss: -14.746527099609375
epoch: 91 training_loss -14.750920400619506 test_loss: -14.689974975585937
epoch: 92 training_loss -14.822454395294189 test_loss: -14.785575866699219
epoch: 93 training_loss -14.807934675216675 test_loss: -14.817906188964844
epoch: 94 training_loss -14.866553449630738 test_loss: -14.847479248046875
epoch: 95 training_loss -14.842766828536988 test_loss: -14.958833312988281
epoch: 96 training_loss -14.810172967910766 test_loss: -14.704173278808593
epoch: 97 training_loss -14.859739799499511 test_loss: -14.992343139648437
epoch: 98 training_loss -14.874027910232543 test_loss: -14.894931030273437
epoch: 99 training_loss -14.965227746963501 test_loss: -15.0780517578125
epoch: 100 training_loss -14.9498433303833 test_loss: -14.794670104980469
epoch: 101 training_loss -14.976951837539673 test_loss: -15.086680603027343
epoch: 102 training_loss -15.02123927116394 test_loss: -14.874015808105469
epoch: 103 training_loss -14.992084579467774 test_loss: -15.032157897949219
epoch: 104 training_loss -15.036489305496216 test_loss: -15.082984924316406
epoch: 105 training_loss -15.097579870224 test_loss: -14.95691375732422
epoch: 106 training_loss -15.102097187042236 test_loss: -14.885906982421876
epoch: 107 training_loss -15.084886875152588 test_loss: -15.138807678222657
epoch: 108 training_loss -15.067671537399292 test_loss: -15.162953186035157
epoch: 109 training_loss -15.128060064315797 test_loss: -15.016143798828125
epoch: 110 training_loss -15.106524171829223 test_loss: -15.010032653808594
epoch: 111 training_loss -15.152852754592896 test_loss: -15.1320556640625
epoch: 112 training_loss -15.21461564064026 test_loss: -15.194718933105468
epoch: 113 training_loss -15.23352396965027 test_loss: -15.140948486328124
epoch: 114 training_loss -15.233861885070802 test_loss: -15.18877716064453
epoch: 115 training_loss -15.12835548400879 test_loss: -15.017984008789062
epoch: 116 training_loss -15.199300880432128 test_loss: -15.2548583984375
epoch: 117 training_loss -15.24465883255005 test_loss: -15.194003295898437
epoch: 118 training_loss -15.293570156097411 test_loss: -15.4325927734375
epoch: 119 training_loss -15.204322834014892 test_loss: -15.281390380859374
epoch: 120 training_loss -15.210718059539795 test_loss: -15.2759521484375
epoch: 121 training_loss -15.233788251876831 test_loss: -15.034254455566407
epoch: 122 training_loss -15.284874143600463 test_loss: -15.282627868652344
epoch: 123 training_loss -15.319689674377441 test_loss: -15.079240417480468
epoch: 124 training_loss -15.323391923904419 test_loss: -15.351913452148438
epoch: 125 training_loss -15.34426459312439 test_loss: -15.23343505859375
epoch: 126 training_loss -15.287799835205078 test_loss: -15.344102478027343
epoch: 127 training_loss -15.270682430267334 test_loss: -15.241403198242187
epoch: 128 training_loss -15.37167802810669 test_loss: -15.449156188964844
epoch: 129 training_loss -15.392927646636963 test_loss: -15.422129821777343
epoch: 130 training_loss -15.390326871871949 test_loss: -15.314212036132812
epoch: 131 training_loss -15.450533065795899 test_loss: -15.26916046142578
epoch: 132 training_loss -15.403472528457641 test_loss: -15.489158630371094
epoch: 133 training_loss -15.46609827041626 test_loss: -15.441526794433594
epoch: 134 training_loss -15.484392023086547 test_loss: -15.380059814453125
epoch: 135 training_loss -15.460502347946168 test_loss: -15.389175415039062
epoch: 136 training_loss -15.454686412811279 test_loss: -15.54703369140625
epoch: 137 training_loss -15.458339080810546 test_loss: -15.523942565917968
epoch: 138 training_loss -15.458273096084595 test_loss: -15.516679382324218
epoch: 139 training_loss -15.53000069618225 test_loss: -15.532878112792968
epoch: 140 training_loss -15.49339334487915 test_loss: -15.678633117675782
epoch: 141 training_loss -15.559787521362304 test_loss: -15.385362243652343
epoch: 142 training_loss -15.537637901306152 test_loss: -15.468943786621093
epoch: 143 training_loss -15.510391130447388 test_loss: -15.598619079589843
epoch: 144 training_loss -15.591807308197021 test_loss: -15.532664489746093
epoch: 145 training_loss -15.653975715637207 test_loss: -15.601992797851562
epoch: 146 training_loss -15.617693872451783 test_loss: -15.561032104492188
epoch: 147 training_loss -15.523084268569946 test_loss: -15.567660522460937
epoch: 148 training_loss -15.555055017471313 test_loss: -15.578839111328126
epoch: 149 training_loss -15.52195523262024 test_loss: -15.643473815917968
5843.038528169418
episode: 0 training return: tensor(-4918.4048, device='cuda:0')
episode: 1 training return: tensor(-5314.1519, device='cuda:0')
episode: 2 training return: tensor(-5480.4468, device='cuda:0')
episode: 3 training return: tensor(-4875.6987, device='cuda:0')
epoch: 1 test_true_pfm: -294.59517597945137
episode: 4 training return: tensor(-4835.9497, device='cuda:0')
episode: 5 training return: tensor(-5494.1055, device='cuda:0')
episode: 6 training return: tensor(-4681.6187, device='cuda:0')
episode: 7 training return: tensor(-5416.8530, device='cuda:0')
epoch: 2 test_true_pfm: -146.84271356193418
episode: 8 training return: tensor(-5389.2822, device='cuda:0')
episode: 9 training return: tensor(-5288.5742, device='cuda:0')
episode: 10 training return: tensor(-1727.5563, device='cuda:0')
episode: 11 training return: tensor(-2026.2299, device='cuda:0')
epoch: 3 test_true_pfm: -209.8540692294529
episode: 12 training return: tensor(-2363.2983, device='cuda:0')
episode: 13 training return: tensor(-2410.1780, device='cuda:0')
episode: 14 training return: tensor(-2359.1543, device='cuda:0')
episode: 15 training return: tensor(-2235.5916, device='cuda:0')
epoch: 4 test_true_pfm: -109.37160124578861
episode: 16 training return: tensor(-2341.2727, device='cuda:0')
episode: 17 training return: tensor(-2032.5791, device='cuda:0')
episode: 18 training return: tensor(-1838.8824, device='cuda:0')
episode: 19 training return: tensor(-1789.5347, device='cuda:0')
epoch: 5 test_true_pfm: -95.51744745106453
episode: 20 training return: tensor(-1804.7407, device='cuda:0')
episode: 21 training return: tensor(-1767.0762, device='cuda:0')
episode: 22 training return: tensor(-1773.3073, device='cuda:0')
episode: 23 training return: tensor(-1769.3975, device='cuda:0')
epoch: 6 test_true_pfm: -47.3085490813892
episode: 24 training return: tensor(-1719.4843, device='cuda:0')
episode: 25 training return: tensor(-1727.0685, device='cuda:0')
episode: 26 training return: tensor(-1717.2582, device='cuda:0')
episode: 27 training return: tensor(-1729.9969, device='cuda:0')
epoch: 7 test_true_pfm: -54.000659803470604
episode: 28 training return: tensor(-1726.8378, device='cuda:0')
episode: 29 training return: tensor(-1729.3925, device='cuda:0')
episode: 30 training return: tensor(-1747.7174, device='cuda:0')
episode: 31 training return: tensor(-1736.9620, device='cuda:0')
epoch: 8 test_true_pfm: -50.84629383168169
episode: 32 training return: tensor(-1726.8784, device='cuda:0')
episode: 33 training return: tensor(-1737.5458, device='cuda:0')
episode: 34 training return: tensor(-1720.7740, device='cuda:0')
episode: 35 training return: tensor(-1747.5703, device='cuda:0')
epoch: 9 test_true_pfm: -69.70515354130059
episode: 36 training return: tensor(-1730.6624, device='cuda:0')
episode: 37 training return: tensor(-1750.3654, device='cuda:0')
episode: 38 training return: tensor(-1745.1616, device='cuda:0')
episode: 39 training return: tensor(-1741.9791, device='cuda:0')
epoch: 10 test_true_pfm: -34.06814955979346
episode: 40 training return: tensor(-1743.4678, device='cuda:0')
episode: 41 training return: tensor(-1747.5896, device='cuda:0')
episode: 42 training return: tensor(-1732.5098, device='cuda:0')
episode: 43 training return: tensor(-1767.9047, device='cuda:0')
epoch: 11 test_true_pfm: -20.531984133498458
episode: 44 training return: tensor(-1753.4792, device='cuda:0')
episode: 45 training return: tensor(-1752.7362, device='cuda:0')
episode: 46 training return: tensor(-1759.0038, device='cuda:0')
episode: 47 training return: tensor(-1754.2239, device='cuda:0')
epoch: 12 test_true_pfm: -33.46550376912061
episode: 48 training return: tensor(-1774.5929, device='cuda:0')
episode: 49 training return: tensor(-1752.4886, device='cuda:0')
episode: 50 training return: tensor(-1764.0116, device='cuda:0')
episode: 51 training return: tensor(-1778.6595, device='cuda:0')
epoch: 13 test_true_pfm: -27.78471522897512
episode: 52 training return: tensor(-1767.3813, device='cuda:0')
episode: 53 training return: tensor(-1758.7413, device='cuda:0')
episode: 54 training return: tensor(-1813.9348, device='cuda:0')
episode: 55 training return: tensor(-1778.1461, device='cuda:0')
epoch: 14 test_true_pfm: -43.40312781939952
episode: 56 training return: tensor(-1763.7864, device='cuda:0')
episode: 57 training return: tensor(-1775.3530, device='cuda:0')
episode: 58 training return: tensor(-1770.6362, device='cuda:0')
episode: 59 training return: tensor(-1761.1718, device='cuda:0')
epoch: 15 test_true_pfm: -23.288820261916328
episode: 60 training return: tensor(-1779.2949, device='cuda:0')
episode: 61 training return: tensor(-1763.2729, device='cuda:0')
episode: 62 training return: tensor(-1775.0359, device='cuda:0')
episode: 63 training return: tensor(-1763.2805, device='cuda:0')
epoch: 16 test_true_pfm: -28.99729005953581
episode: 64 training return: tensor(-1794.6915, device='cuda:0')
episode: 65 training return: tensor(-1794.2068, device='cuda:0')
episode: 66 training return: tensor(-1782.4933, device='cuda:0')
episode: 67 training return: tensor(-1792.6714, device='cuda:0')
epoch: 17 test_true_pfm: -52.8910030457921
episode: 68 training return: tensor(-1793.9556, device='cuda:0')
episode: 69 training return: tensor(-1777.8828, device='cuda:0')
episode: 70 training return: tensor(-1794.7622, device='cuda:0')
episode: 71 training return: tensor(-1786.4707, device='cuda:0')
epoch: 18 test_true_pfm: -29.988548438975787
episode: 72 training return: tensor(-1770.4208, device='cuda:0')
episode: 73 training return: tensor(-1764.8011, device='cuda:0')
episode: 74 training return: tensor(-1797.6411, device='cuda:0')
episode: 75 training return: tensor(-1784.6179, device='cuda:0')
epoch: 19 test_true_pfm: -32.179330975667504
episode: 76 training return: tensor(-1768.7335, device='cuda:0')
episode: 77 training return: tensor(-1805.7184, device='cuda:0')
episode: 78 training return: tensor(-1789.3210, device='cuda:0')
episode: 79 training return: tensor(-1797.8719, device='cuda:0')
epoch: 20 test_true_pfm: -30.41350273923601
episode: 80 training return: tensor(-1775.0762, device='cuda:0')
episode: 81 training return: tensor(-1794.7786, device='cuda:0')
episode: 82 training return: tensor(-1782.7153, device='cuda:0')
episode: 83 training return: tensor(-1784.0392, device='cuda:0')
epoch: 21 test_true_pfm: -37.88268247356903
episode: 84 training return: tensor(-1789.5679, device='cuda:0')
episode: 85 training return: tensor(-1777.9919, device='cuda:0')
episode: 86 training return: tensor(-1808.8900, device='cuda:0')
episode: 87 training return: tensor(-1823.3795, device='cuda:0')
epoch: 22 test_true_pfm: -45.244566328589976
episode: 88 training return: tensor(-1812.6443, device='cuda:0')
episode: 89 training return: tensor(-1778.5132, device='cuda:0')
episode: 90 training return: tensor(-1815.3645, device='cuda:0')
episode: 91 training return: tensor(-1798.5509, device='cuda:0')
epoch: 23 test_true_pfm: -39.16741011730473
episode: 92 training return: tensor(-1791.7167, device='cuda:0')
episode: 93 training return: tensor(-1777.7823, device='cuda:0')
episode: 94 training return: tensor(-1790.9630, device='cuda:0')
episode: 95 training return: tensor(-1794.0061, device='cuda:0')
epoch: 24 test_true_pfm: -31.497562987434197
episode: 96 training return: tensor(-1792.0787, device='cuda:0')
episode: 97 training return: tensor(-1798.8650, device='cuda:0')
episode: 98 training return: tensor(-1800.2103, device='cuda:0')
episode: 99 training return: tensor(-1805.1014, device='cuda:0')
epoch: 25 test_true_pfm: -26.745317768324515
episode: 100 training return: tensor(-1790.7272, device='cuda:0')
episode: 101 training return: tensor(-1796.4561, device='cuda:0')
episode: 102 training return: tensor(-1798.3276, device='cuda:0')
episode: 103 training return: tensor(-1800.7882, device='cuda:0')
epoch: 26 test_true_pfm: -30.173422877096403
episode: 104 training return: tensor(-1808.7137, device='cuda:0')
episode: 105 training return: tensor(-1805.6400, device='cuda:0')
episode: 106 training return: tensor(-1798.4556, device='cuda:0')
episode: 107 training return: tensor(-1761.9712, device='cuda:0')
epoch: 27 test_true_pfm: -41.08182756407885
episode: 108 training return: tensor(-1785.3640, device='cuda:0')
episode: 109 training return: tensor(-1786.0735, device='cuda:0')
episode: 110 training return: tensor(-1816.4943, device='cuda:0')
episode: 111 training return: tensor(-1799.2478, device='cuda:0')
epoch: 28 test_true_pfm: -40.17321431753781
episode: 112 training return: tensor(-1773.9288, device='cuda:0')
episode: 113 training return: tensor(-1785.4879, device='cuda:0')
episode: 114 training return: tensor(-1800.0813, device='cuda:0')
episode: 115 training return: tensor(-1820.9753, device='cuda:0')
epoch: 29 test_true_pfm: -31.40298304607859
episode: 116 training return: tensor(-1805.7128, device='cuda:0')
episode: 117 training return: tensor(-1807.9092, device='cuda:0')
episode: 118 training return: tensor(-1780.2965, device='cuda:0')
episode: 119 training return: tensor(-1792.0983, device='cuda:0')
epoch: 30 test_true_pfm: -31.844801638321226
episode: 120 training return: tensor(-1799.6400, device='cuda:0')
episode: 121 training return: tensor(-1828.4254, device='cuda:0')
episode: 122 training return: tensor(-1789.4297, device='cuda:0')
episode: 123 training return: tensor(-1815.1853, device='cuda:0')
epoch: 31 test_true_pfm: -32.3055239896108
episode: 124 training return: tensor(-1787.2488, device='cuda:0')
episode: 125 training return: tensor(-1782.4380, device='cuda:0')
episode: 126 training return: tensor(-1793.0331, device='cuda:0')
episode: 127 training return: tensor(-1818.4482, device='cuda:0')
epoch: 32 test_true_pfm: -39.51675659212643
episode: 128 training return: tensor(-1804.1677, device='cuda:0')
episode: 129 training return: tensor(-1781.4818, device='cuda:0')
episode: 130 training return: tensor(-1772.1471, device='cuda:0')
episode: 131 training return: tensor(-1794.0845, device='cuda:0')
epoch: 33 test_true_pfm: -25.552279869608853
episode: 132 training return: tensor(-1803.2930, device='cuda:0')
episode: 133 training return: tensor(-1811.6373, device='cuda:0')
episode: 134 training return: tensor(-1803.1820, device='cuda:0')
episode: 135 training return: tensor(-1809.5162, device='cuda:0')
epoch: 34 test_true_pfm: -24.697966524768642
episode: 136 training return: tensor(-1807.6416, device='cuda:0')
episode: 137 training return: tensor(-1772.6133, device='cuda:0')
episode: 138 training return: tensor(-1778.0383, device='cuda:0')
episode: 139 training return: tensor(-1794.7173, device='cuda:0')
epoch: 35 test_true_pfm: -29.721475840159258
episode: 140 training return: tensor(-1795.8643, device='cuda:0')
episode: 141 training return: tensor(-1798.0640, device='cuda:0')
episode: 142 training return: tensor(-1795.3180, device='cuda:0')
episode: 143 training return: tensor(-1791.6249, device='cuda:0')
epoch: 36 test_true_pfm: -16.901862937600143
episode: 144 training return: tensor(-1797.9452, device='cuda:0')
episode: 145 training return: tensor(-1764.7000, device='cuda:0')
episode: 146 training return: tensor(-1786.2677, device='cuda:0')
episode: 147 training return: tensor(-1781.9364, device='cuda:0')
epoch: 37 test_true_pfm: -31.790185502141544
episode: 148 training return: tensor(-1793.7961, device='cuda:0')
episode: 149 training return: tensor(-1791.3717, device='cuda:0')
episode: 150 training return: tensor(-1788.3035, device='cuda:0')
episode: 151 training return: tensor(-1786.9764, device='cuda:0')
epoch: 38 test_true_pfm: -26.82993286792967
episode: 152 training return: tensor(-1811.0348, device='cuda:0')
episode: 153 training return: tensor(-1790.7122, device='cuda:0')
episode: 154 training return: tensor(-1806.7026, device='cuda:0')
episode: 155 training return: tensor(-1785.9327, device='cuda:0')
epoch: 39 test_true_pfm: -31.13730528090241
episode: 156 training return: tensor(-1792.5627, device='cuda:0')
episode: 157 training return: tensor(-1797.1978, device='cuda:0')
episode: 158 training return: tensor(-1803.2908, device='cuda:0')
episode: 159 training return: tensor(-1786.4343, device='cuda:0')
epoch: 40 test_true_pfm: -28.63296208768526
episode: 160 training return: tensor(-1795.1219, device='cuda:0')
episode: 161 training return: tensor(-1786.7924, device='cuda:0')
episode: 162 training return: tensor(-1768.6194, device='cuda:0')
episode: 163 training return: tensor(-1805.4784, device='cuda:0')
epoch: 41 test_true_pfm: -36.10615483510991
episode: 164 training return: tensor(-1775.5642, device='cuda:0')
episode: 165 training return: tensor(-1822.5227, device='cuda:0')
episode: 166 training return: tensor(-1790.5656, device='cuda:0')
episode: 167 training return: tensor(-1788.1959, device='cuda:0')
epoch: 42 test_true_pfm: -25.919793240104994
episode: 168 training return: tensor(-1784.8448, device='cuda:0')
episode: 169 training return: tensor(-1788.4861, device='cuda:0')
episode: 170 training return: tensor(-1805.2372, device='cuda:0')
episode: 171 training return: tensor(-1780.3685, device='cuda:0')
epoch: 43 test_true_pfm: -35.33536081650266
episode: 172 training return: tensor(-1795.1377, device='cuda:0')
episode: 173 training return: tensor(-1804.8542, device='cuda:0')
episode: 174 training return: tensor(-1784.4465, device='cuda:0')
episode: 175 training return: tensor(-1785.6549, device='cuda:0')
epoch: 44 test_true_pfm: -26.520657478877798
episode: 176 training return: tensor(-1787.0648, device='cuda:0')
episode: 177 training return: tensor(-1785.7504, device='cuda:0')
episode: 178 training return: tensor(-1778.1427, device='cuda:0')
episode: 179 training return: tensor(-1797.4441, device='cuda:0')
epoch: 45 test_true_pfm: -24.26206530772588
episode: 180 training return: tensor(-1797.7850, device='cuda:0')
episode: 181 training return: tensor(-1800.1539, device='cuda:0')
episode: 182 training return: tensor(-1791.8616, device='cuda:0')
episode: 183 training return: tensor(-1780.8011, device='cuda:0')
epoch: 46 test_true_pfm: -34.849800807931565
episode: 184 training return: tensor(-1802.8676, device='cuda:0')
episode: 185 training return: tensor(-1800.9706, device='cuda:0')
episode: 186 training return: tensor(-1792.2854, device='cuda:0')
episode: 187 training return: tensor(-1802.3958, device='cuda:0')
epoch: 47 test_true_pfm: -37.867706507750846
episode: 188 training return: tensor(-1790.6885, device='cuda:0')
episode: 189 training return: tensor(-1781.4310, device='cuda:0')
episode: 190 training return: tensor(-1796.5624, device='cuda:0')
episode: 191 training return: tensor(-1797.9084, device='cuda:0')
epoch: 48 test_true_pfm: -29.962258077293374
episode: 192 training return: tensor(-1818.2285, device='cuda:0')
episode: 193 training return: tensor(-1794.7332, device='cuda:0')
episode: 194 training return: tensor(-1804.7758, device='cuda:0')
episode: 195 training return: tensor(-1797.9355, device='cuda:0')
epoch: 49 test_true_pfm: -35.19595777974639
episode: 196 training return: tensor(-1798.2650, device='cuda:0')
episode: 197 training return: tensor(-1783.5342, device='cuda:0')
episode: 198 training return: tensor(-1800.0389, device='cuda:0')
episode: 199 training return: tensor(-1780.0640, device='cuda:0')
epoch: 50 test_true_pfm: -38.59205783151363
episode: 200 training return: tensor(-1774.1241, device='cuda:0')
episode: 201 training return: tensor(-1798.4379, device='cuda:0')
episode: 202 training return: tensor(-1771.3351, device='cuda:0')
episode: 203 training return: tensor(-1810.3705, device='cuda:0')
epoch: 51 test_true_pfm: -37.52197630012035
episode: 204 training return: tensor(-1814.0190, device='cuda:0')
episode: 205 training return: tensor(-1800.1024, device='cuda:0')
episode: 206 training return: tensor(-1791.2344, device='cuda:0')
episode: 207 training return: tensor(-1776.5736, device='cuda:0')
epoch: 52 test_true_pfm: -23.714579526807725
episode: 208 training return: tensor(-1821.1027, device='cuda:0')
episode: 209 training return: tensor(-1811.3055, device='cuda:0')
episode: 210 training return: tensor(-1788.7640, device='cuda:0')
episode: 211 training return: tensor(-1774.9486, device='cuda:0')
epoch: 53 test_true_pfm: -39.91258062601019
episode: 212 training return: tensor(-1803.1493, device='cuda:0')
episode: 213 training return: tensor(-1788.9923, device='cuda:0')
episode: 214 training return: tensor(-1811.1694, device='cuda:0')
episode: 215 training return: tensor(-1761.6554, device='cuda:0')
epoch: 54 test_true_pfm: -46.93712375095501
episode: 216 training return: tensor(-1813.3022, device='cuda:0')
episode: 217 training return: tensor(-1799.8413, device='cuda:0')
episode: 218 training return: tensor(-1787.5936, device='cuda:0')
episode: 219 training return: tensor(-1800.4799, device='cuda:0')
epoch: 55 test_true_pfm: -28.269327317087185
episode: 220 training return: tensor(-1798.0959, device='cuda:0')
episode: 221 training return: tensor(-1781.8534, device='cuda:0')
episode: 222 training return: tensor(-1777.2396, device='cuda:0')
episode: 223 training return: tensor(-1803.4907, device='cuda:0')
epoch: 56 test_true_pfm: -38.228696020784184
episode: 224 training return: tensor(-1797.6093, device='cuda:0')
episode: 225 training return: tensor(-1811.0498, device='cuda:0')
episode: 226 training return: tensor(-1782.5579, device='cuda:0')
episode: 227 training return: tensor(-1793.5963, device='cuda:0')
epoch: 57 test_true_pfm: -30.452836671798675
episode: 228 training return: tensor(-1796.6677, device='cuda:0')
episode: 229 training return: tensor(-1782.5867, device='cuda:0')
episode: 230 training return: tensor(-1785.5042, device='cuda:0')
episode: 231 training return: tensor(-1774.5538, device='cuda:0')
epoch: 58 test_true_pfm: -38.55737715646274
episode: 232 training return: tensor(-1785.5172, device='cuda:0')
episode: 233 training return: tensor(-1783.7686, device='cuda:0')
episode: 234 training return: tensor(-1787.7971, device='cuda:0')
episode: 235 training return: tensor(-1793.1831, device='cuda:0')
epoch: 59 test_true_pfm: -43.747224754373974
episode: 236 training return: tensor(-1788.1858, device='cuda:0')
episode: 237 training return: tensor(-1788.3439, device='cuda:0')
episode: 238 training return: tensor(-1768.0756, device='cuda:0')
episode: 239 training return: tensor(-1782.4611, device='cuda:0')
epoch: 60 test_true_pfm: -35.99270674135283
episode: 240 training return: tensor(-1813.6255, device='cuda:0')
episode: 241 training return: tensor(-1796.4049, device='cuda:0')
episode: 242 training return: tensor(-1790.9675, device='cuda:0')
episode: 243 training return: tensor(-1789.9227, device='cuda:0')
epoch: 61 test_true_pfm: -24.899099048386347
episode: 244 training return: tensor(-1787.7213, device='cuda:0')
episode: 245 training return: tensor(-1792.3575, device='cuda:0')
episode: 246 training return: tensor(-1766.1053, device='cuda:0')
episode: 247 training return: tensor(-1781.6283, device='cuda:0')
epoch: 62 test_true_pfm: -37.2574002007647
episode: 248 training return: tensor(-1791.4126, device='cuda:0')
episode: 249 training return: tensor(-1786.5352, device='cuda:0')
episode: 250 training return: tensor(-1805.5420, device='cuda:0')
episode: 251 training return: tensor(-1796.1984, device='cuda:0')
epoch: 63 test_true_pfm: -41.55592042252291
episode: 252 training return: tensor(-1785.0011, device='cuda:0')
episode: 253 training return: tensor(-1794.6779, device='cuda:0')
episode: 254 training return: tensor(-1807.9406, device='cuda:0')
episode: 255 training return: tensor(-1801.3096, device='cuda:0')
epoch: 64 test_true_pfm: -35.126703696475836
episode: 256 training return: tensor(-1784.3137, device='cuda:0')
episode: 257 training return: tensor(-1790.4913, device='cuda:0')
episode: 258 training return: tensor(-1800.4745, device='cuda:0')
episode: 259 training return: tensor(-1790.5148, device='cuda:0')
epoch: 65 test_true_pfm: -36.806497988012204
episode: 260 training return: tensor(-1792.4489, device='cuda:0')
episode: 261 training return: tensor(-1818.9265, device='cuda:0')
episode: 262 training return: tensor(-1796.6587, device='cuda:0')
episode: 263 training return: tensor(-1787.7292, device='cuda:0')
epoch: 66 test_true_pfm: -34.03902783542804
episode: 264 training return: tensor(-1780.3763, device='cuda:0')
episode: 265 training return: tensor(-1792.6188, device='cuda:0')
episode: 266 training return: tensor(-1783.7937, device='cuda:0')
episode: 267 training return: tensor(-1806.5338, device='cuda:0')
epoch: 67 test_true_pfm: -32.268609082971686
episode: 268 training return: tensor(-1808.5654, device='cuda:0')
episode: 269 training return: tensor(-1787.8257, device='cuda:0')
episode: 270 training return: tensor(-1785.4130, device='cuda:0')
episode: 271 training return: tensor(-1811.1571, device='cuda:0')
epoch: 68 test_true_pfm: -33.88110922262629
episode: 272 training return: tensor(-1788.5099, device='cuda:0')
episode: 273 training return: tensor(-1789.9558, device='cuda:0')
episode: 274 training return: tensor(-1789.5609, device='cuda:0')
episode: 275 training return: tensor(-1801.9406, device='cuda:0')
epoch: 69 test_true_pfm: -32.52002385553234
episode: 276 training return: tensor(-1787.9752, device='cuda:0')
episode: 277 training return: tensor(-1767.7476, device='cuda:0')
episode: 278 training return: tensor(-1795.1094, device='cuda:0')
episode: 279 training return: tensor(-1781.3958, device='cuda:0')
epoch: 70 test_true_pfm: -38.55033003039461
episode: 280 training return: tensor(-1784.1227, device='cuda:0')
episode: 281 training return: tensor(-1796.1213, device='cuda:0')
episode: 282 training return: tensor(-1796.6610, device='cuda:0')
episode: 283 training return: tensor(-1793.2776, device='cuda:0')
epoch: 71 test_true_pfm: -34.009658389979776
episode: 284 training return: tensor(-1782.7456, device='cuda:0')
episode: 285 training return: tensor(-1785.1885, device='cuda:0')
episode: 286 training return: tensor(-1783.7666, device='cuda:0')
episode: 287 training return: tensor(-1780.5986, device='cuda:0')
epoch: 72 test_true_pfm: -24.383590004518954
episode: 288 training return: tensor(-1788.9432, device='cuda:0')
episode: 289 training return: tensor(-1790.7710, device='cuda:0')
episode: 290 training return: tensor(-1792.9652, device='cuda:0')
episode: 291 training return: tensor(-1782.0565, device='cuda:0')
epoch: 73 test_true_pfm: -34.570671346838665
episode: 292 training return: tensor(-1792.7883, device='cuda:0')
episode: 293 training return: tensor(-1792.8528, device='cuda:0')
episode: 294 training return: tensor(-1804.1891, device='cuda:0')
episode: 295 training return: tensor(-1773.8193, device='cuda:0')
epoch: 74 test_true_pfm: -14.82960427768578
episode: 296 training return: tensor(-1784.9921, device='cuda:0')
episode: 297 training return: tensor(-1783.0300, device='cuda:0')
episode: 298 training return: tensor(-1798.0043, device='cuda:0')
episode: 299 training return: tensor(-1796.3724, device='cuda:0')
epoch: 75 test_true_pfm: -28.74456537561429
episode: 300 training return: tensor(-1790.2809, device='cuda:0')
episode: 301 training return: tensor(-1790.1204, device='cuda:0')
episode: 302 training return: tensor(-1792.3574, device='cuda:0')
episode: 303 training return: tensor(-1796.9825, device='cuda:0')
epoch: 76 test_true_pfm: -27.069285982202093
episode: 304 training return: tensor(-1783.0328, device='cuda:0')
episode: 305 training return: tensor(-1786.1899, device='cuda:0')
episode: 306 training return: tensor(-1826.3542, device='cuda:0')
episode: 307 training return: tensor(-1789.9181, device='cuda:0')
epoch: 77 test_true_pfm: -29.347114147998386
episode: 308 training return: tensor(-1785.8276, device='cuda:0')
episode: 309 training return: tensor(-1789.4590, device='cuda:0')
episode: 310 training return: tensor(-1801.8564, device='cuda:0')
episode: 311 training return: tensor(-1787.4690, device='cuda:0')
epoch: 78 test_true_pfm: -38.37032968422653
episode: 312 training return: tensor(-1806.4977, device='cuda:0')
episode: 313 training return: tensor(-1794.6002, device='cuda:0')
episode: 314 training return: tensor(-1783.4592, device='cuda:0')
episode: 315 training return: tensor(-1801.0435, device='cuda:0')
epoch: 79 test_true_pfm: -27.55779214534901
episode: 316 training return: tensor(-1781.2717, device='cuda:0')
episode: 317 training return: tensor(-1790.8065, device='cuda:0')
episode: 318 training return: tensor(-1783.0702, device='cuda:0')
episode: 319 training return: tensor(-1803.7683, device='cuda:0')
epoch: 80 test_true_pfm: -31.419297502685456
episode: 320 training return: tensor(-1790.1716, device='cuda:0')
episode: 321 training return: tensor(-1791.6406, device='cuda:0')
episode: 322 training return: tensor(-1787.3666, device='cuda:0')
episode: 323 training return: tensor(-1807.5299, device='cuda:0')
epoch: 81 test_true_pfm: -28.117471397196024
episode: 324 training return: tensor(-1806.4750, device='cuda:0')
episode: 325 training return: tensor(-1795.6273, device='cuda:0')
episode: 326 training return: tensor(-1782.6772, device='cuda:0')
episode: 327 training return: tensor(-1805.2250, device='cuda:0')
epoch: 82 test_true_pfm: -37.13042567682385
episode: 328 training return: tensor(-1785.3871, device='cuda:0')
episode: 329 training return: tensor(-1794.1023, device='cuda:0')
episode: 330 training return: tensor(-1785.3512, device='cuda:0')
episode: 331 training return: tensor(-1779.1053, device='cuda:0')
epoch: 83 test_true_pfm: -31.241774436610864
episode: 332 training return: tensor(-1783.1685, device='cuda:0')
episode: 333 training return: tensor(-1780.2164, device='cuda:0')
episode: 334 training return: tensor(-1789.8694, device='cuda:0')
episode: 335 training return: tensor(-1781.3462, device='cuda:0')
epoch: 84 test_true_pfm: -25.509393376176508
episode: 336 training return: tensor(-1792.4668, device='cuda:0')
episode: 337 training return: tensor(-1771.4482, device='cuda:0')
episode: 338 training return: tensor(-1802.1162, device='cuda:0')
episode: 339 training return: tensor(-1805.9088, device='cuda:0')
epoch: 85 test_true_pfm: -34.42704255960091
episode: 340 training return: tensor(-1776.5590, device='cuda:0')
episode: 341 training return: tensor(-1794.1412, device='cuda:0')
episode: 342 training return: tensor(-1785.9220, device='cuda:0')
episode: 343 training return: tensor(-1778.3834, device='cuda:0')
epoch: 86 test_true_pfm: -19.60373205014314
episode: 344 training return: tensor(-1814.9570, device='cuda:0')
episode: 345 training return: tensor(-1790.3081, device='cuda:0')
episode: 346 training return: tensor(-1802.7983, device='cuda:0')
episode: 347 training return: tensor(-1798.2395, device='cuda:0')
epoch: 87 test_true_pfm: -32.290334149403265
episode: 348 training return: tensor(-1778.5365, device='cuda:0')
episode: 349 training return: tensor(-1798.4803, device='cuda:0')
episode: 350 training return: tensor(-1771.0092, device='cuda:0')
episode: 351 training return: tensor(-1781.8962, device='cuda:0')
epoch: 88 test_true_pfm: -27.18819283297134
episode: 352 training return: tensor(-1786.3630, device='cuda:0')
episode: 353 training return: tensor(-1786.1088, device='cuda:0')
episode: 354 training return: tensor(-1784.7434, device='cuda:0')
episode: 355 training return: tensor(-1785.7579, device='cuda:0')
epoch: 89 test_true_pfm: -21.93407115724648
episode: 356 training return: tensor(-1797.8119, device='cuda:0')
episode: 357 training return: tensor(-1776.4213, device='cuda:0')
episode: 358 training return: tensor(-1787.3190, device='cuda:0')
episode: 359 training return: tensor(-1791.2030, device='cuda:0')
epoch: 90 test_true_pfm: -36.103238229113195
episode: 360 training return: tensor(-1777.1307, device='cuda:0')
episode: 361 training return: tensor(-1793.8251, device='cuda:0')
episode: 362 training return: tensor(-1789.3820, device='cuda:0')
episode: 363 training return: tensor(-1780.8048, device='cuda:0')
epoch: 91 test_true_pfm: -24.048972159957387
episode: 364 training return: tensor(-1789.4863, device='cuda:0')
episode: 365 training return: tensor(-1780.4872, device='cuda:0')
episode: 366 training return: tensor(-1780.0724, device='cuda:0')
episode: 367 training return: tensor(-1789.2777, device='cuda:0')
epoch: 92 test_true_pfm: -30.4951358698647
episode: 368 training return: tensor(-1781.5383, device='cuda:0')
episode: 369 training return: tensor(-1804.8439, device='cuda:0')
episode: 370 training return: tensor(-1802.6979, device='cuda:0')
episode: 371 training return: tensor(-1786.5823, device='cuda:0')
epoch: 93 test_true_pfm: -30.467199957518744
episode: 372 training return: tensor(-1799.2921, device='cuda:0')
episode: 373 training return: tensor(-1799.4565, device='cuda:0')
episode: 374 training return: tensor(-1796.1703, device='cuda:0')
episode: 375 training return: tensor(-1814.4813, device='cuda:0')
epoch: 94 test_true_pfm: -22.739755638275515
episode: 376 training return: tensor(-1795.5421, device='cuda:0')
episode: 377 training return: tensor(-1791.2655, device='cuda:0')
episode: 378 training return: tensor(-1805.2874, device='cuda:0')
episode: 379 training return: tensor(-1792.0942, device='cuda:0')
epoch: 95 test_true_pfm: -26.69533726702207
episode: 380 training return: tensor(-1803.4095, device='cuda:0')
episode: 381 training return: tensor(-1807.0795, device='cuda:0')
episode: 382 training return: tensor(-1776.8306, device='cuda:0')
episode: 383 training return: tensor(-1792.8333, device='cuda:0')
epoch: 96 test_true_pfm: -29.79774798690168
episode: 384 training return: tensor(-1798.1814, device='cuda:0')
episode: 385 training return: tensor(-1785.5280, device='cuda:0')
episode: 386 training return: tensor(-1785.1870, device='cuda:0')
episode: 387 training return: tensor(-1792.1727, device='cuda:0')
epoch: 97 test_true_pfm: -29.190008944264775
episode: 388 training return: tensor(-1792.5433, device='cuda:0')
episode: 389 training return: tensor(-1788.3971, device='cuda:0')
episode: 390 training return: tensor(-1794.0493, device='cuda:0')
episode: 391 training return: tensor(-1784.3538, device='cuda:0')
epoch: 98 test_true_pfm: -29.916906151838393
episode: 392 training return: tensor(-1789.6243, device='cuda:0')
episode: 393 training return: tensor(-1800.1071, device='cuda:0')
episode: 394 training return: tensor(-1797.7422, device='cuda:0')
episode: 395 training return: tensor(-1786.6831, device='cuda:0')
epoch: 99 test_true_pfm: -32.89588000088583
episode: 396 training return: tensor(-1783.4423, device='cuda:0')
episode: 397 training return: tensor(-1787.4679, device='cuda:0')
episode: 398 training return: tensor(-1788.6519, device='cuda:0')
episode: 399 training return: tensor(-1790.0703, device='cuda:0')
epoch: 100 test_true_pfm: -29.061643462671466
episode: 400 training return: tensor(-1793.4935, device='cuda:0')
episode: 401 training return: tensor(-1780.1111, device='cuda:0')
episode: 402 training return: tensor(-1800.6902, device='cuda:0')
episode: 403 training return: tensor(-1780.4688, device='cuda:0')
epoch: 101 test_true_pfm: -30.50523792545705
episode: 404 training return: tensor(-1787.3303, device='cuda:0')
episode: 405 training return: tensor(-1788.1134, device='cuda:0')
episode: 406 training return: tensor(-1779.5807, device='cuda:0')
episode: 407 training return: tensor(-1795.2272, device='cuda:0')
epoch: 102 test_true_pfm: -27.2015839830317
episode: 408 training return: tensor(-1778.3455, device='cuda:0')
episode: 409 training return: tensor(-1795.2596, device='cuda:0')
episode: 410 training return: tensor(-1784.6825, device='cuda:0')
episode: 411 training return: tensor(-1789.4138, device='cuda:0')
epoch: 103 test_true_pfm: -29.753839589736856
episode: 412 training return: tensor(-1795.8988, device='cuda:0')
episode: 413 training return: tensor(-1774.5361, device='cuda:0')
episode: 414 training return: tensor(-1795.6306, device='cuda:0')
episode: 415 training return: tensor(-1797.1007, device='cuda:0')
epoch: 104 test_true_pfm: -25.749149808544388
episode: 416 training return: tensor(-1781.7277, device='cuda:0')
episode: 417 training return: tensor(-1800.5940, device='cuda:0')
episode: 418 training return: tensor(-1804.0872, device='cuda:0')
episode: 419 training return: tensor(-1779.2858, device='cuda:0')
epoch: 105 test_true_pfm: -24.021719814121123
episode: 420 training return: tensor(-1785.7977, device='cuda:0')
episode: 421 training return: tensor(-1778.1919, device='cuda:0')
episode: 422 training return: tensor(-1796.0685, device='cuda:0')
episode: 423 training return: tensor(-1778.9801, device='cuda:0')
epoch: 106 test_true_pfm: -33.70235665502011
episode: 424 training return: tensor(-1785.8455, device='cuda:0')
episode: 425 training return: tensor(-1783.6528, device='cuda:0')
episode: 426 training return: tensor(-1815.2129, device='cuda:0')
episode: 427 training return: tensor(-1792.1790, device='cuda:0')
epoch: 107 test_true_pfm: -21.326798606138578
episode: 428 training return: tensor(-1789.0513, device='cuda:0')
episode: 429 training return: tensor(-1796.9261, device='cuda:0')
episode: 430 training return: tensor(-1785.5436, device='cuda:0')
episode: 431 training return: tensor(-1772.4524, device='cuda:0')
epoch: 108 test_true_pfm: -33.73603172062835
episode: 432 training return: tensor(-1803.5887, device='cuda:0')
episode: 433 training return: tensor(-1790.8217, device='cuda:0')
episode: 434 training return: tensor(-1801.5071, device='cuda:0')
episode: 435 training return: tensor(-1778.4420, device='cuda:0')
epoch: 109 test_true_pfm: -26.9932899139698
episode: 436 training return: tensor(-1787.9259, device='cuda:0')
episode: 437 training return: tensor(-1805.9185, device='cuda:0')
episode: 438 training return: tensor(-1790.8140, device='cuda:0')
episode: 439 training return: tensor(-1786.0770, device='cuda:0')
epoch: 110 test_true_pfm: -33.57467902110148
episode: 440 training return: tensor(-1782.0565, device='cuda:0')
episode: 441 training return: tensor(-1810.2858, device='cuda:0')
episode: 442 training return: tensor(-1762.6902, device='cuda:0')
episode: 443 training return: tensor(-1793.2352, device='cuda:0')
epoch: 111 test_true_pfm: -28.73909845339033
episode: 444 training return: tensor(-1798.2739, device='cuda:0')
episode: 445 training return: tensor(-1811.6390, device='cuda:0')
episode: 446 training return: tensor(-1783.2972, device='cuda:0')
episode: 447 training return: tensor(-1798.9191, device='cuda:0')
epoch: 112 test_true_pfm: -31.85422711996399
episode: 448 training return: tensor(-1785.8717, device='cuda:0')
episode: 449 training return: tensor(-1795.1284, device='cuda:0')
episode: 450 training return: tensor(-1763.4054, device='cuda:0')
episode: 451 training return: tensor(-1788.9952, device='cuda:0')
epoch: 113 test_true_pfm: -30.644468786282147
episode: 452 training return: tensor(-1800.2412, device='cuda:0')
episode: 453 training return: tensor(-1797.6744, device='cuda:0')
episode: 454 training return: tensor(-1800.3672, device='cuda:0')
episode: 455 training return: tensor(-1778.4075, device='cuda:0')
epoch: 114 test_true_pfm: -30.86133313005966
episode: 456 training return: tensor(-1792.4149, device='cuda:0')
episode: 457 training return: tensor(-1784.1545, device='cuda:0')
episode: 458 training return: tensor(-1805.0903, device='cuda:0')
episode: 459 training return: tensor(-1811.6743, device='cuda:0')
epoch: 115 test_true_pfm: -31.383440367252746
episode: 460 training return: tensor(-1775.6620, device='cuda:0')
episode: 461 training return: tensor(-1770.8905, device='cuda:0')
episode: 462 training return: tensor(-1780.3243, device='cuda:0')
episode: 463 training return: tensor(-1810.0372, device='cuda:0')
epoch: 116 test_true_pfm: -20.72010829226949
episode: 464 training return: tensor(-1772.9093, device='cuda:0')
episode: 465 training return: tensor(-1784.5426, device='cuda:0')
episode: 466 training return: tensor(-1768.0476, device='cuda:0')
episode: 467 training return: tensor(-1796.4048, device='cuda:0')
epoch: 117 test_true_pfm: -29.95221852807397
episode: 468 training return: tensor(-1785.9666, device='cuda:0')
episode: 469 training return: tensor(-1785.0348, device='cuda:0')
episode: 470 training return: tensor(-1787.9301, device='cuda:0')
episode: 471 training return: tensor(-1783.9431, device='cuda:0')
epoch: 118 test_true_pfm: -28.952533548310754
episode: 472 training return: tensor(-1789.1320, device='cuda:0')
episode: 473 training return: tensor(-1788.6874, device='cuda:0')
episode: 474 training return: tensor(-1806.1125, device='cuda:0')
episode: 475 training return: tensor(-1788.1833, device='cuda:0')
epoch: 119 test_true_pfm: -34.619199431880965
episode: 476 training return: tensor(-1776.1533, device='cuda:0')
episode: 477 training return: tensor(-1811.0055, device='cuda:0')
episode: 478 training return: tensor(-1773.3809, device='cuda:0')
episode: 479 training return: tensor(-1789.0668, device='cuda:0')
epoch: 120 test_true_pfm: -32.28957796760579
episode: 480 training return: tensor(-1777.6351, device='cuda:0')
episode: 481 training return: tensor(-1790.3057, device='cuda:0')
episode: 482 training return: tensor(-1780.0552, device='cuda:0')
episode: 483 training return: tensor(-1794.2562, device='cuda:0')
epoch: 121 test_true_pfm: -27.44518609555028
episode: 484 training return: tensor(-1800.3213, device='cuda:0')
episode: 485 training return: tensor(-1782.0526, device='cuda:0')
episode: 486 training return: tensor(-1805.3682, device='cuda:0')
episode: 487 training return: tensor(-1780.4797, device='cuda:0')
epoch: 122 test_true_pfm: -28.04178872562352
episode: 488 training return: tensor(-1792.1713, device='cuda:0')
episode: 489 training return: tensor(-1773.7493, device='cuda:0')
episode: 490 training return: tensor(-1781.0536, device='cuda:0')
episode: 491 training return: tensor(-1783.8981, device='cuda:0')
epoch: 123 test_true_pfm: -28.482427888287134
episode: 492 training return: tensor(-1804.0050, device='cuda:0')
episode: 493 training return: tensor(-1789.6039, device='cuda:0')
episode: 494 training return: tensor(-1793.1653, device='cuda:0')
episode: 495 training return: tensor(-1782.8112, device='cuda:0')
epoch: 124 test_true_pfm: -25.99019155265587
episode: 496 training return: tensor(-1777.7861, device='cuda:0')
episode: 497 training return: tensor(-1778.9498, device='cuda:0')
episode: 498 training return: tensor(-1785.4799, device='cuda:0')
episode: 499 training return: tensor(-1780.5541, device='cuda:0')
epoch: 125 test_true_pfm: -21.775606257183824
episode: 500 training return: tensor(-1817.3922, device='cuda:0')
episode: 501 training return: tensor(-1771.1667, device='cuda:0')
episode: 502 training return: tensor(-1787.8173, device='cuda:0')
episode: 503 training return: tensor(-1779.3973, device='cuda:0')
epoch: 126 test_true_pfm: -35.76868179152542
episode: 504 training return: tensor(-1787.5027, device='cuda:0')
episode: 505 training return: tensor(-1792.2963, device='cuda:0')
episode: 506 training return: tensor(-1794.3794, device='cuda:0')
episode: 507 training return: tensor(-1783.4318, device='cuda:0')
epoch: 127 test_true_pfm: -31.31269509998295
episode: 508 training return: tensor(-1773.0477, device='cuda:0')
episode: 509 training return: tensor(-1778.9396, device='cuda:0')
episode: 510 training return: tensor(-1779.3671, device='cuda:0')
episode: 511 training return: tensor(-1790.5071, device='cuda:0')
epoch: 128 test_true_pfm: -29.807171856319588
episode: 512 training return: tensor(-1778.0189, device='cuda:0')
episode: 513 training return: tensor(-1782.3005, device='cuda:0')
episode: 514 training return: tensor(-1805.0667, device='cuda:0')
episode: 515 training return: tensor(-1790.3322, device='cuda:0')
epoch: 129 test_true_pfm: -31.446331438363043
episode: 516 training return: tensor(-1778.8708, device='cuda:0')
episode: 517 training return: tensor(-1779.5212, device='cuda:0')
episode: 518 training return: tensor(-1787.5529, device='cuda:0')
episode: 519 training return: tensor(-1800.4906, device='cuda:0')
epoch: 130 test_true_pfm: -30.83309692562513
episode: 520 training return: tensor(-1798.0640, device='cuda:0')
episode: 521 training return: tensor(-1779.6448, device='cuda:0')
episode: 522 training return: tensor(-1763.5724, device='cuda:0')
episode: 523 training return: tensor(-1782.1035, device='cuda:0')
epoch: 131 test_true_pfm: -32.097865887888695
episode: 524 training return: tensor(-1784.1561, device='cuda:0')
episode: 525 training return: tensor(-1802.4230, device='cuda:0')
episode: 526 training return: tensor(-1791.9852, device='cuda:0')
episode: 527 training return: tensor(-1789.6957, device='cuda:0')
epoch: 132 test_true_pfm: -33.05032291006804
episode: 528 training return: tensor(-1789.0068, device='cuda:0')
episode: 529 training return: tensor(-1793.7660, device='cuda:0')
episode: 530 training return: tensor(-1789.3696, device='cuda:0')
episode: 531 training return: tensor(-1773.5273, device='cuda:0')
epoch: 133 test_true_pfm: -25.956212009893594
episode: 532 training return: tensor(-1801.8940, device='cuda:0')
episode: 533 training return: tensor(-1772.2074, device='cuda:0')
episode: 534 training return: tensor(-1789.3467, device='cuda:0')
episode: 535 training return: tensor(-1783.7865, device='cuda:0')
epoch: 134 test_true_pfm: -30.98077416466388
episode: 536 training return: tensor(-1796.1708, device='cuda:0')
episode: 537 training return: tensor(-1785.6284, device='cuda:0')
episode: 538 training return: tensor(-1775.8649, device='cuda:0')
episode: 539 training return: tensor(-1795.1837, device='cuda:0')
epoch: 135 test_true_pfm: -35.76861778835342
episode: 540 training return: tensor(-1790.4668, device='cuda:0')
episode: 541 training return: tensor(-1800.4119, device='cuda:0')
episode: 542 training return: tensor(-1780.2570, device='cuda:0')
episode: 543 training return: tensor(-1777.7465, device='cuda:0')
epoch: 136 test_true_pfm: -29.62385775186353
episode: 544 training return: tensor(-1785.1327, device='cuda:0')
episode: 545 training return: tensor(-1800.6083, device='cuda:0')
episode: 546 training return: tensor(-1804.9244, device='cuda:0')
episode: 547 training return: tensor(-1777.4689, device='cuda:0')
epoch: 137 test_true_pfm: -22.007763107353213
episode: 548 training return: tensor(-1788.6898, device='cuda:0')
episode: 549 training return: tensor(-1786.1719, device='cuda:0')
episode: 550 training return: tensor(-1790.8831, device='cuda:0')
episode: 551 training return: tensor(-1791.0188, device='cuda:0')
epoch: 138 test_true_pfm: -30.95910155003401
episode: 552 training return: tensor(-1790.4301, device='cuda:0')
episode: 553 training return: tensor(-1802.8206, device='cuda:0')
episode: 554 training return: tensor(-1788.4064, device='cuda:0')
episode: 555 training return: tensor(-1768.9045, device='cuda:0')
epoch: 139 test_true_pfm: -34.90569484254299
episode: 556 training return: tensor(-1786.9486, device='cuda:0')
episode: 557 training return: tensor(-1783.9780, device='cuda:0')
episode: 558 training return: tensor(-1787.2715, device='cuda:0')
episode: 559 training return: tensor(-1813.6788, device='cuda:0')
epoch: 140 test_true_pfm: -23.4931935376169
episode: 560 training return: tensor(-1813.6421, device='cuda:0')
episode: 561 training return: tensor(-1771.5730, device='cuda:0')
episode: 562 training return: tensor(-1800.5211, device='cuda:0')
episode: 563 training return: tensor(-1807.9969, device='cuda:0')
epoch: 141 test_true_pfm: -35.03716680938785
episode: 564 training return: tensor(-1795.0684, device='cuda:0')
episode: 565 training return: tensor(-1796.5294, device='cuda:0')
episode: 566 training return: tensor(-1803.5170, device='cuda:0')
episode: 567 training return: tensor(-1771.8951, device='cuda:0')
epoch: 142 test_true_pfm: -31.88494853679124
episode: 568 training return: tensor(-1797.0717, device='cuda:0')
episode: 569 training return: tensor(-1779.6814, device='cuda:0')
episode: 570 training return: tensor(-1787.0792, device='cuda:0')
episode: 571 training return: tensor(-1797.4562, device='cuda:0')
epoch: 143 test_true_pfm: -35.05613998647321
episode: 572 training return: tensor(-1802.3411, device='cuda:0')
episode: 573 training return: tensor(-1820.8718, device='cuda:0')
episode: 574 training return: tensor(-1804.3319, device='cuda:0')
episode: 575 training return: tensor(-1793.6188, device='cuda:0')
epoch: 144 test_true_pfm: -33.90561994248002
episode: 576 training return: tensor(-1776.6725, device='cuda:0')
episode: 577 training return: tensor(-1771.3806, device='cuda:0')
episode: 578 training return: tensor(-1795.2014, device='cuda:0')
episode: 579 training return: tensor(-1776.3263, device='cuda:0')
epoch: 145 test_true_pfm: -39.5780871797167
episode: 580 training return: tensor(-1791.8955, device='cuda:0')
episode: 581 training return: tensor(-1766.2283, device='cuda:0')
episode: 582 training return: tensor(-1784.1724, device='cuda:0')
episode: 583 training return: tensor(-1776.0657, device='cuda:0')
epoch: 146 test_true_pfm: -33.45143769298195
episode: 584 training return: tensor(-1776.3082, device='cuda:0')
episode: 585 training return: tensor(-1779.1257, device='cuda:0')
episode: 586 training return: tensor(-1810.6016, device='cuda:0')
episode: 587 training return: tensor(-1793.9270, device='cuda:0')
epoch: 147 test_true_pfm: -39.240795903434936
episode: 588 training return: tensor(-1771.9603, device='cuda:0')
episode: 589 training return: tensor(-1823.7365, device='cuda:0')
episode: 590 training return: tensor(-1775.2241, device='cuda:0')
episode: 591 training return: tensor(-1792.7952, device='cuda:0')
epoch: 148 test_true_pfm: -32.965356615353706
episode: 592 training return: tensor(-1789.8785, device='cuda:0')
episode: 593 training return: tensor(-1782.6666, device='cuda:0')
episode: 594 training return: tensor(-1799.3362, device='cuda:0')
episode: 595 training return: tensor(-1806.0649, device='cuda:0')
epoch: 149 test_true_pfm: -33.596538940009935
episode: 596 training return: tensor(-1797.9473, device='cuda:0')
episode: 597 training return: tensor(-1772.1635, device='cuda:0')
episode: 598 training return: tensor(-1778.5298, device='cuda:0')
episode: 599 training return: tensor(-1803.9387, device='cuda:0')
epoch: 150 test_true_pfm: -40.8851665329476
