['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 2.4283718987554312 test_loss: -3.721263122558594
epoch: 1 training_loss -6.4890427803993225 test_loss: -8.054769897460938
epoch: 2 training_loss -9.200852227210998 test_loss: -10.19686508178711
epoch: 3 training_loss -10.486485166549683 test_loss: -10.869773864746094
epoch: 4 training_loss -11.285208959579467 test_loss: -11.580226135253906
epoch: 5 training_loss -11.849098463058471 test_loss: -12.166915130615234
epoch: 6 training_loss -12.172661838531495 test_loss: -12.634429168701171
epoch: 7 training_loss -12.587371206283569 test_loss: -12.87752685546875
epoch: 8 training_loss -12.83248366355896 test_loss: -12.752263641357422
epoch: 9 training_loss -13.078336734771728 test_loss: -13.152430725097656
epoch: 10 training_loss -13.181639032363892 test_loss: -13.340597534179688
epoch: 11 training_loss -13.41353244781494 test_loss: -13.3962158203125
epoch: 12 training_loss -13.571012649536133 test_loss: -13.623318481445313
epoch: 13 training_loss -13.713798694610595 test_loss: -13.700405883789063
epoch: 14 training_loss -13.940794878005981 test_loss: -13.784507751464844
epoch: 15 training_loss -13.969661626815796 test_loss: -13.878195190429688
epoch: 16 training_loss -14.133574199676513 test_loss: -14.166815185546875
epoch: 17 training_loss -14.110688772201538 test_loss: -14.20338134765625
epoch: 18 training_loss -14.311584396362305 test_loss: -14.317926025390625
epoch: 19 training_loss -14.441187896728515 test_loss: -14.532017517089844
epoch: 20 training_loss -14.496125297546387 test_loss: -14.51995849609375
epoch: 21 training_loss -14.486710920333863 test_loss: -14.5535400390625
epoch: 22 training_loss -14.548639945983886 test_loss: -14.539082336425782
epoch: 23 training_loss -14.716908893585206 test_loss: -14.796556091308593
epoch: 24 training_loss -14.707215003967285 test_loss: -14.547518920898437
epoch: 25 training_loss -14.710643186569214 test_loss: -15.00030059814453
epoch: 26 training_loss -14.828931837081909 test_loss: -14.900503540039063
epoch: 27 training_loss -14.874287881851195 test_loss: -15.18330078125
epoch: 28 training_loss -14.896221904754638 test_loss: -14.975677490234375
epoch: 29 training_loss -14.985016813278198 test_loss: -15.046965026855469
epoch: 30 training_loss -15.064015340805053 test_loss: -15.073922729492187
epoch: 31 training_loss -15.050347232818604 test_loss: -15.074349975585937
epoch: 32 training_loss -15.113596878051759 test_loss: -15.106588745117188
epoch: 33 training_loss -15.138040618896484 test_loss: -15.224815368652344
epoch: 34 training_loss -15.204834146499634 test_loss: -15.222677612304688
epoch: 35 training_loss -15.191383962631226 test_loss: -15.307441711425781
epoch: 36 training_loss -15.348551578521729 test_loss: -15.239373779296875
epoch: 37 training_loss -15.337599563598634 test_loss: -15.424169921875
epoch: 38 training_loss -15.391205739974975 test_loss: -15.238018798828126
epoch: 39 training_loss -15.390428638458252 test_loss: -15.342674255371094
epoch: 40 training_loss -15.459515390396119 test_loss: -15.423847961425782
epoch: 41 training_loss -15.414708662033082 test_loss: -15.50780029296875
epoch: 42 training_loss -15.469948606491089 test_loss: -15.39364013671875
epoch: 43 training_loss -15.506468992233277 test_loss: -15.614515686035157
epoch: 44 training_loss -15.59899055480957 test_loss: -15.55516357421875
epoch: 45 training_loss -15.58968644142151 test_loss: -15.37518310546875
epoch: 46 training_loss -15.578036375045777 test_loss: -15.50902862548828
epoch: 47 training_loss -15.568722143173218 test_loss: -15.726057434082032
epoch: 48 training_loss -15.68464973449707 test_loss: -15.438973999023437
epoch: 49 training_loss -15.66295196533203 test_loss: -15.695584106445313
epoch: 50 training_loss -15.628805418014526 test_loss: -15.793730163574219
epoch: 51 training_loss -15.69364567756653 test_loss: -15.85020751953125
epoch: 52 training_loss -15.790853757858276 test_loss: -15.81304931640625
epoch: 53 training_loss -15.770736408233642 test_loss: -15.925294494628906
epoch: 54 training_loss -15.845120487213135 test_loss: -15.816526794433594
epoch: 55 training_loss -15.858075847625733 test_loss: -15.904820251464844
epoch: 56 training_loss -15.812733001708985 test_loss: -15.936784362792968
epoch: 57 training_loss -15.818758735656738 test_loss: -15.879847717285156
epoch: 58 training_loss -15.880459156036377 test_loss: -15.842623901367187
epoch: 59 training_loss -15.854183826446533 test_loss: -15.801652526855468
epoch: 60 training_loss -15.9364537525177 test_loss: -15.983564758300782
epoch: 61 training_loss -15.922482833862304 test_loss: -15.869853210449218
epoch: 62 training_loss -15.918325815200806 test_loss: -15.824736022949219
epoch: 63 training_loss -15.986840620040894 test_loss: -16.03376159667969
epoch: 64 training_loss -16.01309555053711 test_loss: -16.063626098632813
epoch: 65 training_loss -15.956141729354858 test_loss: -16.025953674316405
epoch: 66 training_loss -16.027097091674804 test_loss: -16.03804931640625
epoch: 67 training_loss -15.991282739639281 test_loss: -16.126895141601562
epoch: 68 training_loss -15.990014247894287 test_loss: -15.924905395507812
epoch: 69 training_loss -16.105652084350584 test_loss: -16.178289794921874
epoch: 70 training_loss -16.05292922973633 test_loss: -16.0471923828125
epoch: 71 training_loss -16.050247707366943 test_loss: -16.020591735839844
epoch: 72 training_loss -16.107362098693848 test_loss: -16.38679656982422
epoch: 73 training_loss -16.092902965545655 test_loss: -16.06097412109375
epoch: 74 training_loss -16.113467750549315 test_loss: -16.193482971191408
epoch: 75 training_loss -16.201102600097656 test_loss: -16.126153564453126
epoch: 76 training_loss -16.20004981994629 test_loss: -16.195252990722658
epoch: 77 training_loss -16.1777702331543 test_loss: -16.11903076171875
epoch: 78 training_loss -16.170306243896484 test_loss: -16.26592102050781
epoch: 79 training_loss -16.262171325683592 test_loss: -16.303211975097657
epoch: 80 training_loss -16.207899255752565 test_loss: -16.326385498046875
epoch: 81 training_loss -16.27200668334961 test_loss: -16.292024230957033
epoch: 82 training_loss -16.24827512741089 test_loss: -16.311769104003908
epoch: 83 training_loss -16.258564014434814 test_loss: -16.10035858154297
epoch: 84 training_loss -16.273066291809084 test_loss: -16.30402374267578
epoch: 85 training_loss -16.31495858192444 test_loss: -16.39137420654297
epoch: 86 training_loss -16.246350326538085 test_loss: -16.057211303710936
epoch: 87 training_loss -16.2552756690979 test_loss: -16.128407287597657
epoch: 88 training_loss -16.3769172000885 test_loss: -16.28905487060547
epoch: 89 training_loss -16.3557004737854 test_loss: -16.179112243652344
epoch: 90 training_loss -16.32756471633911 test_loss: -16.33350524902344
epoch: 91 training_loss -16.390095252990722 test_loss: -16.582870483398438
epoch: 92 training_loss -16.42364052772522 test_loss: -16.427975463867188
epoch: 93 training_loss -16.343908224105835 test_loss: -16.50897979736328
epoch: 94 training_loss -16.375105228424072 test_loss: -16.444259643554688
epoch: 95 training_loss -16.41967565536499 test_loss: -16.30591278076172
epoch: 96 training_loss -16.402065448760986 test_loss: -16.55187072753906
epoch: 97 training_loss -16.435757446289063 test_loss: -16.426171875
epoch: 98 training_loss -16.47351559638977 test_loss: -16.392816162109376
epoch: 99 training_loss -16.41465766906738 test_loss: -16.355519104003907
epoch: 100 training_loss -16.368887910842897 test_loss: -16.404660034179688
epoch: 101 training_loss -16.474606924057007 test_loss: -16.578671264648438
epoch: 102 training_loss -16.501728382110596 test_loss: -16.574925231933594
epoch: 103 training_loss -16.497823181152345 test_loss: -16.505487060546876
epoch: 104 training_loss -16.421949672698975 test_loss: -16.523023986816405
epoch: 105 training_loss -16.49199824333191 test_loss: -16.584744262695313
epoch: 106 training_loss -16.500197296142577 test_loss: -16.509918212890625
epoch: 107 training_loss -16.504597578048706 test_loss: -16.48302307128906
epoch: 108 training_loss -16.54284200668335 test_loss: -16.465884399414062
epoch: 109 training_loss -16.53298728942871 test_loss: -16.618646240234376
epoch: 110 training_loss -16.559855852127075 test_loss: -16.233181762695313
epoch: 111 training_loss -16.52645287513733 test_loss: -16.492454528808594
epoch: 112 training_loss -16.570459356307982 test_loss: -16.436463928222658
epoch: 113 training_loss -16.641885814666747 test_loss: -16.580485534667968
epoch: 114 training_loss -16.582860279083253 test_loss: -16.54431610107422
epoch: 115 training_loss -16.50229729652405 test_loss: -16.441522216796876
epoch: 116 training_loss -16.53405866622925 test_loss: -16.63123779296875
epoch: 117 training_loss -16.60013861656189 test_loss: -16.693739318847655
epoch: 118 training_loss -16.580584297180177 test_loss: -16.713333129882812
epoch: 119 training_loss -16.59381760597229 test_loss: -16.599702453613283
epoch: 120 training_loss -16.642079257965086 test_loss: -16.583717346191406
epoch: 121 training_loss -16.643430643081665 test_loss: -16.663829040527343
epoch: 122 training_loss -16.582388830184936 test_loss: -16.627923583984376
epoch: 123 training_loss -16.69580270767212 test_loss: -16.76838836669922
epoch: 124 training_loss -16.640778484344484 test_loss: -16.559393310546874
epoch: 125 training_loss -16.61818176269531 test_loss: -16.739604187011718
epoch: 126 training_loss -16.627962036132814 test_loss: -16.635523986816406
epoch: 127 training_loss -16.697326192855837 test_loss: -16.64901123046875
epoch: 128 training_loss -16.65753643989563 test_loss: -16.835844421386717
epoch: 129 training_loss -16.675061721801757 test_loss: -16.820001220703126
epoch: 130 training_loss -16.599168519973755 test_loss: -16.77303466796875
epoch: 131 training_loss -16.622256145477294 test_loss: -16.68839874267578
epoch: 132 training_loss -16.697601051330565 test_loss: -16.579318237304687
epoch: 133 training_loss -16.76138236999512 test_loss: -16.715576171875
epoch: 134 training_loss -16.69901743888855 test_loss: -16.711631774902344
epoch: 135 training_loss -16.718531875610353 test_loss: -16.741746520996095
epoch: 136 training_loss -16.70948757171631 test_loss: -16.669253540039062
epoch: 137 training_loss -16.673570318222048 test_loss: -16.780593872070312
epoch: 138 training_loss -16.724006805419922 test_loss: -16.883543395996092
epoch: 139 training_loss -16.74197271347046 test_loss: -16.744261169433592
epoch: 140 training_loss -16.74292742729187 test_loss: -16.846939086914062
epoch: 141 training_loss -16.78747724533081 test_loss: -16.79473419189453
epoch: 142 training_loss -16.81125142097473 test_loss: -16.945643615722656
epoch: 143 training_loss -16.724845514297485 test_loss: -16.715904235839844
epoch: 144 training_loss -16.7162446308136 test_loss: -16.608282470703124
epoch: 145 training_loss -16.77227689743042 test_loss: -16.915310668945313
epoch: 146 training_loss -16.77677095413208 test_loss: -16.546688842773438
epoch: 147 training_loss -16.76644941329956 test_loss: -16.621304321289063
epoch: 148 training_loss -16.779492893218993 test_loss: -16.839369201660155
epoch: 149 training_loss -16.805776348114012 test_loss: -16.834808349609375
4030.1370776282383
episode: 0 training return: tensor(-3100.7261, device='cuda:0')
episode: 1 training return: tensor(-5.7613e+12, device='cuda:0')
episode: 2 training return: tensor(-2.4929e+11, device='cuda:0')
episode: 3 training return: tensor(-3845.0046, device='cuda:0')
epoch: 1 test_true_pfm: -26.188088521427762
episode: 4 training return: tensor(-9.7523e+09, device='cuda:0')
episode: 5 training return: tensor(-6.3177e+11, device='cuda:0')
episode: 6 training return: tensor(-5.4993e+11, device='cuda:0')
episode: 7 training return: tensor(-9861.7607, device='cuda:0')
epoch: 2 test_true_pfm: 203.34020264119383
episode: 8 training return: tensor(-4.5474e+11, device='cuda:0')
episode: 9 training return: tensor(-6.7651e+16, device='cuda:0')
episode: 10 training return: tensor(-8149.9585, device='cuda:0')
episode: 11 training return: tensor(-10273.4023, device='cuda:0')
epoch: 3 test_true_pfm: 124.68982893178195
episode: 12 training return: tensor(-9748.2715, device='cuda:0')
episode: 13 training return: tensor(-11040.9863, device='cuda:0')
episode: 14 training return: tensor(-8416.9727, device='cuda:0')
episode: 15 training return: tensor(-13136.7129, device='cuda:0')
epoch: 4 test_true_pfm: 217.3113201710195
episode: 16 training return: tensor(-18682.0664, device='cuda:0')
episode: 17 training return: tensor(-12442.2803, device='cuda:0')
episode: 18 training return: tensor(-13635.4521, device='cuda:0')
episode: 19 training return: tensor(-8591.4053, device='cuda:0')
epoch: 5 test_true_pfm: 28.53066692067836
episode: 20 training return: tensor(-13001.6523, device='cuda:0')
episode: 21 training return: tensor(-10656.5615, device='cuda:0')
episode: 22 training return: tensor(-9057.4072, device='cuda:0')
episode: 23 training return: tensor(-11734.3018, device='cuda:0')
epoch: 6 test_true_pfm: -329.69349193064767
episode: 24 training return: tensor(-10395.5186, device='cuda:0')
episode: 25 training return: tensor(-9951.2559, device='cuda:0')
episode: 26 training return: tensor(-9023.9180, device='cuda:0')
episode: 27 training return: tensor(-8003.9751, device='cuda:0')
epoch: 7 test_true_pfm: 66.82408413006091
episode: 28 training return: tensor(-8518.3330, device='cuda:0')
episode: 29 training return: tensor(-11975.5156, device='cuda:0')
episode: 30 training return: tensor(-10539.9258, device='cuda:0')
episode: 31 training return: tensor(-8187.8286, device='cuda:0')
epoch: 8 test_true_pfm: -77.96904826015796
episode: 32 training return: tensor(-66065632., device='cuda:0')
episode: 33 training return: tensor(-11277.2998, device='cuda:0')
episode: 34 training return: tensor(-9880.1309, device='cuda:0')
episode: 35 training return: tensor(-12522.9609, device='cuda:0')
epoch: 9 test_true_pfm: -28.37645811775081
episode: 36 training return: tensor(-11845.1777, device='cuda:0')
episode: 37 training return: tensor(-11976.9521, device='cuda:0')
episode: 38 training return: tensor(-10940.6670, device='cuda:0')
episode: 39 training return: tensor(-14431.2783, device='cuda:0')
epoch: 10 test_true_pfm: 51.209781700434725
episode: 40 training return: tensor(-9595.7334, device='cuda:0')
episode: 41 training return: tensor(-15336.8008, device='cuda:0')
episode: 42 training return: tensor(-10004.3457, device='cuda:0')
episode: 43 training return: tensor(-11195.2979, device='cuda:0')
epoch: 11 test_true_pfm: -5.478925477366592
episode: 44 training return: tensor(-20516.7812, device='cuda:0')
episode: 45 training return: tensor(-12207.4346, device='cuda:0')
episode: 46 training return: tensor(-10719.9443, device='cuda:0')
episode: 47 training return: tensor(-25591.9551, device='cuda:0')
epoch: 12 test_true_pfm: -19.473840505025287
episode: 48 training return: tensor(-8548.7305, device='cuda:0')
episode: 49 training return: tensor(-10064.1143, device='cuda:0')
episode: 50 training return: tensor(-13657.9873, device='cuda:0')
episode: 51 training return: tensor(-8607.0635, device='cuda:0')
epoch: 13 test_true_pfm: 72.43128166787103
episode: 52 training return: tensor(-9739.0850, device='cuda:0')
episode: 53 training return: tensor(-14457.4287, device='cuda:0')
episode: 54 training return: tensor(-13151.7979, device='cuda:0')
episode: 55 training return: tensor(-7557.0879, device='cuda:0')
epoch: 14 test_true_pfm: -38.0448405471844
episode: 56 training return: tensor(-7782.2969, device='cuda:0')
episode: 57 training return: tensor(-14741.3066, device='cuda:0')
episode: 58 training return: tensor(-10829.3975, device='cuda:0')
episode: 59 training return: tensor(-136394.4062, device='cuda:0')
epoch: 15 test_true_pfm: 133.80559719049862
episode: 60 training return: tensor(-32292.6836, device='cuda:0')
episode: 61 training return: tensor(-63561.4258, device='cuda:0')
episode: 62 training return: tensor(-45349.3398, device='cuda:0')
episode: 63 training return: tensor(-1.6210e+08, device='cuda:0')
epoch: 16 test_true_pfm: 311.4488588948771
episode: 64 training return: tensor(-544164.4375, device='cuda:0')
episode: 65 training return: tensor(-53076.1211, device='cuda:0')
episode: 66 training return: tensor(-3.0361e+09, device='cuda:0')
episode: 67 training return: tensor(-9404.1602, device='cuda:0')
epoch: 17 test_true_pfm: 285.14776248573247
episode: 68 training return: tensor(-107888.8203, device='cuda:0')
episode: 69 training return: tensor(-78979.0312, device='cuda:0')
episode: 70 training return: tensor(-10396.3750, device='cuda:0')
episode: 71 training return: tensor(-10938.8760, device='cuda:0')
epoch: 18 test_true_pfm: 159.02437585881486
episode: 72 training return: tensor(-66147.6484, device='cuda:0')
episode: 73 training return: tensor(-118250.5078, device='cuda:0')
episode: 74 training return: tensor(-524718.2500, device='cuda:0')
episode: 75 training return: tensor(-230336.5000, device='cuda:0')
epoch: 19 test_true_pfm: 108.54324223334163
episode: 76 training return: tensor(-82058.0703, device='cuda:0')
episode: 77 training return: tensor(-30206.6016, device='cuda:0')
episode: 78 training return: tensor(-12779.2588, device='cuda:0')
episode: 79 training return: tensor(-73678.2578, device='cuda:0')
epoch: 20 test_true_pfm: 34.79604389582315
episode: 80 training return: tensor(-10899.0488, device='cuda:0')
episode: 81 training return: tensor(-300023.3750, device='cuda:0')
episode: 82 training return: tensor(-19630.9648, device='cuda:0')
episode: 83 training return: tensor(-437388.6562, device='cuda:0')
epoch: 21 test_true_pfm: 11.301762777960812
episode: 84 training return: tensor(-534056.7500, device='cuda:0')
episode: 85 training return: tensor(-148153.4219, device='cuda:0')
episode: 86 training return: tensor(-272885.7812, device='cuda:0')
episode: 87 training return: tensor(-2386155., device='cuda:0')
epoch: 22 test_true_pfm: -63.680326908444215
episode: 88 training return: tensor(-7.0609e+09, device='cuda:0')
episode: 89 training return: tensor(-335054.2812, device='cuda:0')
episode: 90 training return: tensor(-623854.5625, device='cuda:0')
episode: 91 training return: tensor(-6.9454e+09, device='cuda:0')
epoch: 23 test_true_pfm: -16.63254640079402
episode: 92 training return: tensor(-7826.1919, device='cuda:0')
episode: 93 training return: tensor(-480360.3125, device='cuda:0')
episode: 94 training return: tensor(-65891.7031, device='cuda:0')
episode: 95 training return: tensor(-625840.5000, device='cuda:0')
epoch: 24 test_true_pfm: -110.44987308785574
episode: 96 training return: tensor(-4.7858e+09, device='cuda:0')
episode: 97 training return: tensor(-968209.3750, device='cuda:0')
episode: 98 training return: tensor(-656654.8125, device='cuda:0')
episode: 99 training return: tensor(-286520.8750, device='cuda:0')
epoch: 25 test_true_pfm: 78.69883989726016
episode: 100 training return: tensor(-28835.6660, device='cuda:0')
episode: 101 training return: tensor(-446789., device='cuda:0')
episode: 102 training return: tensor(-11721.2646, device='cuda:0')
episode: 103 training return: tensor(-235096.4375, device='cuda:0')
epoch: 26 test_true_pfm: 118.51831787557052
episode: 104 training return: tensor(-64253.3555, device='cuda:0')
episode: 105 training return: tensor(-114990.0391, device='cuda:0')
episode: 106 training return: tensor(-31328.1582, device='cuda:0')
episode: 107 training return: tensor(-5.6630e+09, device='cuda:0')
epoch: 27 test_true_pfm: 42.61126438379006
episode: 108 training return: tensor(-252077.3594, device='cuda:0')
episode: 109 training return: tensor(-7.0253e+09, device='cuda:0')
episode: 110 training return: tensor(-1.0925e+10, device='cuda:0')
episode: 111 training return: tensor(-45690.8164, device='cuda:0')
epoch: 28 test_true_pfm: -49.80213827456574
episode: 112 training return: tensor(-9.8834e+09, device='cuda:0')
episode: 113 training return: tensor(-1413222.5000, device='cuda:0')
episode: 114 training return: tensor(-90124.7578, device='cuda:0')
episode: 115 training return: tensor(-7761.3188, device='cuda:0')
epoch: 29 test_true_pfm: -17.537733477602217
episode: 116 training return: tensor(-7618.7061, device='cuda:0')
episode: 117 training return: tensor(-6.5563e+09, device='cuda:0')
episode: 118 training return: tensor(-6737.0962, device='cuda:0')
episode: 119 training return: tensor(-6190.2930, device='cuda:0')
epoch: 30 test_true_pfm: 49.190220347941896
episode: 120 training return: tensor(-69052.5312, device='cuda:0')
episode: 121 training return: tensor(-21437.9219, device='cuda:0')
episode: 122 training return: tensor(-6912.4375, device='cuda:0')
episode: 123 training return: tensor(-6444.7373, device='cuda:0')
epoch: 31 test_true_pfm: 234.81220213239104
episode: 124 training return: tensor(-6500.7905, device='cuda:0')
episode: 125 training return: tensor(-8367.8770, device='cuda:0')
episode: 126 training return: tensor(-112664.7500, device='cuda:0')
episode: 127 training return: tensor(-6688.1226, device='cuda:0')
epoch: 32 test_true_pfm: -142.73666434601182
episode: 128 training return: tensor(-8.2012e+09, device='cuda:0')
episode: 129 training return: tensor(-382684.5625, device='cuda:0')
episode: 130 training return: tensor(-6910.7393, device='cuda:0')
episode: 131 training return: tensor(-6909.8638, device='cuda:0')
epoch: 33 test_true_pfm: -72.56443926536976
episode: 132 training return: tensor(-7508.1484, device='cuda:0')
episode: 133 training return: tensor(-6914.4316, device='cuda:0')
episode: 134 training return: tensor(-6570.0723, device='cuda:0')
episode: 135 training return: tensor(-6594.2900, device='cuda:0')
epoch: 34 test_true_pfm: 176.2330334731552
episode: 136 training return: tensor(-7414.2100, device='cuda:0')
episode: 137 training return: tensor(-7325.6929, device='cuda:0')
episode: 138 training return: tensor(-38867.3125, device='cuda:0')
episode: 139 training return: tensor(-8370.8965, device='cuda:0')
epoch: 35 test_true_pfm: 223.1985665367757
episode: 140 training return: tensor(-7938.1909, device='cuda:0')
episode: 141 training return: tensor(-6996.6377, device='cuda:0')
episode: 142 training return: tensor(-11016.7646, device='cuda:0')
episode: 143 training return: tensor(-7406.7119, device='cuda:0')
epoch: 36 test_true_pfm: 15.656962648726397
episode: 144 training return: tensor(-6742.4497, device='cuda:0')
episode: 145 training return: tensor(-7647.2393, device='cuda:0')
episode: 146 training return: tensor(-7887.7554, device='cuda:0')
episode: 147 training return: tensor(-7095.9697, device='cuda:0')
epoch: 37 test_true_pfm: 216.4499502660327
episode: 148 training return: tensor(-9459.7656, device='cuda:0')
episode: 149 training return: tensor(-6846.5425, device='cuda:0')
episode: 150 training return: tensor(-7785.9365, device='cuda:0')
episode: 151 training return: tensor(-12529.1611, device='cuda:0')
epoch: 38 test_true_pfm: 93.79066157331606
episode: 152 training return: tensor(-13337.3818, device='cuda:0')
episode: 153 training return: tensor(-7602.3730, device='cuda:0')
episode: 154 training return: tensor(-6767.2896, device='cuda:0')
episode: 155 training return: tensor(-7056.8652, device='cuda:0')
epoch: 39 test_true_pfm: -3.9798767483435937
episode: 156 training return: tensor(-6124.4287, device='cuda:0')
episode: 157 training return: tensor(-6775.3804, device='cuda:0')
episode: 158 training return: tensor(-9168.4531, device='cuda:0')
episode: 159 training return: tensor(-7689.1948, device='cuda:0')
epoch: 40 test_true_pfm: 426.12955582821286
episode: 160 training return: tensor(-7483.4766, device='cuda:0')
episode: 161 training return: tensor(-8646.6523, device='cuda:0')
episode: 162 training return: tensor(-7726.8662, device='cuda:0')
episode: 163 training return: tensor(-64338.0469, device='cuda:0')
epoch: 41 test_true_pfm: 137.98002694573938
episode: 164 training return: tensor(-7970.4668, device='cuda:0')
episode: 165 training return: tensor(-7352.2241, device='cuda:0')
episode: 166 training return: tensor(-6813.6006, device='cuda:0')
episode: 167 training return: tensor(-6848.6519, device='cuda:0')
epoch: 42 test_true_pfm: 161.76960549449336
episode: 168 training return: tensor(-7447.7729, device='cuda:0')
episode: 169 training return: tensor(-7657.6245, device='cuda:0')
episode: 170 training return: tensor(-8797.9785, device='cuda:0')
episode: 171 training return: tensor(-7707.8892, device='cuda:0')
epoch: 43 test_true_pfm: 359.8653641719193
episode: 172 training return: tensor(-7923.8804, device='cuda:0')
episode: 173 training return: tensor(-8711.4150, device='cuda:0')
episode: 174 training return: tensor(-9018.8662, device='cuda:0')
episode: 175 training return: tensor(-7609.3320, device='cuda:0')
epoch: 44 test_true_pfm: 86.15967667783566
episode: 176 training return: tensor(-15681.4824, device='cuda:0')
episode: 177 training return: tensor(-8454.5010, device='cuda:0')
episode: 178 training return: tensor(-8809.6455, device='cuda:0')
episode: 179 training return: tensor(-8455.8164, device='cuda:0')
epoch: 45 test_true_pfm: 398.0938681822515
episode: 180 training return: tensor(-8137.4790, device='cuda:0')
episode: 181 training return: tensor(-8080.9307, device='cuda:0')
episode: 182 training return: tensor(-8419.3428, device='cuda:0')
episode: 183 training return: tensor(-12844.9150, device='cuda:0')
epoch: 46 test_true_pfm: 327.92664026923677
episode: 184 training return: tensor(-8358.8135, device='cuda:0')
episode: 185 training return: tensor(-7549.3242, device='cuda:0')
episode: 186 training return: tensor(-8088.9141, device='cuda:0')
episode: 187 training return: tensor(-7898.7129, device='cuda:0')
epoch: 47 test_true_pfm: 507.44461985085127
episode: 188 training return: tensor(-10040.5156, device='cuda:0')
episode: 189 training return: tensor(-7302.2246, device='cuda:0')
episode: 190 training return: tensor(-7947.3062, device='cuda:0')
episode: 191 training return: tensor(-6647.6099, device='cuda:0')
epoch: 48 test_true_pfm: 204.44354139833354
episode: 192 training return: tensor(-7362.5713, device='cuda:0')
episode: 193 training return: tensor(-9552.4922, device='cuda:0')
episode: 194 training return: tensor(-7614.3018, device='cuda:0')
episode: 195 training return: tensor(-6.8954e+10, device='cuda:0')
epoch: 49 test_true_pfm: -147.82954562789436
episode: 196 training return: tensor(-8.8020e+10, device='cuda:0')
episode: 197 training return: tensor(-11562.4473, device='cuda:0')
episode: 198 training return: tensor(-1.3574e+11, device='cuda:0')
episode: 199 training return: tensor(-10281.7686, device='cuda:0')
epoch: 50 test_true_pfm: 513.8221292242554
episode: 200 training return: tensor(-8667.9785, device='cuda:0')
episode: 201 training return: tensor(-9431.2393, device='cuda:0')
episode: 202 training return: tensor(-7283.5811, device='cuda:0')
episode: 203 training return: tensor(-6733.6270, device='cuda:0')
epoch: 51 test_true_pfm: 610.2775646233026
episode: 204 training return: tensor(-13166.6025, device='cuda:0')
episode: 205 training return: tensor(-7408.1650, device='cuda:0')
episode: 206 training return: tensor(-1.3709e+10, device='cuda:0')
episode: 207 training return: tensor(-35449420., device='cuda:0')
epoch: 52 test_true_pfm: 528.8325761954464
episode: 208 training return: tensor(-8076.5391, device='cuda:0')
episode: 209 training return: tensor(-9895.8896, device='cuda:0')
episode: 210 training return: tensor(-9616.6592, device='cuda:0')
episode: 211 training return: tensor(-8770.2158, device='cuda:0')
epoch: 53 test_true_pfm: 180.10099423147685
episode: 212 training return: tensor(-9316.9854, device='cuda:0')
episode: 213 training return: tensor(-10796.0684, device='cuda:0')
episode: 214 training return: tensor(-10080.3799, device='cuda:0')
episode: 215 training return: tensor(-10789.2676, device='cuda:0')
epoch: 54 test_true_pfm: 221.9757375252051
episode: 216 training return: tensor(-10990.8359, device='cuda:0')
episode: 217 training return: tensor(-11023.9424, device='cuda:0')
episode: 218 training return: tensor(-10170.2471, device='cuda:0')
episode: 219 training return: tensor(-9456.5801, device='cuda:0')
epoch: 55 test_true_pfm: 402.4847760557227
episode: 220 training return: tensor(-9258.2139, device='cuda:0')
episode: 221 training return: tensor(-9396.5068, device='cuda:0')
episode: 222 training return: tensor(-10414.9463, device='cuda:0')
episode: 223 training return: tensor(-6656.0991, device='cuda:0')
epoch: 56 test_true_pfm: -108.25811323624764
episode: 224 training return: tensor(-8610.4404, device='cuda:0')
episode: 225 training return: tensor(-7845.6899, device='cuda:0')
episode: 226 training return: tensor(-7983.3242, device='cuda:0')
episode: 227 training return: tensor(-9741.8730, device='cuda:0')
epoch: 57 test_true_pfm: 113.42800280963559
episode: 228 training return: tensor(-1.1299e+10, device='cuda:0')
episode: 229 training return: tensor(-7710.2432, device='cuda:0')
episode: 230 training return: tensor(-8499.8008, device='cuda:0')
episode: 231 training return: tensor(-7430.2695, device='cuda:0')
epoch: 58 test_true_pfm: 89.94887994372839
episode: 232 training return: tensor(-7840.6016, device='cuda:0')
episode: 233 training return: tensor(-9060.7393, device='cuda:0')
episode: 234 training return: tensor(-8582.4863, device='cuda:0')
episode: 235 training return: tensor(-8671.8828, device='cuda:0')
epoch: 59 test_true_pfm: 350.6554216787246
episode: 236 training return: tensor(-7684.9121, device='cuda:0')
episode: 237 training return: tensor(-8797.0146, device='cuda:0')
episode: 238 training return: tensor(-8385.2002, device='cuda:0')
episode: 239 training return: tensor(-8280.9023, device='cuda:0')
epoch: 60 test_true_pfm: 249.14748183314782
episode: 240 training return: tensor(-8185.2031, device='cuda:0')
episode: 241 training return: tensor(-7689.0430, device='cuda:0')
episode: 242 training return: tensor(-8446.4805, device='cuda:0')
episode: 243 training return: tensor(-8527.0342, device='cuda:0')
epoch: 61 test_true_pfm: 289.23398922974485
episode: 244 training return: tensor(-8558.1094, device='cuda:0')
episode: 245 training return: tensor(-8785.5742, device='cuda:0')
episode: 246 training return: tensor(-8318.0664, device='cuda:0')
episode: 247 training return: tensor(-7747.5405, device='cuda:0')
epoch: 62 test_true_pfm: 563.7829771051756
episode: 248 training return: tensor(-8644.6240, device='cuda:0')
episode: 249 training return: tensor(-9347.0479, device='cuda:0')
episode: 250 training return: tensor(-8797.9297, device='cuda:0')
episode: 251 training return: tensor(-7831.8906, device='cuda:0')
epoch: 63 test_true_pfm: 533.6492470583535
episode: 252 training return: tensor(-9326.3486, device='cuda:0')
episode: 253 training return: tensor(-9458.0254, device='cuda:0')
episode: 254 training return: tensor(-9765.2871, device='cuda:0')
episode: 255 training return: tensor(-8769.1299, device='cuda:0')
epoch: 64 test_true_pfm: 482.0650871198765
episode: 256 training return: tensor(-9960.7393, device='cuda:0')
episode: 257 training return: tensor(-9333.6738, device='cuda:0')
episode: 258 training return: tensor(-8207.0244, device='cuda:0')
episode: 259 training return: tensor(-7986.8521, device='cuda:0')
epoch: 65 test_true_pfm: 544.7486065527139
episode: 260 training return: tensor(-8316.2832, device='cuda:0')
episode: 261 training return: tensor(-8138.6328, device='cuda:0')
episode: 262 training return: tensor(-7560.8716, device='cuda:0')
episode: 263 training return: tensor(-8694.7939, device='cuda:0')
epoch: 66 test_true_pfm: 385.9216238409461
episode: 264 training return: tensor(-8135.2773, device='cuda:0')
episode: 265 training return: tensor(-9462.5146, device='cuda:0')
episode: 266 training return: tensor(-7952.7300, device='cuda:0')
episode: 267 training return: tensor(-7736.4663, device='cuda:0')
epoch: 67 test_true_pfm: 210.77593709081262
episode: 268 training return: tensor(-7303.2817, device='cuda:0')
episode: 269 training return: tensor(-7384.5410, device='cuda:0')
episode: 270 training return: tensor(-7817.1001, device='cuda:0')
episode: 271 training return: tensor(-7930.3599, device='cuda:0')
epoch: 68 test_true_pfm: 545.828821716433
episode: 272 training return: tensor(-7330.3857, device='cuda:0')
episode: 273 training return: tensor(-9422.7480, device='cuda:0')
episode: 274 training return: tensor(-8210.9609, device='cuda:0')
episode: 275 training return: tensor(-7928.5332, device='cuda:0')
epoch: 69 test_true_pfm: 477.9662160771598
episode: 276 training return: tensor(-8481.8477, device='cuda:0')
episode: 277 training return: tensor(-21681.0449, device='cuda:0')
episode: 278 training return: tensor(-7799.8013, device='cuda:0')
episode: 279 training return: tensor(-9679.3262, device='cuda:0')
epoch: 70 test_true_pfm: 680.4255452884728
episode: 280 training return: tensor(-7208.8574, device='cuda:0')
episode: 281 training return: tensor(-8378.3359, device='cuda:0')
episode: 282 training return: tensor(-8777.5312, device='cuda:0')
episode: 283 training return: tensor(-10548.7852, device='cuda:0')
epoch: 71 test_true_pfm: 445.8111894484118
episode: 284 training return: tensor(-4.8610e+11, device='cuda:0')
episode: 285 training return: tensor(-8234.9072, device='cuda:0')
episode: 286 training return: tensor(-7141.8301, device='cuda:0')
episode: 287 training return: tensor(-7329.5610, device='cuda:0')
epoch: 72 test_true_pfm: 441.41993050692264
episode: 288 training return: tensor(-8636.5771, device='cuda:0')
episode: 289 training return: tensor(-9602.3486, device='cuda:0')
episode: 290 training return: tensor(-7971.0317, device='cuda:0')
episode: 291 training return: tensor(-8064.7505, device='cuda:0')
epoch: 73 test_true_pfm: 531.6767596508497
episode: 292 training return: tensor(-8202.9795, device='cuda:0')
episode: 293 training return: tensor(-9216.9756, device='cuda:0')
episode: 294 training return: tensor(-8062.0337, device='cuda:0')
episode: 295 training return: tensor(-5.9182e+10, device='cuda:0')
epoch: 74 test_true_pfm: 399.8606026477302
episode: 296 training return: tensor(-7248.4575, device='cuda:0')
episode: 297 training return: tensor(-8134.4146, device='cuda:0')
episode: 298 training return: tensor(-8893.2637, device='cuda:0')
episode: 299 training return: tensor(-7538.5015, device='cuda:0')
epoch: 75 test_true_pfm: 396.7283827880956
episode: 300 training return: tensor(-10377.4922, device='cuda:0')
episode: 301 training return: tensor(-11581.9678, device='cuda:0')
episode: 302 training return: tensor(-10186.2070, device='cuda:0')
episode: 303 training return: tensor(-8950.7207, device='cuda:0')
epoch: 76 test_true_pfm: 133.9349822812277
episode: 304 training return: tensor(-9803.1592, device='cuda:0')
episode: 305 training return: tensor(-8633.9746, device='cuda:0')
episode: 306 training return: tensor(-9086.7979, device='cuda:0')
episode: 307 training return: tensor(-8557.8213, device='cuda:0')
epoch: 77 test_true_pfm: 350.4166547715663
episode: 308 training return: tensor(-8987.5674, device='cuda:0')
episode: 309 training return: tensor(-8359.5137, device='cuda:0')
episode: 310 training return: tensor(-8974.1650, device='cuda:0')
episode: 311 training return: tensor(-10792.0254, device='cuda:0')
epoch: 78 test_true_pfm: 532.9822067164893
episode: 312 training return: tensor(-55129.6953, device='cuda:0')
episode: 313 training return: tensor(-4.8581e+10, device='cuda:0')
episode: 314 training return: tensor(-476146., device='cuda:0')
episode: 315 training return: tensor(-289555.7500, device='cuda:0')
epoch: 79 test_true_pfm: 309.44893110398897
episode: 316 training return: tensor(-789354.7500, device='cuda:0')
episode: 317 training return: tensor(-7.4524e+09, device='cuda:0')
episode: 318 training return: tensor(-10026843., device='cuda:0')
episode: 319 training return: tensor(-28988.6992, device='cuda:0')
epoch: 80 test_true_pfm: -134.10197847236074
episode: 320 training return: tensor(-100849.8750, device='cuda:0')
episode: 321 training return: tensor(-15301.3955, device='cuda:0')
episode: 322 training return: tensor(-14187.7920, device='cuda:0')
episode: 323 training return: tensor(-43897.2148, device='cuda:0')
epoch: 81 test_true_pfm: -52.071712212699616
episode: 324 training return: tensor(-7.1620e+09, device='cuda:0')
episode: 325 training return: tensor(-1.4403e+10, device='cuda:0')
episode: 326 training return: tensor(-486446.3750, device='cuda:0')
episode: 327 training return: tensor(-2.5906e+13, device='cuda:0')
epoch: 82 test_true_pfm: 141.57388122674595
episode: 328 training return: tensor(-271591.3438, device='cuda:0')
episode: 329 training return: tensor(-16027122., device='cuda:0')
episode: 330 training return: tensor(-6.0281e+10, device='cuda:0')
episode: 331 training return: tensor(-1.0551e+11, device='cuda:0')
epoch: 83 test_true_pfm: -256.67863087836
episode: 332 training return: tensor(-1.2470e+11, device='cuda:0')
episode: 333 training return: tensor(-2.4268e+10, device='cuda:0')
episode: 334 training return: tensor(-5.6696e+10, device='cuda:0')
episode: 335 training return: tensor(-9.5269e+09, device='cuda:0')
epoch: 84 test_true_pfm: 118.52092668965258
episode: 336 training return: tensor(-8.0429e+12, device='cuda:0')
episode: 337 training return: tensor(-1.4230e+12, device='cuda:0')
episode: 338 training return: tensor(-2.2564e+11, device='cuda:0')
episode: 339 training return: tensor(-1.6807e+10, device='cuda:0')
epoch: 85 test_true_pfm: -194.17771202847777
episode: 340 training return: tensor(-9.4667e+15, device='cuda:0')
episode: 341 training return: tensor(-10652254., device='cuda:0')
episode: 342 training return: tensor(-7959191., device='cuda:0')
episode: 343 training return: tensor(-8277.1201, device='cuda:0')
epoch: 86 test_true_pfm: 115.38858558970772
episode: 344 training return: tensor(-9770173., device='cuda:0')
episode: 345 training return: tensor(-4.8957e+10, device='cuda:0')
episode: 346 training return: tensor(-141923.3594, device='cuda:0')
episode: 347 training return: tensor(-82462.0391, device='cuda:0')
epoch: 87 test_true_pfm: 260.992244497066
episode: 348 training return: tensor(-6.5972e+09, device='cuda:0')
episode: 349 training return: tensor(-5.1943e+10, device='cuda:0')
episode: 350 training return: tensor(-51844.2500, device='cuda:0')
episode: 351 training return: tensor(-1.2266e+10, device='cuda:0')
epoch: 88 test_true_pfm: 138.12474335017907
episode: 352 training return: tensor(-8.0577e+16, device='cuda:0')
episode: 353 training return: tensor(-1.0084e+10, device='cuda:0')
episode: 354 training return: tensor(-3.6769e+15, device='cuda:0')
episode: 355 training return: tensor(-11602.1084, device='cuda:0')
epoch: 89 test_true_pfm: 370.18835738682054
episode: 356 training return: tensor(-18662.6992, device='cuda:0')
episode: 357 training return: tensor(-9585356., device='cuda:0')
episode: 358 training return: tensor(-2.4742e+10, device='cuda:0')
episode: 359 training return: tensor(-2.5548e+17, device='cuda:0')
epoch: 90 test_true_pfm: 102.63400708652567
episode: 360 training return: tensor(-4.0500e+10, device='cuda:0')
episode: 361 training return: tensor(-204735.0469, device='cuda:0')
episode: 362 training return: tensor(-210291.1094, device='cuda:0')
episode: 363 training return: tensor(-51899.5859, device='cuda:0')
epoch: 91 test_true_pfm: 30.881410869638447
episode: 364 training return: tensor(-9.7842e+12, device='cuda:0')
episode: 365 training return: tensor(-14490.1143, device='cuda:0')
episode: 366 training return: tensor(-10952.5557, device='cuda:0')
episode: 367 training return: tensor(-11130.2959, device='cuda:0')
epoch: 92 test_true_pfm: 144.19081995749787
episode: 368 training return: tensor(-12717948., device='cuda:0')
episode: 369 training return: tensor(-7637.3979, device='cuda:0')
episode: 370 training return: tensor(-14017.3164, device='cuda:0')
episode: 371 training return: tensor(-16033.1885, device='cuda:0')
epoch: 93 test_true_pfm: 229.10077116377104
episode: 372 training return: tensor(-25438554., device='cuda:0')
episode: 373 training return: tensor(-31832.8750, device='cuda:0')
episode: 374 training return: tensor(-21619.2500, device='cuda:0')
episode: 375 training return: tensor(-13875.5635, device='cuda:0')
epoch: 94 test_true_pfm: 160.84927512867588
episode: 376 training return: tensor(-876065.6875, device='cuda:0')
episode: 377 training return: tensor(-9696.2627, device='cuda:0')
episode: 378 training return: tensor(-16233.1641, device='cuda:0')
episode: 379 training return: tensor(-11409.0967, device='cuda:0')
epoch: 95 test_true_pfm: 263.67660423227545
episode: 380 training return: tensor(-31507.2754, device='cuda:0')
episode: 381 training return: tensor(-8.3708e+09, device='cuda:0')
episode: 382 training return: tensor(-25259.7402, device='cuda:0')
episode: 383 training return: tensor(-65210.4258, device='cuda:0')
epoch: 96 test_true_pfm: 395.66941298399644
episode: 384 training return: tensor(-13863.6826, device='cuda:0')
episode: 385 training return: tensor(-10901.3672, device='cuda:0')
episode: 386 training return: tensor(-4.2503e+16, device='cuda:0')
episode: 387 training return: tensor(-4.0484e+09, device='cuda:0')
epoch: 97 test_true_pfm: -282.95726116907116
episode: 388 training return: tensor(-62272256., device='cuda:0')
episode: 389 training return: tensor(-77450240., device='cuda:0')
episode: 390 training return: tensor(-30678688., device='cuda:0')
episode: 391 training return: tensor(-1.1272e+11, device='cuda:0')
epoch: 98 test_true_pfm: -41.66396357327965
episode: 392 training return: tensor(-1.3532e+09, device='cuda:0')
episode: 393 training return: tensor(-62185396., device='cuda:0')
episode: 394 training return: tensor(-62337576., device='cuda:0')
episode: 395 training return: tensor(-90406528., device='cuda:0')
epoch: 99 test_true_pfm: -89.21568854779305
episode: 396 training return: tensor(-62890348., device='cuda:0')
episode: 397 training return: tensor(-39344760., device='cuda:0')
episode: 398 training return: tensor(-6.0272e+10, device='cuda:0')
episode: 399 training return: tensor(-94877320., device='cuda:0')
epoch: 100 test_true_pfm: -97.76826792459724
episode: 400 training return: tensor(-84978504., device='cuda:0')
episode: 401 training return: tensor(-53921944., device='cuda:0')
episode: 402 training return: tensor(-62181168., device='cuda:0')
episode: 403 training return: tensor(-1.1784e+11, device='cuda:0')
epoch: 101 test_true_pfm: -131.05877222020465
episode: 404 training return: tensor(-5.8396e+10, device='cuda:0')
episode: 405 training return: tensor(-1.1625e+08, device='cuda:0')
episode: 406 training return: tensor(-6.1511e+08, device='cuda:0')
episode: 407 training return: tensor(-12284.7842, device='cuda:0')
epoch: 102 test_true_pfm: -3.4745809156197534
episode: 408 training return: tensor(-77928400., device='cuda:0')
episode: 409 training return: tensor(-66148936., device='cuda:0')
episode: 410 training return: tensor(-62295696., device='cuda:0')
episode: 411 training return: tensor(-1.0074e+08, device='cuda:0')
epoch: 103 test_true_pfm: -65.33442263093612
episode: 412 training return: tensor(-1.2135e+08, device='cuda:0')
episode: 413 training return: tensor(-3.2329e+11, device='cuda:0')
episode: 414 training return: tensor(-61594168., device='cuda:0')
episode: 415 training return: tensor(-1.5558e+08, device='cuda:0')
epoch: 104 test_true_pfm: -84.77920870017873
episode: 416 training return: tensor(-61137784., device='cuda:0')
episode: 417 training return: tensor(-82096256., device='cuda:0')
episode: 418 training return: tensor(-34335.7656, device='cuda:0')
episode: 419 training return: tensor(-59576244., device='cuda:0')
epoch: 105 test_true_pfm: 122.26678897107941
episode: 420 training return: tensor(-12511.3271, device='cuda:0')
episode: 421 training return: tensor(-58329620., device='cuda:0')
episode: 422 training return: tensor(-55751140., device='cuda:0')
episode: 423 training return: tensor(-61442544., device='cuda:0')
epoch: 106 test_true_pfm: 67.7907514427799
episode: 424 training return: tensor(-87298832., device='cuda:0')
episode: 425 training return: tensor(-62947996., device='cuda:0')
episode: 426 training return: tensor(-76752096., device='cuda:0')
episode: 427 training return: tensor(-80579424., device='cuda:0')
epoch: 107 test_true_pfm: 24.086958903263092
episode: 428 training return: tensor(-15102.0684, device='cuda:0')
episode: 429 training return: tensor(-14624.0547, device='cuda:0')
episode: 430 training return: tensor(-68966344., device='cuda:0')
episode: 431 training return: tensor(-75688096., device='cuda:0')
epoch: 108 test_true_pfm: 124.67149538866802
episode: 432 training return: tensor(-69942696., device='cuda:0')
episode: 433 training return: tensor(-78611960., device='cuda:0')
episode: 434 training return: tensor(-1.0627e+08, device='cuda:0')
episode: 435 training return: tensor(-3.7026e+08, device='cuda:0')
epoch: 109 test_true_pfm: -98.49088177830077
episode: 436 training return: tensor(-37871.1094, device='cuda:0')
episode: 437 training return: tensor(-14015.4941, device='cuda:0')
episode: 438 training return: tensor(-11960.8066, device='cuda:0')
episode: 439 training return: tensor(-22025.0938, device='cuda:0')
epoch: 110 test_true_pfm: 195.46813748395584
episode: 440 training return: tensor(-9734.0957, device='cuda:0')
episode: 441 training return: tensor(-8834.8232, device='cuda:0')
episode: 442 training return: tensor(-9269.4482, device='cuda:0')
episode: 443 training return: tensor(-10237.8633, device='cuda:0')
epoch: 111 test_true_pfm: 583.4845897861152
episode: 444 training return: tensor(-7851.7383, device='cuda:0')
episode: 445 training return: tensor(-7197.1084, device='cuda:0')
episode: 446 training return: tensor(-8166.6304, device='cuda:0')
episode: 447 training return: tensor(-10673.7646, device='cuda:0')
epoch: 112 test_true_pfm: 213.64886696908684
episode: 448 training return: tensor(-9565.8789, device='cuda:0')
episode: 449 training return: tensor(-19035.5508, device='cuda:0')
episode: 450 training return: tensor(-7345.8374, device='cuda:0')
episode: 451 training return: tensor(-8328.5049, device='cuda:0')
epoch: 113 test_true_pfm: 62.08446587962627
episode: 452 training return: tensor(-8798.9707, device='cuda:0')
episode: 453 training return: tensor(-10957.9307, device='cuda:0')
episode: 454 training return: tensor(-13173.5420, device='cuda:0')
episode: 455 training return: tensor(-8857.9297, device='cuda:0')
epoch: 114 test_true_pfm: 148.5734083422025
episode: 456 training return: tensor(-9175.9834, device='cuda:0')
episode: 457 training return: tensor(-10068.7803, device='cuda:0')
episode: 458 training return: tensor(-10270.4541, device='cuda:0')
episode: 459 training return: tensor(-12164.5801, device='cuda:0')
epoch: 115 test_true_pfm: 75.2764688533736
episode: 460 training return: tensor(-10137.3643, device='cuda:0')
episode: 461 training return: tensor(-12480.6621, device='cuda:0')
episode: 462 training return: tensor(-12795.0840, device='cuda:0')
episode: 463 training return: tensor(-8393.2744, device='cuda:0')
epoch: 116 test_true_pfm: -34.87513631796153
episode: 464 training return: tensor(-9457.0049, device='cuda:0')
episode: 465 training return: tensor(-7810.4873, device='cuda:0')
episode: 466 training return: tensor(-55082768., device='cuda:0')
episode: 467 training return: tensor(-8705.6865, device='cuda:0')
epoch: 117 test_true_pfm: -8.39270771860235
episode: 468 training return: tensor(-8294.8574, device='cuda:0')
episode: 469 training return: tensor(-9545.8652, device='cuda:0')
episode: 470 training return: tensor(-8291.4980, device='cuda:0')
episode: 471 training return: tensor(-10712.6602, device='cuda:0')
epoch: 118 test_true_pfm: 14.224063761366947
episode: 472 training return: tensor(-15114.3135, device='cuda:0')
episode: 473 training return: tensor(-9054.7549, device='cuda:0')
episode: 474 training return: tensor(-8508.5459, device='cuda:0')
episode: 475 training return: tensor(-8508.2725, device='cuda:0')
epoch: 119 test_true_pfm: 198.54268055730265
episode: 476 training return: tensor(-56494960., device='cuda:0')
episode: 477 training return: tensor(-8390.5752, device='cuda:0')
episode: 478 training return: tensor(-8419.1191, device='cuda:0')
episode: 479 training return: tensor(-8883.2285, device='cuda:0')
epoch: 120 test_true_pfm: 216.3501395472973
episode: 480 training return: tensor(-15541.2617, device='cuda:0')
episode: 481 training return: tensor(-8986.5957, device='cuda:0')
episode: 482 training return: tensor(-21025.2480, device='cuda:0')
episode: 483 training return: tensor(-11549.0176, device='cuda:0')
epoch: 121 test_true_pfm: 26.214297749788503
episode: 484 training return: tensor(-9024.2725, device='cuda:0')
episode: 485 training return: tensor(-9413.5264, device='cuda:0')
episode: 486 training return: tensor(-15868.0840, device='cuda:0')
episode: 487 training return: tensor(-9406.7422, device='cuda:0')
epoch: 122 test_true_pfm: 223.35389035246362
episode: 488 training return: tensor(-8358.6592, device='cuda:0')
episode: 489 training return: tensor(-15573.9443, device='cuda:0')
episode: 490 training return: tensor(-11588.1172, device='cuda:0')
episode: 491 training return: tensor(-9971.5371, device='cuda:0')
epoch: 123 test_true_pfm: 218.42450375377078
episode: 492 training return: tensor(-8522.0244, device='cuda:0')
episode: 493 training return: tensor(-15767.8184, device='cuda:0')
episode: 494 training return: tensor(-8014.6758, device='cuda:0')
episode: 495 training return: tensor(-54082232., device='cuda:0')
epoch: 124 test_true_pfm: 227.09219763914606
episode: 496 training return: tensor(-9041.4736, device='cuda:0')
episode: 497 training return: tensor(-11993.8340, device='cuda:0')
episode: 498 training return: tensor(-10521.9395, device='cuda:0')
episode: 499 training return: tensor(-8314.7021, device='cuda:0')
epoch: 125 test_true_pfm: 189.34599887768422
episode: 500 training return: tensor(-9858.2432, device='cuda:0')
episode: 501 training return: tensor(-23824.8125, device='cuda:0')
episode: 502 training return: tensor(-10487.3867, device='cuda:0')
episode: 503 training return: tensor(-10692.7471, device='cuda:0')
epoch: 126 test_true_pfm: 361.27572970573897
episode: 504 training return: tensor(-35641468., device='cuda:0')
episode: 505 training return: tensor(-8525.6777, device='cuda:0')
episode: 506 training return: tensor(-10027.2021, device='cuda:0')
episode: 507 training return: tensor(-55738708., device='cuda:0')
epoch: 127 test_true_pfm: 159.05420763041613
episode: 508 training return: tensor(-10601.4795, device='cuda:0')
episode: 509 training return: tensor(-11675.2051, device='cuda:0')
episode: 510 training return: tensor(-12741.1191, device='cuda:0')
episode: 511 training return: tensor(-13070.5918, device='cuda:0')
epoch: 128 test_true_pfm: 133.55306218366408
episode: 512 training return: tensor(-3.3413e+08, device='cuda:0')
episode: 513 training return: tensor(-3.3952e+08, device='cuda:0')
episode: 514 training return: tensor(-3.3508e+08, device='cuda:0')
episode: 515 training return: tensor(-3.3393e+08, device='cuda:0')
epoch: 129 test_true_pfm: 108.12257226424828
episode: 516 training return: tensor(-3.2612e+08, device='cuda:0')
episode: 517 training return: tensor(-3.3404e+08, device='cuda:0')
episode: 518 training return: tensor(-3.3220e+08, device='cuda:0')
episode: 519 training return: tensor(-3.3846e+08, device='cuda:0')
epoch: 130 test_true_pfm: 41.63563469666018
episode: 520 training return: tensor(-3.3044e+08, device='cuda:0')
episode: 521 training return: tensor(-3.3076e+08, device='cuda:0')
episode: 522 training return: tensor(-3.1656e+08, device='cuda:0')
episode: 523 training return: tensor(-3.2998e+08, device='cuda:0')
epoch: 131 test_true_pfm: 134.07720717777423
episode: 524 training return: tensor(-3.3932e+08, device='cuda:0')
episode: 525 training return: tensor(-3.3395e+08, device='cuda:0')
episode: 526 training return: tensor(-3.3588e+08, device='cuda:0')
episode: 527 training return: tensor(-3.2555e+08, device='cuda:0')
epoch: 132 test_true_pfm: 138.00216531182366
episode: 528 training return: tensor(-3.2775e+08, device='cuda:0')
episode: 529 training return: tensor(-3.3046e+08, device='cuda:0')
episode: 530 training return: tensor(-3.5137e+08, device='cuda:0')
episode: 531 training return: tensor(-2.8284e+08, device='cuda:0')
epoch: 133 test_true_pfm: 13.063125255997846
episode: 532 training return: tensor(-2.8862e+08, device='cuda:0')
episode: 533 training return: tensor(-3.3252e+08, device='cuda:0')
episode: 534 training return: tensor(-3.3007e+08, device='cuda:0')
episode: 535 training return: tensor(-3.3062e+08, device='cuda:0')
epoch: 134 test_true_pfm: 40.103347636488614
episode: 536 training return: tensor(-3.0818e+08, device='cuda:0')
episode: 537 training return: tensor(-3.2552e+08, device='cuda:0')
episode: 538 training return: tensor(-59411528., device='cuda:0')
episode: 539 training return: tensor(-59765464., device='cuda:0')
epoch: 135 test_true_pfm: 224.09849440625968
episode: 540 training return: tensor(-74139976., device='cuda:0')
episode: 541 training return: tensor(-15993.7725, device='cuda:0')
episode: 542 training return: tensor(-21275.3438, device='cuda:0')
episode: 543 training return: tensor(-18751.3555, device='cuda:0')
epoch: 136 test_true_pfm: 228.59960506683527
episode: 544 training return: tensor(-19013.6406, device='cuda:0')
episode: 545 training return: tensor(-13890.1445, device='cuda:0')
episode: 546 training return: tensor(-68487720., device='cuda:0')
episode: 547 training return: tensor(-59032340., device='cuda:0')
epoch: 137 test_true_pfm: 255.11743153991847
episode: 548 training return: tensor(-91817800., device='cuda:0')
episode: 549 training return: tensor(-60227296., device='cuda:0')
episode: 550 training return: tensor(-67074264., device='cuda:0')
episode: 551 training return: tensor(-59211944., device='cuda:0')
epoch: 138 test_true_pfm: 209.06015224064382
episode: 552 training return: tensor(-65396108., device='cuda:0')
episode: 553 training return: tensor(-57459028., device='cuda:0')
episode: 554 training return: tensor(-57627916., device='cuda:0')
episode: 555 training return: tensor(-44288748., device='cuda:0')
epoch: 139 test_true_pfm: 387.65062888038256
episode: 556 training return: tensor(-23177.3965, device='cuda:0')
episode: 557 training return: tensor(-22023.2539, device='cuda:0')
episode: 558 training return: tensor(-17808.8789, device='cuda:0')
episode: 559 training return: tensor(-57379744., device='cuda:0')
epoch: 140 test_true_pfm: 330.7023060819885
episode: 560 training return: tensor(-49406452., device='cuda:0')
episode: 561 training return: tensor(-13488.3320, device='cuda:0')
episode: 562 training return: tensor(-3.4463e+08, device='cuda:0')
episode: 563 training return: tensor(-3.2512e+08, device='cuda:0')
epoch: 141 test_true_pfm: 33.984144960920304
episode: 564 training return: tensor(-3.2071e+08, device='cuda:0')
episode: 565 training return: tensor(-4.0354e+08, device='cuda:0')
episode: 566 training return: tensor(-3.2321e+08, device='cuda:0')
episode: 567 training return: tensor(-4.4267e+08, device='cuda:0')
epoch: 142 test_true_pfm: 85.26188511517013
episode: 568 training return: tensor(-3.6343e+08, device='cuda:0')
episode: 569 training return: tensor(-3.2139e+08, device='cuda:0')
episode: 570 training return: tensor(-4.1183e+08, device='cuda:0')
episode: 571 training return: tensor(-3.1893e+08, device='cuda:0')
epoch: 143 test_true_pfm: 126.99131266505053
episode: 572 training return: tensor(-3.1933e+08, device='cuda:0')
episode: 573 training return: tensor(-3.2221e+08, device='cuda:0')
episode: 574 training return: tensor(-3.1972e+08, device='cuda:0')
episode: 575 training return: tensor(-3.1934e+08, device='cuda:0')
epoch: 144 test_true_pfm: 114.89851795255167
episode: 576 training return: tensor(-3.2144e+08, device='cuda:0')
episode: 577 training return: tensor(-3.2110e+08, device='cuda:0')
episode: 578 training return: tensor(-3.2144e+08, device='cuda:0')
episode: 579 training return: tensor(-2.9635e+12, device='cuda:0')
epoch: 145 test_true_pfm: 111.98088687197362
episode: 580 training return: tensor(-3.2284e+08, device='cuda:0')
episode: 581 training return: tensor(-4.1547e+08, device='cuda:0')
episode: 582 training return: tensor(-1.8252e+12, device='cuda:0')
episode: 583 training return: tensor(-4.4342e+12, device='cuda:0')
epoch: 146 test_true_pfm: -90.00645456314585
episode: 584 training return: tensor(-4.2449e+09, device='cuda:0')
episode: 585 training return: tensor(-4.8515e+09, device='cuda:0')
episode: 586 training return: tensor(-3.1742e+08, device='cuda:0')
episode: 587 training return: tensor(-3.1962e+08, device='cuda:0')
epoch: 147 test_true_pfm: 132.08988122350675
episode: 588 training return: tensor(-3.2037e+08, device='cuda:0')
episode: 589 training return: tensor(-4.0228e+08, device='cuda:0')
episode: 590 training return: tensor(-3.9396e+12, device='cuda:0')
episode: 591 training return: tensor(-29055.0293, device='cuda:0')
epoch: 148 test_true_pfm: -165.47453019511232
episode: 592 training return: tensor(-4.1330e+12, device='cuda:0')
episode: 593 training return: tensor(-4.1986e+12, device='cuda:0')
episode: 594 training return: tensor(-4.4735e+12, device='cuda:0')
episode: 595 training return: tensor(-3.2423e+08, device='cuda:0')
epoch: 149 test_true_pfm: -11.67985356412431
episode: 596 training return: tensor(-3.1941e+08, device='cuda:0')
episode: 597 training return: tensor(-3.2140e+12, device='cuda:0')
episode: 598 training return: tensor(-4.0130e+12, device='cuda:0')
episode: 599 training return: tensor(-4.1623e+12, device='cuda:0')
epoch: 150 test_true_pfm: -64.17715559969093
