['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'brac', '--traj', 'expert', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 4.875091119222343 test_loss: 0.023266702890396118
epoch: 1 training_loss -1.3864922877028585 test_loss: -2.570289421081543
epoch: 2 training_loss -3.1269068214297295 test_loss: -4.204844284057617
epoch: 3 training_loss -4.298532226085663 test_loss: -4.810428237915039
epoch: 4 training_loss -5.283260753154755 test_loss: -5.556171417236328
epoch: 5 training_loss -5.851561450958252 test_loss: -5.943597412109375
epoch: 6 training_loss -6.565870547294617 test_loss: -6.882228088378906
epoch: 7 training_loss -7.002270512580871 test_loss: -7.247364807128906
epoch: 8 training_loss -7.456831703186035 test_loss: -7.582377624511719
epoch: 9 training_loss -7.796047825813293 test_loss: -7.935313415527344
epoch: 10 training_loss -8.248915452957153 test_loss: -8.245830535888672
epoch: 11 training_loss -8.519048619270325 test_loss: -8.704048156738281
epoch: 12 training_loss -8.837629404067993 test_loss: -9.034696197509765
epoch: 13 training_loss -9.08700409412384 test_loss: -9.333226013183594
epoch: 14 training_loss -9.343372478485108 test_loss: -9.569602966308594
epoch: 15 training_loss -9.628675785064697 test_loss: -9.595664978027344
epoch: 16 training_loss -9.871976137161255 test_loss: -9.920150756835938
epoch: 17 training_loss -10.03461443901062 test_loss: -10.155089569091796
epoch: 18 training_loss -10.256770372390747 test_loss: -10.273571014404297
epoch: 19 training_loss -9.230745577812195 test_loss: -10.186219024658204
epoch: 20 training_loss -10.453540754318237 test_loss: -10.417713928222657
epoch: 21 training_loss -10.654472551345826 test_loss: -10.73488540649414
epoch: 22 training_loss -10.758944311141967 test_loss: -10.744962310791015
epoch: 23 training_loss -10.893794384002685 test_loss: -10.961241912841796
epoch: 24 training_loss -11.004316625595093 test_loss: -10.848041534423828
epoch: 25 training_loss -11.101856575012206 test_loss: -11.335234069824219
epoch: 26 training_loss -11.309614067077638 test_loss: -11.356147003173827
epoch: 27 training_loss -11.35503915786743 test_loss: -11.37206802368164
epoch: 28 training_loss -11.45314947128296 test_loss: -11.597611236572266
epoch: 29 training_loss -11.527616653442383 test_loss: -11.564493560791016
epoch: 30 training_loss -11.767510299682618 test_loss: -11.638815307617188
epoch: 31 training_loss -11.743560132980347 test_loss: -11.765672302246093
epoch: 32 training_loss -11.81998517036438 test_loss: -11.809044647216798
epoch: 33 training_loss -11.921253414154053 test_loss: -11.966697692871094
epoch: 34 training_loss -11.996457233428956 test_loss: -12.147679901123047
epoch: 35 training_loss -12.087549562454223 test_loss: -12.126387786865234
epoch: 36 training_loss -12.167632722854615 test_loss: -12.192742156982423
epoch: 37 training_loss -12.257720737457275 test_loss: -12.490841674804688
epoch: 38 training_loss -12.333580856323243 test_loss: -12.491902923583984
epoch: 39 training_loss -12.446480522155762 test_loss: -12.34561996459961
epoch: 40 training_loss -12.471268148422242 test_loss: -12.430935668945313
epoch: 41 training_loss -12.419767637252807 test_loss: -12.726369476318359
epoch: 42 training_loss -12.632201194763184 test_loss: -12.609414672851562
epoch: 43 training_loss -12.577909021377563 test_loss: -12.77919921875
epoch: 44 training_loss -12.783479452133179 test_loss: -12.749078369140625
epoch: 45 training_loss -12.836339645385742 test_loss: -12.882652282714844
epoch: 46 training_loss -12.822824287414551 test_loss: -12.811598205566407
epoch: 47 training_loss -12.978823642730713 test_loss: -12.918310546875
epoch: 48 training_loss -12.97902021408081 test_loss: -13.120198059082032
epoch: 49 training_loss -13.111737756729125 test_loss: -13.010592651367187
epoch: 50 training_loss -13.108302602767944 test_loss: -13.013487243652344
epoch: 51 training_loss -13.252674131393432 test_loss: -13.120672607421875
epoch: 52 training_loss -13.299415988922119 test_loss: -13.233233642578124
epoch: 53 training_loss -13.33838628768921 test_loss: -13.368264770507812
epoch: 54 training_loss -13.369602813720704 test_loss: -13.406187438964844
epoch: 55 training_loss -13.409825086593628 test_loss: -13.401898193359376
epoch: 56 training_loss -13.426377019882203 test_loss: -13.441244506835938
epoch: 57 training_loss -13.448540353775025 test_loss: -13.38546142578125
epoch: 58 training_loss -13.556032485961914 test_loss: -13.680569458007813
epoch: 59 training_loss -13.584560461044312 test_loss: -13.66273193359375
epoch: 60 training_loss -13.58722053527832 test_loss: -13.819912719726563
epoch: 61 training_loss -13.752387838363648 test_loss: -13.632931518554688
epoch: 62 training_loss -13.593007736206054 test_loss: -13.765414428710937
epoch: 63 training_loss -13.754440879821777 test_loss: -13.831951904296876
epoch: 64 training_loss -13.751725254058838 test_loss: -13.670376586914063
epoch: 65 training_loss -13.754183025360108 test_loss: -13.852076721191406
epoch: 66 training_loss -13.840292911529541 test_loss: -13.860171508789062
epoch: 67 training_loss -13.905351676940917 test_loss: -13.820570373535157
epoch: 68 training_loss -13.952508144378662 test_loss: -13.802252197265625
epoch: 69 training_loss -13.970890140533447 test_loss: -14.209024047851562
epoch: 70 training_loss -13.986733303070068 test_loss: -14.047529602050782
epoch: 71 training_loss -14.063945169448852 test_loss: -13.939657592773438
epoch: 72 training_loss -14.097917757034303 test_loss: -14.11706085205078
epoch: 73 training_loss -14.069990701675415 test_loss: -14.090980529785156
epoch: 74 training_loss -14.177551956176758 test_loss: -14.04530029296875
epoch: 75 training_loss -14.124255104064941 test_loss: -14.221385192871093
epoch: 76 training_loss -14.247126913070678 test_loss: -14.306375122070312
epoch: 77 training_loss -14.201497449874878 test_loss: -14.264096069335938
epoch: 78 training_loss -14.201047601699829 test_loss: -14.371723937988282
epoch: 79 training_loss -14.289445552825928 test_loss: -14.343780517578125
epoch: 80 training_loss -14.34329668045044 test_loss: -14.286308288574219
epoch: 81 training_loss -14.297664709091187 test_loss: -14.311677551269531
epoch: 82 training_loss -14.360601873397828 test_loss: -14.31728515625
epoch: 83 training_loss -14.403498821258545 test_loss: -14.312893676757813
epoch: 84 training_loss -14.349580421447754 test_loss: -14.530020141601563
epoch: 85 training_loss -14.419407196044922 test_loss: -14.573527526855468
epoch: 86 training_loss -14.41320297241211 test_loss: -14.523641967773438
epoch: 87 training_loss -14.506576642990112 test_loss: -14.514823913574219
epoch: 88 training_loss -14.487525806427001 test_loss: -14.5239013671875
epoch: 89 training_loss -14.508798427581787 test_loss: -14.433348083496094
epoch: 90 training_loss -14.55010489463806 test_loss: -14.510185241699219
epoch: 91 training_loss -14.621640214920044 test_loss: -14.683641052246093
epoch: 92 training_loss -14.635761489868164 test_loss: -14.617283630371094
epoch: 93 training_loss -14.726457328796387 test_loss: -14.51121826171875
epoch: 94 training_loss -14.651606464385987 test_loss: -14.528962707519531
epoch: 95 training_loss -14.59369270324707 test_loss: -14.559593200683594
epoch: 96 training_loss -14.598803653717042 test_loss: -14.678376770019531
epoch: 97 training_loss -14.6895902633667 test_loss: -14.586955261230468
epoch: 98 training_loss -14.687520351409912 test_loss: -14.951896667480469
epoch: 99 training_loss -14.74947805404663 test_loss: -14.678556823730469
epoch: 100 training_loss -14.777061576843261 test_loss: -14.851712036132813
epoch: 101 training_loss -14.794416913986206 test_loss: -14.737156677246094
epoch: 102 training_loss -14.777680368423463 test_loss: -14.71513671875
epoch: 103 training_loss -14.841096172332763 test_loss: -14.922564697265624
epoch: 104 training_loss -14.813947877883912 test_loss: -14.920014953613281
epoch: 105 training_loss -14.828309955596923 test_loss: -14.791281127929688
epoch: 106 training_loss -14.878743200302123 test_loss: -14.933876037597656
epoch: 107 training_loss -14.865347242355346 test_loss: -14.720634460449219
epoch: 108 training_loss -14.907410306930542 test_loss: -14.872444152832031
epoch: 109 training_loss -14.930673522949219 test_loss: -14.904019165039063
epoch: 110 training_loss -14.89297806739807 test_loss: -14.985501098632813
epoch: 111 training_loss -14.939865942001342 test_loss: -14.944285583496093
epoch: 112 training_loss -15.041151628494262 test_loss: -15.060775756835938
epoch: 113 training_loss -14.993981428146363 test_loss: -14.892587280273437
epoch: 114 training_loss -14.945104026794434 test_loss: -14.903645324707032
epoch: 115 training_loss -14.964265832901 test_loss: -15.065133666992187
epoch: 116 training_loss -15.091291971206665 test_loss: -14.875244140625
epoch: 117 training_loss -15.10255849838257 test_loss: -15.013262939453124
epoch: 118 training_loss -14.970549306869508 test_loss: -14.994320678710938
epoch: 119 training_loss -15.071714220046998 test_loss: -14.955471801757813
epoch: 120 training_loss -15.081810684204102 test_loss: -15.21081085205078
epoch: 121 training_loss -15.138875398635864 test_loss: -15.007884216308593
epoch: 122 training_loss -15.059532022476196 test_loss: -14.933851623535157
epoch: 123 training_loss -15.092326641082764 test_loss: -15.344026184082031
epoch: 124 training_loss -15.18373911857605 test_loss: -15.126364135742188
epoch: 125 training_loss -15.150936107635498 test_loss: -15.2469970703125
epoch: 126 training_loss -15.127943210601806 test_loss: -15.269451904296876
epoch: 127 training_loss -15.19409324645996 test_loss: -15.172926330566407
epoch: 128 training_loss -15.22428201675415 test_loss: -14.962887573242188
epoch: 129 training_loss -15.184395551681519 test_loss: -15.243260192871094
epoch: 130 training_loss -15.211675338745117 test_loss: -14.909393310546875
epoch: 131 training_loss -15.280069942474364 test_loss: -15.018991088867187
epoch: 132 training_loss -15.207356061935425 test_loss: -15.385186767578125
epoch: 133 training_loss -15.282709693908691 test_loss: -15.2392333984375
epoch: 134 training_loss -15.283805894851685 test_loss: -15.265408325195313
epoch: 135 training_loss -15.225661735534668 test_loss: -15.235395812988282
epoch: 136 training_loss -15.31603404045105 test_loss: -15.543002319335937
epoch: 137 training_loss -15.396304969787598 test_loss: -15.222343444824219
epoch: 138 training_loss -15.327913064956665 test_loss: -15.280805969238282
epoch: 139 training_loss -15.310937509536743 test_loss: -15.294146728515624
epoch: 140 training_loss -15.359616870880126 test_loss: -15.121705627441406
epoch: 141 training_loss -15.302399225234986 test_loss: -15.206092834472656
epoch: 142 training_loss -15.390154047012329 test_loss: -15.308216857910157
epoch: 143 training_loss -15.345773191452027 test_loss: -15.110687255859375
epoch: 144 training_loss -15.413976287841797 test_loss: -15.332577514648438
epoch: 145 training_loss -15.363355379104615 test_loss: -15.432258605957031
epoch: 146 training_loss -15.421469287872315 test_loss: -15.411224365234375
epoch: 147 training_loss -15.372142391204834 test_loss: -15.401216125488281
epoch: 148 training_loss -15.412505016326904 test_loss: -15.508621215820312
epoch: 149 training_loss -15.481724433898925 test_loss: -15.511474609375
-1602.5734754584842
episode: 0 training return: tensor(-5986.5020, device='cuda:0')
episode: 1 training return: tensor(-4888.9375, device='cuda:0')
episode: 2 training return: tensor(-5896.8223, device='cuda:0')
episode: 3 training return: tensor(-5608.2393, device='cuda:0')
epoch: 1 test_true_pfm: -318.9402950819555
episode: 4 training return: tensor(-4970.9824, device='cuda:0')
episode: 5 training return: tensor(-3428.4331, device='cuda:0')
episode: 6 training return: tensor(-5792.9751, device='cuda:0')
episode: 7 training return: tensor(-6075.4751, device='cuda:0')
epoch: 2 test_true_pfm: -259.767173647549
episode: 8 training return: tensor(-5762.8867, device='cuda:0')
episode: 9 training return: tensor(-4960.0664, device='cuda:0')
episode: 10 training return: tensor(-1900.7627, device='cuda:0')
episode: 11 training return: tensor(-1811.8149, device='cuda:0')
epoch: 3 test_true_pfm: -71.47328372648711
episode: 12 training return: tensor(-1813.2535, device='cuda:0')
episode: 13 training return: tensor(-1834.6243, device='cuda:0')
episode: 14 training return: tensor(-2280.4590, device='cuda:0')
episode: 15 training return: tensor(-1915.1997, device='cuda:0')
epoch: 4 test_true_pfm: -22.015834969553854
episode: 16 training return: tensor(-2024.3118, device='cuda:0')
episode: 17 training return: tensor(-1977.1691, device='cuda:0')
episode: 18 training return: tensor(-2311.5532, device='cuda:0')
episode: 19 training return: tensor(-2176.2922, device='cuda:0')
epoch: 5 test_true_pfm: -13.907664782194411
episode: 20 training return: tensor(-2396.3687, device='cuda:0')
episode: 21 training return: tensor(-2325.8745, device='cuda:0')
episode: 22 training return: tensor(-1911.6943, device='cuda:0')
episode: 23 training return: tensor(-2023.3380, device='cuda:0')
epoch: 6 test_true_pfm: -13.660571395966405
episode: 24 training return: tensor(-2002.9747, device='cuda:0')
episode: 25 training return: tensor(-1990.6642, device='cuda:0')
episode: 26 training return: tensor(-1909.3019, device='cuda:0')
episode: 27 training return: tensor(-2414.2170, device='cuda:0')
epoch: 7 test_true_pfm: -23.65293690709966
episode: 28 training return: tensor(-1959.6439, device='cuda:0')
episode: 29 training return: tensor(-1924.6985, device='cuda:0')
episode: 30 training return: tensor(-1945.5283, device='cuda:0')
episode: 31 training return: tensor(-2125.0054, device='cuda:0')
epoch: 8 test_true_pfm: -8.996965528416899
episode: 32 training return: tensor(-1899.3843, device='cuda:0')
episode: 33 training return: tensor(-2036.3867, device='cuda:0')
episode: 34 training return: tensor(-1940.6967, device='cuda:0')
episode: 35 training return: tensor(-2106.9417, device='cuda:0')
epoch: 9 test_true_pfm: -13.757753188243342
episode: 36 training return: tensor(-1889.4839, device='cuda:0')
episode: 37 training return: tensor(-1837.9626, device='cuda:0')
episode: 38 training return: tensor(-1870.4906, device='cuda:0')
episode: 39 training return: tensor(-1866.8429, device='cuda:0')
epoch: 10 test_true_pfm: -31.307962898421835
episode: 40 training return: tensor(-1884.5100, device='cuda:0')
episode: 41 training return: tensor(-2328.1162, device='cuda:0')
episode: 42 training return: tensor(-1870.5359, device='cuda:0')
episode: 43 training return: tensor(-1850.1211, device='cuda:0')
epoch: 11 test_true_pfm: -36.80337035528995
episode: 44 training return: tensor(-2393.9358, device='cuda:0')
episode: 45 training return: tensor(-2072.2017, device='cuda:0')
episode: 46 training return: tensor(-1924.2876, device='cuda:0')
episode: 47 training return: tensor(-1889.0087, device='cuda:0')
epoch: 12 test_true_pfm: -15.703859534509263
episode: 48 training return: tensor(-1895.7380, device='cuda:0')
episode: 49 training return: tensor(-1879.0062, device='cuda:0')
episode: 50 training return: tensor(-1840.4757, device='cuda:0')
episode: 51 training return: tensor(-2310.4868, device='cuda:0')
epoch: 13 test_true_pfm: -38.46318495560033
episode: 52 training return: tensor(-2057.5776, device='cuda:0')
episode: 53 training return: tensor(-1967.8561, device='cuda:0')
episode: 54 training return: tensor(-2418.1716, device='cuda:0')
episode: 55 training return: tensor(-2056.5076, device='cuda:0')
epoch: 14 test_true_pfm: -22.550964421167624
episode: 56 training return: tensor(-2041.9978, device='cuda:0')
episode: 57 training return: tensor(-2309.7874, device='cuda:0')
episode: 58 training return: tensor(-1869.5928, device='cuda:0')
episode: 59 training return: tensor(-1879.4084, device='cuda:0')
epoch: 15 test_true_pfm: -18.860336204725343
episode: 60 training return: tensor(-1869.1672, device='cuda:0')
episode: 61 training return: tensor(-1852.7891, device='cuda:0')
episode: 62 training return: tensor(-1865.7717, device='cuda:0')
episode: 63 training return: tensor(-1862.8341, device='cuda:0')
epoch: 16 test_true_pfm: -29.575263379822147
episode: 64 training return: tensor(-1839.8826, device='cuda:0')
episode: 65 training return: tensor(-1916.7823, device='cuda:0')
episode: 66 training return: tensor(-1832.8733, device='cuda:0')
episode: 67 training return: tensor(-1864.7928, device='cuda:0')
epoch: 17 test_true_pfm: -25.294118199376573
episode: 68 training return: tensor(-1873.2043, device='cuda:0')
episode: 69 training return: tensor(-1856.5560, device='cuda:0')
episode: 70 training return: tensor(-1886.0752, device='cuda:0')
episode: 71 training return: tensor(-1838.7814, device='cuda:0')
epoch: 18 test_true_pfm: -18.64789180189048
episode: 72 training return: tensor(-1893.9105, device='cuda:0')
episode: 73 training return: tensor(-1874.4138, device='cuda:0')
episode: 74 training return: tensor(-1861.0066, device='cuda:0')
episode: 75 training return: tensor(-1859.4851, device='cuda:0')
epoch: 19 test_true_pfm: -17.129423565802316
episode: 76 training return: tensor(-1867.7933, device='cuda:0')
episode: 77 training return: tensor(-1869.5770, device='cuda:0')
episode: 78 training return: tensor(-1847.8800, device='cuda:0')
episode: 79 training return: tensor(-1824.4225, device='cuda:0')
epoch: 20 test_true_pfm: -16.53412428697816
episode: 80 training return: tensor(-1895.8364, device='cuda:0')
episode: 81 training return: tensor(-1889.7869, device='cuda:0')
episode: 82 training return: tensor(-1888.2855, device='cuda:0')
episode: 83 training return: tensor(-1865.9138, device='cuda:0')
epoch: 21 test_true_pfm: -24.12065327031615
episode: 84 training return: tensor(-1850.5040, device='cuda:0')
episode: 85 training return: tensor(-1862.9137, device='cuda:0')
episode: 86 training return: tensor(-1842.6395, device='cuda:0')
episode: 87 training return: tensor(-1835.6449, device='cuda:0')
epoch: 22 test_true_pfm: -9.341992213870908
episode: 88 training return: tensor(-1879.1492, device='cuda:0')
episode: 89 training return: tensor(-1861.8448, device='cuda:0')
episode: 90 training return: tensor(-1853.2113, device='cuda:0')
episode: 91 training return: tensor(-1891.3749, device='cuda:0')
epoch: 23 test_true_pfm: -10.512434470217414
episode: 92 training return: tensor(-1876.6000, device='cuda:0')
episode: 93 training return: tensor(-1874.8041, device='cuda:0')
episode: 94 training return: tensor(-1878.7485, device='cuda:0')
episode: 95 training return: tensor(-1872.4318, device='cuda:0')
epoch: 24 test_true_pfm: -11.4268250588188
episode: 96 training return: tensor(-1866.3923, device='cuda:0')
episode: 97 training return: tensor(-1895.3065, device='cuda:0')
episode: 98 training return: tensor(-1873.1385, device='cuda:0')
episode: 99 training return: tensor(-1872.8986, device='cuda:0')
epoch: 25 test_true_pfm: -15.852977682565855
episode: 100 training return: tensor(-1867.8201, device='cuda:0')
episode: 101 training return: tensor(-1882.4862, device='cuda:0')
episode: 102 training return: tensor(-1875.8054, device='cuda:0')
episode: 103 training return: tensor(-1856.9567, device='cuda:0')
epoch: 26 test_true_pfm: -12.82056673549527
episode: 104 training return: tensor(-1865.4219, device='cuda:0')
episode: 105 training return: tensor(-1852.0729, device='cuda:0')
episode: 106 training return: tensor(-1882.4602, device='cuda:0')
episode: 107 training return: tensor(-1877.4138, device='cuda:0')
epoch: 27 test_true_pfm: -12.065205852293944
episode: 108 training return: tensor(-1888.9026, device='cuda:0')
episode: 109 training return: tensor(-1904.6622, device='cuda:0')
episode: 110 training return: tensor(-1873.4142, device='cuda:0')
episode: 111 training return: tensor(-1875.3195, device='cuda:0')
epoch: 28 test_true_pfm: -11.443735566105468
episode: 112 training return: tensor(-1890.8179, device='cuda:0')
episode: 113 training return: tensor(-1860.1527, device='cuda:0')
episode: 114 training return: tensor(-1857.0492, device='cuda:0')
episode: 115 training return: tensor(-1880.5906, device='cuda:0')
epoch: 29 test_true_pfm: -11.108830763156346
episode: 116 training return: tensor(-1889.2622, device='cuda:0')
episode: 117 training return: tensor(-1858.4106, device='cuda:0')
episode: 118 training return: tensor(-1890.1975, device='cuda:0')
episode: 119 training return: tensor(-1869.8564, device='cuda:0')
epoch: 30 test_true_pfm: -10.484765499697595
episode: 120 training return: tensor(-1878.4216, device='cuda:0')
episode: 121 training return: tensor(-1874.2815, device='cuda:0')
episode: 122 training return: tensor(-1886.5577, device='cuda:0')
episode: 123 training return: tensor(-1881.7144, device='cuda:0')
epoch: 31 test_true_pfm: -6.307156180465699
episode: 124 training return: tensor(-1846.3508, device='cuda:0')
episode: 125 training return: tensor(-1875.0789, device='cuda:0')
episode: 126 training return: tensor(-1867.9086, device='cuda:0')
episode: 127 training return: tensor(-1871.1190, device='cuda:0')
epoch: 32 test_true_pfm: -8.900753147734891
episode: 128 training return: tensor(-1891.6721, device='cuda:0')
episode: 129 training return: tensor(-1890.7031, device='cuda:0')
episode: 130 training return: tensor(-1872.7932, device='cuda:0')
episode: 131 training return: tensor(-1855.9135, device='cuda:0')
epoch: 33 test_true_pfm: -7.935137913799906
episode: 132 training return: tensor(-1927.1200, device='cuda:0')
episode: 133 training return: tensor(-1856.0890, device='cuda:0')
episode: 134 training return: tensor(-1887.6145, device='cuda:0')
episode: 135 training return: tensor(-1860.8508, device='cuda:0')
epoch: 34 test_true_pfm: -8.242597649529978
episode: 136 training return: tensor(-1880.0157, device='cuda:0')
episode: 137 training return: tensor(-1895.1100, device='cuda:0')
episode: 138 training return: tensor(-1863.9309, device='cuda:0')
episode: 139 training return: tensor(-1912.9304, device='cuda:0')
epoch: 35 test_true_pfm: 0.18552872202062973
episode: 140 training return: tensor(-1859.5590, device='cuda:0')
episode: 141 training return: tensor(-1891.5564, device='cuda:0')
episode: 142 training return: tensor(-1882.0563, device='cuda:0')
episode: 143 training return: tensor(-1876.3373, device='cuda:0')
epoch: 36 test_true_pfm: -14.663258549008717
episode: 144 training return: tensor(-1867.4801, device='cuda:0')
episode: 145 training return: tensor(-1875.8008, device='cuda:0')
episode: 146 training return: tensor(-1901.0791, device='cuda:0')
episode: 147 training return: tensor(-1877.9419, device='cuda:0')
epoch: 37 test_true_pfm: -6.964378092128398
episode: 148 training return: tensor(-1896.8468, device='cuda:0')
episode: 149 training return: tensor(-1891.5327, device='cuda:0')
episode: 150 training return: tensor(-1868.1812, device='cuda:0')
episode: 151 training return: tensor(-1838.8640, device='cuda:0')
epoch: 38 test_true_pfm: -9.88625600529347
episode: 152 training return: tensor(-1887.6676, device='cuda:0')
episode: 153 training return: tensor(-1882.2914, device='cuda:0')
episode: 154 training return: tensor(-1869.7356, device='cuda:0')
episode: 155 training return: tensor(-1891.3247, device='cuda:0')
epoch: 39 test_true_pfm: -7.410857294903086
episode: 156 training return: tensor(-1911.3278, device='cuda:0')
episode: 157 training return: tensor(-1860.0339, device='cuda:0')
episode: 158 training return: tensor(-1879.9031, device='cuda:0')
episode: 159 training return: tensor(-1872.0404, device='cuda:0')
epoch: 40 test_true_pfm: -5.753686952173502
episode: 160 training return: tensor(-1859.6426, device='cuda:0')
episode: 161 training return: tensor(-1866.7943, device='cuda:0')
episode: 162 training return: tensor(-1848.7993, device='cuda:0')
episode: 163 training return: tensor(-1860.0562, device='cuda:0')
epoch: 41 test_true_pfm: -4.344285451741441
episode: 164 training return: tensor(-1844.7650, device='cuda:0')
episode: 165 training return: tensor(-1910.5453, device='cuda:0')
episode: 166 training return: tensor(-1848.1799, device='cuda:0')
episode: 167 training return: tensor(-1927.4926, device='cuda:0')
epoch: 42 test_true_pfm: -9.06125200303798
episode: 168 training return: tensor(-1850.2030, device='cuda:0')
episode: 169 training return: tensor(-1876.3154, device='cuda:0')
episode: 170 training return: tensor(-1865.2014, device='cuda:0')
episode: 171 training return: tensor(-1895.0835, device='cuda:0')
epoch: 43 test_true_pfm: -10.435364531449153
episode: 172 training return: tensor(-1865.1554, device='cuda:0')
episode: 173 training return: tensor(-1860.1381, device='cuda:0')
episode: 174 training return: tensor(-1882.5677, device='cuda:0')
episode: 175 training return: tensor(-1859.5319, device='cuda:0')
epoch: 44 test_true_pfm: -6.6817086478201375
episode: 176 training return: tensor(-1860.5244, device='cuda:0')
episode: 177 training return: tensor(-1876.8724, device='cuda:0')
episode: 178 training return: tensor(-1885.2195, device='cuda:0')
episode: 179 training return: tensor(-1864.8834, device='cuda:0')
epoch: 45 test_true_pfm: -3.6780190859494564
episode: 180 training return: tensor(-1831.8027, device='cuda:0')
episode: 181 training return: tensor(-1862.0123, device='cuda:0')
episode: 182 training return: tensor(-1876.3815, device='cuda:0')
episode: 183 training return: tensor(-1849.8176, device='cuda:0')
epoch: 46 test_true_pfm: -1.934947497315168
episode: 184 training return: tensor(-1887.5765, device='cuda:0')
episode: 185 training return: tensor(-1893.5331, device='cuda:0')
episode: 186 training return: tensor(-1876.5725, device='cuda:0')
episode: 187 training return: tensor(-1863.5497, device='cuda:0')
epoch: 47 test_true_pfm: -3.7574434020752854
episode: 188 training return: tensor(-1854.2017, device='cuda:0')
episode: 189 training return: tensor(-1859.8693, device='cuda:0')
episode: 190 training return: tensor(-1892.4698, device='cuda:0')
episode: 191 training return: tensor(-1849.0109, device='cuda:0')
epoch: 48 test_true_pfm: -5.232818605074052
episode: 192 training return: tensor(-1887.8929, device='cuda:0')
episode: 193 training return: tensor(-1847.6339, device='cuda:0')
episode: 194 training return: tensor(-1852.6569, device='cuda:0')
episode: 195 training return: tensor(-1892.4517, device='cuda:0')
epoch: 49 test_true_pfm: -2.046596796717943
episode: 196 training return: tensor(-1894.0859, device='cuda:0')
episode: 197 training return: tensor(-1882.9535, device='cuda:0')
episode: 198 training return: tensor(-1869.8846, device='cuda:0')
episode: 199 training return: tensor(-1840.2157, device='cuda:0')
epoch: 50 test_true_pfm: -1.6742130967982647
episode: 200 training return: tensor(-1854.6342, device='cuda:0')
episode: 201 training return: tensor(-1887.7932, device='cuda:0')
episode: 202 training return: tensor(-1859.7484, device='cuda:0')
episode: 203 training return: tensor(-1879.8442, device='cuda:0')
epoch: 51 test_true_pfm: -4.663606560003575
episode: 204 training return: tensor(-1865.1143, device='cuda:0')
episode: 205 training return: tensor(-1879.9991, device='cuda:0')
episode: 206 training return: tensor(-1880.2903, device='cuda:0')
episode: 207 training return: tensor(-1856.0049, device='cuda:0')
epoch: 52 test_true_pfm: -2.443972957887995
episode: 208 training return: tensor(-1837.8314, device='cuda:0')
episode: 209 training return: tensor(-1873.4611, device='cuda:0')
episode: 210 training return: tensor(-1880.6169, device='cuda:0')
episode: 211 training return: tensor(-1877.2706, device='cuda:0')
epoch: 53 test_true_pfm: 1.981441527991796
episode: 212 training return: tensor(-1865.6870, device='cuda:0')
episode: 213 training return: tensor(-1872.0616, device='cuda:0')
episode: 214 training return: tensor(-1831.1702, device='cuda:0')
episode: 215 training return: tensor(-1863.3746, device='cuda:0')
epoch: 54 test_true_pfm: 2.4021355239937323
episode: 216 training return: tensor(-1843.8978, device='cuda:0')
episode: 217 training return: tensor(-1890.9233, device='cuda:0')
episode: 218 training return: tensor(-1892.3241, device='cuda:0')
episode: 219 training return: tensor(-1893.8976, device='cuda:0')
epoch: 55 test_true_pfm: -4.030221422941379
episode: 220 training return: tensor(-1873.1171, device='cuda:0')
episode: 221 training return: tensor(-1873.2130, device='cuda:0')
episode: 222 training return: tensor(-1875.2538, device='cuda:0')
episode: 223 training return: tensor(-1842.8906, device='cuda:0')
epoch: 56 test_true_pfm: 0.47458414508141405
episode: 224 training return: tensor(-1855.4198, device='cuda:0')
episode: 225 training return: tensor(-1848.7491, device='cuda:0')
episode: 226 training return: tensor(-1880.7373, device='cuda:0')
episode: 227 training return: tensor(-1873.0330, device='cuda:0')
epoch: 57 test_true_pfm: 1.5896770275617955
episode: 228 training return: tensor(-1870.4056, device='cuda:0')
episode: 229 training return: tensor(-1916.4089, device='cuda:0')
episode: 230 training return: tensor(-1837.9684, device='cuda:0')
episode: 231 training return: tensor(-1872.9041, device='cuda:0')
epoch: 58 test_true_pfm: -7.0468975530105995
episode: 232 training return: tensor(-1867.6060, device='cuda:0')
episode: 233 training return: tensor(-1862.4323, device='cuda:0')
episode: 234 training return: tensor(-1843.4393, device='cuda:0')
episode: 235 training return: tensor(-1865.1301, device='cuda:0')
epoch: 59 test_true_pfm: -2.543896016921773
episode: 236 training return: tensor(-1839.2996, device='cuda:0')
episode: 237 training return: tensor(-1848.1626, device='cuda:0')
episode: 238 training return: tensor(-1881.8241, device='cuda:0')
episode: 239 training return: tensor(-1883.3326, device='cuda:0')
epoch: 60 test_true_pfm: 0.42256083883077394
episode: 240 training return: tensor(-1856.7584, device='cuda:0')
episode: 241 training return: tensor(-1883.2029, device='cuda:0')
episode: 242 training return: tensor(-1855.7379, device='cuda:0')
episode: 243 training return: tensor(-1852.6099, device='cuda:0')
epoch: 61 test_true_pfm: 0.015707223579260448
episode: 244 training return: tensor(-1864.4055, device='cuda:0')
episode: 245 training return: tensor(-1862.3368, device='cuda:0')
episode: 246 training return: tensor(-1867.4551, device='cuda:0')
episode: 247 training return: tensor(-1886.2271, device='cuda:0')
epoch: 62 test_true_pfm: -1.680839382595969
episode: 248 training return: tensor(-1840.3429, device='cuda:0')
episode: 249 training return: tensor(-1863.5255, device='cuda:0')
episode: 250 training return: tensor(-1849.6226, device='cuda:0')
episode: 251 training return: tensor(-1835.0526, device='cuda:0')
epoch: 63 test_true_pfm: -6.4671932980194855
episode: 252 training return: tensor(-1887.7626, device='cuda:0')
episode: 253 training return: tensor(-1845.5834, device='cuda:0')
episode: 254 training return: tensor(-1864.6317, device='cuda:0')
episode: 255 training return: tensor(-1891.6571, device='cuda:0')
epoch: 64 test_true_pfm: -6.255909779553577
episode: 256 training return: tensor(-1896.5782, device='cuda:0')
episode: 257 training return: tensor(-1881.6101, device='cuda:0')
episode: 258 training return: tensor(-1863.2166, device='cuda:0')
episode: 259 training return: tensor(-1854.5839, device='cuda:0')
epoch: 65 test_true_pfm: 1.5793461647406717
episode: 260 training return: tensor(-1859.9723, device='cuda:0')
episode: 261 training return: tensor(-1866.3853, device='cuda:0')
episode: 262 training return: tensor(-1854.6968, device='cuda:0')
episode: 263 training return: tensor(-1908.3300, device='cuda:0')
epoch: 66 test_true_pfm: 2.5835930026538434
episode: 264 training return: tensor(-1862.6089, device='cuda:0')
episode: 265 training return: tensor(-1853.7268, device='cuda:0')
episode: 266 training return: tensor(-1877.2479, device='cuda:0')
episode: 267 training return: tensor(-1847.7311, device='cuda:0')
epoch: 67 test_true_pfm: -0.7956753024998613
episode: 268 training return: tensor(-1876.6052, device='cuda:0')
episode: 269 training return: tensor(-1898.3983, device='cuda:0')
episode: 270 training return: tensor(-1861.6354, device='cuda:0')
episode: 271 training return: tensor(-1866.5591, device='cuda:0')
epoch: 68 test_true_pfm: -1.708323068414413
episode: 272 training return: tensor(-1874.2710, device='cuda:0')
episode: 273 training return: tensor(-1859.9579, device='cuda:0')
episode: 274 training return: tensor(-1947.1627, device='cuda:0')
episode: 275 training return: tensor(-1868.4883, device='cuda:0')
epoch: 69 test_true_pfm: -9.846920427110634
episode: 276 training return: tensor(-1862.0853, device='cuda:0')
episode: 277 training return: tensor(-1877.1410, device='cuda:0')
episode: 278 training return: tensor(-1863.8931, device='cuda:0')
episode: 279 training return: tensor(-1863.7942, device='cuda:0')
epoch: 70 test_true_pfm: -4.363238260023207
episode: 280 training return: tensor(-1865.3683, device='cuda:0')
episode: 281 training return: tensor(-1868.6479, device='cuda:0')
episode: 282 training return: tensor(-1875.1218, device='cuda:0')
episode: 283 training return: tensor(-1861.9042, device='cuda:0')
epoch: 71 test_true_pfm: -2.2457651700425285
episode: 284 training return: tensor(-1876.1309, device='cuda:0')
episode: 285 training return: tensor(-1860.2755, device='cuda:0')
episode: 286 training return: tensor(-1864.7743, device='cuda:0')
episode: 287 training return: tensor(-1843.7041, device='cuda:0')
epoch: 72 test_true_pfm: -3.420433386300532
episode: 288 training return: tensor(-1864.7682, device='cuda:0')
episode: 289 training return: tensor(-1846.3208, device='cuda:0')
episode: 290 training return: tensor(-1876.5497, device='cuda:0')
episode: 291 training return: tensor(-1839.9299, device='cuda:0')
epoch: 73 test_true_pfm: -1.195167420104992
episode: 292 training return: tensor(-1864.9705, device='cuda:0')
episode: 293 training return: tensor(-1841.5790, device='cuda:0')
episode: 294 training return: tensor(-1849.6354, device='cuda:0')
episode: 295 training return: tensor(-1859.0891, device='cuda:0')
epoch: 74 test_true_pfm: -3.8232628376037607
episode: 296 training return: tensor(-1832.3822, device='cuda:0')
episode: 297 training return: tensor(-1886.1925, device='cuda:0')
episode: 298 training return: tensor(-1871.4622, device='cuda:0')
episode: 299 training return: tensor(-1836.1062, device='cuda:0')
epoch: 75 test_true_pfm: -6.155317043744829
episode: 300 training return: tensor(-1861.4258, device='cuda:0')
episode: 301 training return: tensor(-1878.3711, device='cuda:0')
episode: 302 training return: tensor(-1865.4915, device='cuda:0')
episode: 303 training return: tensor(-1850.3955, device='cuda:0')
epoch: 76 test_true_pfm: -6.448265878689701
episode: 304 training return: tensor(-1846.3298, device='cuda:0')
episode: 305 training return: tensor(-1871.1455, device='cuda:0')
episode: 306 training return: tensor(-1866.8427, device='cuda:0')
episode: 307 training return: tensor(-1869.7195, device='cuda:0')
epoch: 77 test_true_pfm: 1.3592067671687174
episode: 308 training return: tensor(-1844.0067, device='cuda:0')
episode: 309 training return: tensor(-1865.5653, device='cuda:0')
episode: 310 training return: tensor(-1837.7449, device='cuda:0')
episode: 311 training return: tensor(-1835.5215, device='cuda:0')
epoch: 78 test_true_pfm: -2.4708139738050314
episode: 312 training return: tensor(-1875.9171, device='cuda:0')
episode: 313 training return: tensor(-1868.5004, device='cuda:0')
episode: 314 training return: tensor(-1851.9786, device='cuda:0')
episode: 315 training return: tensor(-1828.8761, device='cuda:0')
epoch: 79 test_true_pfm: 0.3555308377201862
episode: 316 training return: tensor(-1857.1864, device='cuda:0')
episode: 317 training return: tensor(-1857.0050, device='cuda:0')
episode: 318 training return: tensor(-1878.1361, device='cuda:0')
episode: 319 training return: tensor(-1854.5160, device='cuda:0')
epoch: 80 test_true_pfm: -6.275385672417355
episode: 320 training return: tensor(-1854.6626, device='cuda:0')
episode: 321 training return: tensor(-1874.5070, device='cuda:0')
episode: 322 training return: tensor(-1865.6439, device='cuda:0')
episode: 323 training return: tensor(-1865.1306, device='cuda:0')
epoch: 81 test_true_pfm: -2.79078026678827
episode: 324 training return: tensor(-1857.5299, device='cuda:0')
episode: 325 training return: tensor(-1824.2102, device='cuda:0')
episode: 326 training return: tensor(-1876.8844, device='cuda:0')
episode: 327 training return: tensor(-1874.1326, device='cuda:0')
epoch: 82 test_true_pfm: -9.773937073579786
episode: 328 training return: tensor(-1849.9813, device='cuda:0')
episode: 329 training return: tensor(-1865.0914, device='cuda:0')
episode: 330 training return: tensor(-1871.5801, device='cuda:0')
episode: 331 training return: tensor(-1844.4191, device='cuda:0')
epoch: 83 test_true_pfm: -9.986526689162512
episode: 332 training return: tensor(-1839.5968, device='cuda:0')
episode: 333 training return: tensor(-1883.8715, device='cuda:0')
episode: 334 training return: tensor(-1859.2766, device='cuda:0')
episode: 335 training return: tensor(-1852.1901, device='cuda:0')
epoch: 84 test_true_pfm: -8.886731812982111
episode: 336 training return: tensor(-1863.1843, device='cuda:0')
episode: 337 training return: tensor(-1893.7853, device='cuda:0')
episode: 338 training return: tensor(-1866.0706, device='cuda:0')
episode: 339 training return: tensor(-1874.7288, device='cuda:0')
epoch: 85 test_true_pfm: -3.5519166666056132
episode: 340 training return: tensor(-1844.8103, device='cuda:0')
episode: 341 training return: tensor(-1860.0199, device='cuda:0')
episode: 342 training return: tensor(-1824.0010, device='cuda:0')
episode: 343 training return: tensor(-1854.5581, device='cuda:0')
epoch: 86 test_true_pfm: -8.573306449862377
episode: 344 training return: tensor(-1849.5410, device='cuda:0')
episode: 345 training return: tensor(-1878.6729, device='cuda:0')
episode: 346 training return: tensor(-1841.9619, device='cuda:0')
episode: 347 training return: tensor(-1844.9000, device='cuda:0')
epoch: 87 test_true_pfm: -0.8314649118493231
episode: 348 training return: tensor(-1839.9689, device='cuda:0')
episode: 349 training return: tensor(-1864.9121, device='cuda:0')
episode: 350 training return: tensor(-1853.9709, device='cuda:0')
episode: 351 training return: tensor(-1872.3605, device='cuda:0')
epoch: 88 test_true_pfm: -4.141765131417869
episode: 352 training return: tensor(-1846.6985, device='cuda:0')
episode: 353 training return: tensor(-1853.7299, device='cuda:0')
episode: 354 training return: tensor(-1846.3328, device='cuda:0')
episode: 355 training return: tensor(-1847.9047, device='cuda:0')
epoch: 89 test_true_pfm: -3.016183279539991
episode: 356 training return: tensor(-1853.0441, device='cuda:0')
episode: 357 training return: tensor(-1850.0239, device='cuda:0')
episode: 358 training return: tensor(-1882.1073, device='cuda:0')
episode: 359 training return: tensor(-1842.1040, device='cuda:0')
epoch: 90 test_true_pfm: -6.435131200311539
episode: 360 training return: tensor(-1886.2501, device='cuda:0')
episode: 361 training return: tensor(-1851.1112, device='cuda:0')
episode: 362 training return: tensor(-1875.3092, device='cuda:0')
episode: 363 training return: tensor(-1861.9104, device='cuda:0')
epoch: 91 test_true_pfm: -2.9931617522515865
episode: 364 training return: tensor(-1851.7247, device='cuda:0')
episode: 365 training return: tensor(-1862.1838, device='cuda:0')
episode: 366 training return: tensor(-1857.4584, device='cuda:0')
episode: 367 training return: tensor(-1857.5736, device='cuda:0')
epoch: 92 test_true_pfm: -6.741585631058413
episode: 368 training return: tensor(-1849.7137, device='cuda:0')
episode: 369 training return: tensor(-1841.9589, device='cuda:0')
episode: 370 training return: tensor(-1845.6129, device='cuda:0')
episode: 371 training return: tensor(-1832.4122, device='cuda:0')
epoch: 93 test_true_pfm: -5.603114608976017
episode: 372 training return: tensor(-1861.8754, device='cuda:0')
episode: 373 training return: tensor(-1839.1764, device='cuda:0')
episode: 374 training return: tensor(-1849.5660, device='cuda:0')
episode: 375 training return: tensor(-1853.5675, device='cuda:0')
epoch: 94 test_true_pfm: -5.804787752360521
episode: 376 training return: tensor(-1860.3829, device='cuda:0')
episode: 377 training return: tensor(-1870.5186, device='cuda:0')
episode: 378 training return: tensor(-1850.8185, device='cuda:0')
episode: 379 training return: tensor(-1843.0183, device='cuda:0')
epoch: 95 test_true_pfm: -8.623985588989681
episode: 380 training return: tensor(-1841.0280, device='cuda:0')
episode: 381 training return: tensor(-1860.6462, device='cuda:0')
episode: 382 training return: tensor(-1841.1963, device='cuda:0')
episode: 383 training return: tensor(-1902.7899, device='cuda:0')
epoch: 96 test_true_pfm: -2.8002095851712916
episode: 384 training return: tensor(-1862.2969, device='cuda:0')
episode: 385 training return: tensor(-1838.2341, device='cuda:0')
episode: 386 training return: tensor(-1837.4275, device='cuda:0')
episode: 387 training return: tensor(-1883.0747, device='cuda:0')
epoch: 97 test_true_pfm: 0.23125820255523497
episode: 388 training return: tensor(-1845.4700, device='cuda:0')
episode: 389 training return: tensor(-1840.2017, device='cuda:0')
episode: 390 training return: tensor(-1857.4312, device='cuda:0')
episode: 391 training return: tensor(-1846.2998, device='cuda:0')
epoch: 98 test_true_pfm: -4.832335993572108
episode: 392 training return: tensor(-1828.4420, device='cuda:0')
episode: 393 training return: tensor(-1870.4142, device='cuda:0')
episode: 394 training return: tensor(-1853.7756, device='cuda:0')
episode: 395 training return: tensor(-1866.2965, device='cuda:0')
epoch: 99 test_true_pfm: -3.077498983577168
episode: 396 training return: tensor(-1854.2617, device='cuda:0')
episode: 397 training return: tensor(-1881.6445, device='cuda:0')
episode: 398 training return: tensor(-1855.1832, device='cuda:0')
episode: 399 training return: tensor(-1885.2374, device='cuda:0')
epoch: 100 test_true_pfm: -8.625988427748295
episode: 400 training return: tensor(-1829.8110, device='cuda:0')
episode: 401 training return: tensor(-1849.1855, device='cuda:0')
episode: 402 training return: tensor(-1863.5656, device='cuda:0')
episode: 403 training return: tensor(-1863.5065, device='cuda:0')
epoch: 101 test_true_pfm: -4.122301433512527
episode: 404 training return: tensor(-1843.0527, device='cuda:0')
episode: 405 training return: tensor(-1853.9366, device='cuda:0')
episode: 406 training return: tensor(-1854.9241, device='cuda:0')
episode: 407 training return: tensor(-1885.7477, device='cuda:0')
epoch: 102 test_true_pfm: -6.045098297426478
episode: 408 training return: tensor(-1849.3011, device='cuda:0')
episode: 409 training return: tensor(-1853.5361, device='cuda:0')
episode: 410 training return: tensor(-1853.5540, device='cuda:0')
episode: 411 training return: tensor(-1852.8418, device='cuda:0')
epoch: 103 test_true_pfm: -8.55159346439765
episode: 412 training return: tensor(-1848.2874, device='cuda:0')
episode: 413 training return: tensor(-1848.8131, device='cuda:0')
episode: 414 training return: tensor(-1839.0405, device='cuda:0')
episode: 415 training return: tensor(-1897.2378, device='cuda:0')
epoch: 104 test_true_pfm: -7.5508335420743675
episode: 416 training return: tensor(-1931.9926, device='cuda:0')
episode: 417 training return: tensor(-1844.2043, device='cuda:0')
episode: 418 training return: tensor(-1851.1212, device='cuda:0')
episode: 419 training return: tensor(-1852.4392, device='cuda:0')
epoch: 105 test_true_pfm: -1.2159478624992863
episode: 420 training return: tensor(-1849.4756, device='cuda:0')
episode: 421 training return: tensor(-1821.5100, device='cuda:0')
episode: 422 training return: tensor(-1883.4951, device='cuda:0')
episode: 423 training return: tensor(-1850.8452, device='cuda:0')
epoch: 106 test_true_pfm: -5.4986292401941155
episode: 424 training return: tensor(-1843.0992, device='cuda:0')
episode: 425 training return: tensor(-1849.6780, device='cuda:0')
episode: 426 training return: tensor(-1849.5773, device='cuda:0')
episode: 427 training return: tensor(-1834.4718, device='cuda:0')
epoch: 107 test_true_pfm: -13.387135443238677
episode: 428 training return: tensor(-1863.2081, device='cuda:0')
episode: 429 training return: tensor(-1853.4940, device='cuda:0')
episode: 430 training return: tensor(-1861.9590, device='cuda:0')
episode: 431 training return: tensor(-1855.1257, device='cuda:0')
epoch: 108 test_true_pfm: -2.845099551272297
episode: 432 training return: tensor(-1847.5928, device='cuda:0')
episode: 433 training return: tensor(-1830.3418, device='cuda:0')
episode: 434 training return: tensor(-1844.9773, device='cuda:0')
episode: 435 training return: tensor(-1835.8839, device='cuda:0')
epoch: 109 test_true_pfm: -3.801180340429926
episode: 436 training return: tensor(-1841.7108, device='cuda:0')
episode: 437 training return: tensor(-1906.6504, device='cuda:0')
episode: 438 training return: tensor(-1854.7382, device='cuda:0')
episode: 439 training return: tensor(-1842.4790, device='cuda:0')
epoch: 110 test_true_pfm: -4.5649524092139275
episode: 440 training return: tensor(-1895.7152, device='cuda:0')
episode: 441 training return: tensor(-1877.1830, device='cuda:0')
episode: 442 training return: tensor(-1901.6432, device='cuda:0')
episode: 443 training return: tensor(-1860.8871, device='cuda:0')
epoch: 111 test_true_pfm: -4.618439419864642
episode: 444 training return: tensor(-1845.6333, device='cuda:0')
episode: 445 training return: tensor(-1861.3903, device='cuda:0')
episode: 446 training return: tensor(-1852.3365, device='cuda:0')
episode: 447 training return: tensor(-1864.9998, device='cuda:0')
epoch: 112 test_true_pfm: -3.4719870078425585
episode: 448 training return: tensor(-1870.8855, device='cuda:0')
episode: 449 training return: tensor(-1847.0356, device='cuda:0')
episode: 450 training return: tensor(-1861.3052, device='cuda:0')
episode: 451 training return: tensor(-1865.8293, device='cuda:0')
epoch: 113 test_true_pfm: -11.665022300262242
episode: 452 training return: tensor(-1854.2222, device='cuda:0')
episode: 453 training return: tensor(-1862.4380, device='cuda:0')
episode: 454 training return: tensor(-1871.1808, device='cuda:0')
episode: 455 training return: tensor(-1851.1193, device='cuda:0')
epoch: 114 test_true_pfm: -3.1602821507557226
episode: 456 training return: tensor(-1882.5793, device='cuda:0')
episode: 457 training return: tensor(-1842.5326, device='cuda:0')
episode: 458 training return: tensor(-1832.8672, device='cuda:0')
episode: 459 training return: tensor(-1821.3259, device='cuda:0')
epoch: 115 test_true_pfm: -2.438616383538415
episode: 460 training return: tensor(-1856.2990, device='cuda:0')
episode: 461 training return: tensor(-1819.6683, device='cuda:0')
episode: 462 training return: tensor(-1845.7222, device='cuda:0')
episode: 463 training return: tensor(-1830.8031, device='cuda:0')
epoch: 116 test_true_pfm: -13.684016545061814
episode: 464 training return: tensor(-1846.4672, device='cuda:0')
episode: 465 training return: tensor(-1851.4690, device='cuda:0')
episode: 466 training return: tensor(-1841.9299, device='cuda:0')
episode: 467 training return: tensor(-1851.4437, device='cuda:0')
epoch: 117 test_true_pfm: -0.8870920937647905
episode: 468 training return: tensor(-1845.5881, device='cuda:0')
episode: 469 training return: tensor(-1846.5637, device='cuda:0')
episode: 470 training return: tensor(-1859.2314, device='cuda:0')
episode: 471 training return: tensor(-1864.3948, device='cuda:0')
epoch: 118 test_true_pfm: -8.597341136410206
episode: 472 training return: tensor(-1871.3092, device='cuda:0')
episode: 473 training return: tensor(-1849.1001, device='cuda:0')
episode: 474 training return: tensor(-1856.4745, device='cuda:0')
episode: 475 training return: tensor(-1842.8668, device='cuda:0')
epoch: 119 test_true_pfm: -5.412453293519749
episode: 476 training return: tensor(-1865.4552, device='cuda:0')
episode: 477 training return: tensor(-1838.2239, device='cuda:0')
episode: 478 training return: tensor(-1836.7490, device='cuda:0')
episode: 479 training return: tensor(-1866.0585, device='cuda:0')
epoch: 120 test_true_pfm: -7.5331514787372
episode: 480 training return: tensor(-1859.2600, device='cuda:0')
episode: 481 training return: tensor(-1845.5645, device='cuda:0')
episode: 482 training return: tensor(-1846.5892, device='cuda:0')
episode: 483 training return: tensor(-1855.9161, device='cuda:0')
epoch: 121 test_true_pfm: -5.2420001809415835
episode: 484 training return: tensor(-1857.8784, device='cuda:0')
episode: 485 training return: tensor(-1849.9666, device='cuda:0')
episode: 486 training return: tensor(-1851.5232, device='cuda:0')
episode: 487 training return: tensor(-1852.7125, device='cuda:0')
epoch: 122 test_true_pfm: -13.507948803771546
episode: 488 training return: tensor(-1841.1447, device='cuda:0')
episode: 489 training return: tensor(-1851.3999, device='cuda:0')
episode: 490 training return: tensor(-1853.5784, device='cuda:0')
episode: 491 training return: tensor(-1865.0309, device='cuda:0')
epoch: 123 test_true_pfm: -2.6812991612828974
episode: 492 training return: tensor(-1863.6921, device='cuda:0')
episode: 493 training return: tensor(-1829.5319, device='cuda:0')
episode: 494 training return: tensor(-1837.6881, device='cuda:0')
episode: 495 training return: tensor(-1847.2096, device='cuda:0')
epoch: 124 test_true_pfm: -7.872144600918706
episode: 496 training return: tensor(-1858.9639, device='cuda:0')
episode: 497 training return: tensor(-1837.1049, device='cuda:0')
episode: 498 training return: tensor(-1851.4957, device='cuda:0')
episode: 499 training return: tensor(-1860.5193, device='cuda:0')
epoch: 125 test_true_pfm: -6.6483253796064625
episode: 500 training return: tensor(-1834.5209, device='cuda:0')
episode: 501 training return: tensor(-1857.2123, device='cuda:0')
episode: 502 training return: tensor(-1856.9105, device='cuda:0')
episode: 503 training return: tensor(-1838.9562, device='cuda:0')
epoch: 126 test_true_pfm: -9.176115618185504
episode: 504 training return: tensor(-1861.5834, device='cuda:0')
episode: 505 training return: tensor(-1828.7340, device='cuda:0')
episode: 506 training return: tensor(-1854.1989, device='cuda:0')
episode: 507 training return: tensor(-1890.7078, device='cuda:0')
epoch: 127 test_true_pfm: -7.190717747437165
episode: 508 training return: tensor(-1867.9248, device='cuda:0')
episode: 509 training return: tensor(-1846.8730, device='cuda:0')
episode: 510 training return: tensor(-1869.4960, device='cuda:0')
episode: 511 training return: tensor(-1869.4530, device='cuda:0')
epoch: 128 test_true_pfm: -7.17156360830916
episode: 512 training return: tensor(-1841.9598, device='cuda:0')
episode: 513 training return: tensor(-1836.7006, device='cuda:0')
episode: 514 training return: tensor(-1864.2073, device='cuda:0')
episode: 515 training return: tensor(-1880.6492, device='cuda:0')
epoch: 129 test_true_pfm: -6.457591950143317
episode: 516 training return: tensor(-1869.0748, device='cuda:0')
episode: 517 training return: tensor(-1841.5226, device='cuda:0')
episode: 518 training return: tensor(-1848.3120, device='cuda:0')
episode: 519 training return: tensor(-1837.7139, device='cuda:0')
epoch: 130 test_true_pfm: -10.374835646405607
episode: 520 training return: tensor(-1866.6060, device='cuda:0')
episode: 521 training return: tensor(-1858.9326, device='cuda:0')
episode: 522 training return: tensor(-1859.6301, device='cuda:0')
episode: 523 training return: tensor(-1868.1194, device='cuda:0')
epoch: 131 test_true_pfm: -9.31665679656843
episode: 524 training return: tensor(-1871.8885, device='cuda:0')
episode: 525 training return: tensor(-1845.9368, device='cuda:0')
episode: 526 training return: tensor(-1857.6951, device='cuda:0')
episode: 527 training return: tensor(-1886.2183, device='cuda:0')
epoch: 132 test_true_pfm: -8.47592529456064
episode: 528 training return: tensor(-1859.1888, device='cuda:0')
episode: 529 training return: tensor(-1844.4435, device='cuda:0')
episode: 530 training return: tensor(-1866.0298, device='cuda:0')
episode: 531 training return: tensor(-1869.9221, device='cuda:0')
epoch: 133 test_true_pfm: -3.892195365992688
episode: 532 training return: tensor(-1859.5800, device='cuda:0')
episode: 533 training return: tensor(-1856.4535, device='cuda:0')
episode: 534 training return: tensor(-1853.4767, device='cuda:0')
episode: 535 training return: tensor(-1872.5271, device='cuda:0')
epoch: 134 test_true_pfm: -5.307673293756506
episode: 536 training return: tensor(-1846.0012, device='cuda:0')
episode: 537 training return: tensor(-1847.5011, device='cuda:0')
episode: 538 training return: tensor(-1866.2971, device='cuda:0')
episode: 539 training return: tensor(-1850.0688, device='cuda:0')
epoch: 135 test_true_pfm: -3.654966262109401
episode: 540 training return: tensor(-1844.0820, device='cuda:0')
episode: 541 training return: tensor(-1825.9598, device='cuda:0')
episode: 542 training return: tensor(-1823.7249, device='cuda:0')
episode: 543 training return: tensor(-1858.3474, device='cuda:0')
epoch: 136 test_true_pfm: -4.144460876598241
episode: 544 training return: tensor(-1851.4801, device='cuda:0')
episode: 545 training return: tensor(-1844.0834, device='cuda:0')
episode: 546 training return: tensor(-1823.3365, device='cuda:0')
episode: 547 training return: tensor(-1843.1562, device='cuda:0')
epoch: 137 test_true_pfm: -2.2849367985447815
episode: 548 training return: tensor(-1831.8937, device='cuda:0')
episode: 549 training return: tensor(-1869.9331, device='cuda:0')
episode: 550 training return: tensor(-1849.3298, device='cuda:0')
episode: 551 training return: tensor(-1851.0172, device='cuda:0')
epoch: 138 test_true_pfm: -0.5217483596780127
episode: 552 training return: tensor(-1847.4807, device='cuda:0')
episode: 553 training return: tensor(-1854.3024, device='cuda:0')
episode: 554 training return: tensor(-1812.2445, device='cuda:0')
episode: 555 training return: tensor(-1892.3459, device='cuda:0')
epoch: 139 test_true_pfm: -7.924274397038652
episode: 556 training return: tensor(-1860.6960, device='cuda:0')
episode: 557 training return: tensor(-1860.2158, device='cuda:0')
episode: 558 training return: tensor(-1861.4160, device='cuda:0')
episode: 559 training return: tensor(-1845.9833, device='cuda:0')
epoch: 140 test_true_pfm: -1.664702093380306
episode: 560 training return: tensor(-1858.4663, device='cuda:0')
episode: 561 training return: tensor(-1876.5756, device='cuda:0')
episode: 562 training return: tensor(-1819.5439, device='cuda:0')
episode: 563 training return: tensor(-1864.8843, device='cuda:0')
epoch: 141 test_true_pfm: -7.619056353705041
episode: 564 training return: tensor(-1824.6891, device='cuda:0')
episode: 565 training return: tensor(-1850.7151, device='cuda:0')
episode: 566 training return: tensor(-1826.1071, device='cuda:0')
episode: 567 training return: tensor(-1842.5709, device='cuda:0')
epoch: 142 test_true_pfm: -3.183579004021805
episode: 568 training return: tensor(-1847.9512, device='cuda:0')
episode: 569 training return: tensor(-1847.8007, device='cuda:0')
episode: 570 training return: tensor(-1851.7726, device='cuda:0')
episode: 571 training return: tensor(-1846.7770, device='cuda:0')
epoch: 143 test_true_pfm: -3.91250563526406
episode: 572 training return: tensor(-1852.6462, device='cuda:0')
episode: 573 training return: tensor(-1830.0531, device='cuda:0')
episode: 574 training return: tensor(-1852.7454, device='cuda:0')
episode: 575 training return: tensor(-1855.3290, device='cuda:0')
epoch: 144 test_true_pfm: -5.042778811192943
episode: 576 training return: tensor(-1847.4994, device='cuda:0')
episode: 577 training return: tensor(-1849.9030, device='cuda:0')
episode: 578 training return: tensor(-1833.0715, device='cuda:0')
episode: 579 training return: tensor(-1872.5294, device='cuda:0')
epoch: 145 test_true_pfm: 0.1705317272959849
episode: 580 training return: tensor(-1872.0739, device='cuda:0')
episode: 581 training return: tensor(-1856.2566, device='cuda:0')
episode: 582 training return: tensor(-1861.8367, device='cuda:0')
episode: 583 training return: tensor(-1839.8979, device='cuda:0')
epoch: 146 test_true_pfm: -2.3850538899027405
episode: 584 training return: tensor(-1845.3552, device='cuda:0')
episode: 585 training return: tensor(-1853.9928, device='cuda:0')
episode: 586 training return: tensor(-1826.0098, device='cuda:0')
episode: 587 training return: tensor(-1853.1760, device='cuda:0')
epoch: 147 test_true_pfm: -2.1810196969208913
episode: 588 training return: tensor(-1862.4706, device='cuda:0')
episode: 589 training return: tensor(-1836.7986, device='cuda:0')
episode: 590 training return: tensor(-1825.9777, device='cuda:0')
episode: 591 training return: tensor(-1834.0215, device='cuda:0')
epoch: 148 test_true_pfm: -6.181702544547879
episode: 592 training return: tensor(-1860.4548, device='cuda:0')
episode: 593 training return: tensor(-1861.2493, device='cuda:0')
episode: 594 training return: tensor(-1874.7233, device='cuda:0')
episode: 595 training return: tensor(-1833.5923, device='cuda:0')
epoch: 149 test_true_pfm: -5.3610227202802605
episode: 596 training return: tensor(-1855.6472, device='cuda:0')
episode: 597 training return: tensor(-1839.4038, device='cuda:0')
episode: 598 training return: tensor(-1837.0490, device='cuda:0')
episode: 599 training return: tensor(-1833.1271, device='cuda:0')
epoch: 150 test_true_pfm: -2.914094713454182
