['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.5236292048357427 test_loss: -1.3880488395690918
epoch: 1 training_loss -2.2747702956199647 test_loss: -2.9162014007568358
epoch: 2 training_loss -3.360166790485382 test_loss: -3.6238639831542967
epoch: 3 training_loss -4.20548572063446 test_loss: -4.460120773315429
epoch: 4 training_loss -4.724330961704254 test_loss: -4.882941436767578
epoch: 5 training_loss -5.136691279411316 test_loss: -5.419020080566407
epoch: 6 training_loss -5.497576704025269 test_loss: -5.496339797973633
epoch: 7 training_loss -5.685197544097901 test_loss: -5.8128608703613285
epoch: 8 training_loss -5.881466064453125 test_loss: -5.986400604248047
epoch: 9 training_loss -6.030712594985962 test_loss: -6.113605499267578
epoch: 10 training_loss -6.205710186958313 test_loss: -6.263869857788086
epoch: 11 training_loss -6.288296966552735 test_loss: -6.33630485534668
epoch: 12 training_loss -6.410788412094116 test_loss: -6.439728546142578
epoch: 13 training_loss -6.46397367477417 test_loss: -6.534874725341797
epoch: 14 training_loss -6.5971326684951785 test_loss: -6.655488586425781
epoch: 15 training_loss -6.675868725776672 test_loss: -6.770221710205078
epoch: 16 training_loss -6.7771502161026005 test_loss: -6.694220733642578
epoch: 17 training_loss -6.8389834833145144 test_loss: -6.908287048339844
epoch: 18 training_loss -6.909853181838989 test_loss: -6.863406372070313
epoch: 19 training_loss -6.9096961069107055 test_loss: -6.958469390869141
epoch: 20 training_loss -6.987551770210266 test_loss: -6.937538909912109
epoch: 21 training_loss -7.062087969779968 test_loss: -7.10889892578125
epoch: 22 training_loss -7.05913170337677 test_loss: -7.025590515136718
epoch: 23 training_loss -7.151065492630005 test_loss: -7.151875305175781
epoch: 24 training_loss -7.163236169815064 test_loss: -7.275017547607422
epoch: 25 training_loss -7.217088322639466 test_loss: -7.237250518798828
epoch: 26 training_loss -7.25469660282135 test_loss: -7.264515686035156
epoch: 27 training_loss -7.301211180686951 test_loss: -7.307335662841797
epoch: 28 training_loss -7.2793092966079715 test_loss: -7.35042724609375
epoch: 29 training_loss -7.391027059555054 test_loss: -7.405313110351562
epoch: 30 training_loss -7.386004781723022 test_loss: -7.428230285644531
epoch: 31 training_loss -7.383963365554809 test_loss: -7.364894104003906
epoch: 32 training_loss -7.414458003044128 test_loss: -7.4985809326171875
epoch: 33 training_loss -7.453736615180969 test_loss: -7.489128112792969
epoch: 34 training_loss -7.472816915512085 test_loss: -7.500525665283203
epoch: 35 training_loss -7.486654405593872 test_loss: -7.340702056884766
epoch: 36 training_loss -7.50972797870636 test_loss: -7.475547790527344
epoch: 37 training_loss -7.531797342300415 test_loss: -7.538871002197266
epoch: 38 training_loss -7.570192737579346 test_loss: -7.675282287597656
epoch: 39 training_loss -7.606618542671203 test_loss: -7.62377700805664
epoch: 40 training_loss -7.567908630371094 test_loss: -7.58458251953125
epoch: 41 training_loss -7.631565957069397 test_loss: -7.691425323486328
epoch: 42 training_loss -7.630063614845276 test_loss: -7.611148834228516
epoch: 43 training_loss -7.643354334831238 test_loss: -7.614022827148437
epoch: 44 training_loss -7.6382914781570435 test_loss: -7.715859222412109
epoch: 45 training_loss -7.675229330062866 test_loss: -7.652986907958985
epoch: 46 training_loss -7.640901999473572 test_loss: -7.7092430114746096
epoch: 47 training_loss -7.70599705696106 test_loss: -7.732013702392578
epoch: 48 training_loss -7.706956386566162 test_loss: -7.716216278076172
epoch: 49 training_loss -7.737757287025452 test_loss: -7.646260833740234
epoch: 50 training_loss -7.754067420959473 test_loss: -7.763408660888672
epoch: 51 training_loss -7.754442572593689 test_loss: -7.712612152099609
epoch: 52 training_loss -7.7491712617874144 test_loss: -7.785335540771484
epoch: 53 training_loss -7.782119908332825 test_loss: -7.727128601074218
epoch: 54 training_loss -7.776345062255859 test_loss: -7.862821960449219
epoch: 55 training_loss -7.81515112400055 test_loss: -7.859579467773438
epoch: 56 training_loss -7.806787776947021 test_loss: -7.611275482177734
epoch: 57 training_loss -7.829556522369384 test_loss: -7.815643310546875
epoch: 58 training_loss -7.827649946212769 test_loss: -7.836235809326172
epoch: 59 training_loss -7.8420661306381225 test_loss: -7.7757118225097654
epoch: 60 training_loss -7.828866634368897 test_loss: -7.8508140563964846
epoch: 61 training_loss -7.853166847229004 test_loss: -7.858123016357422
epoch: 62 training_loss -7.835083723068237 test_loss: -7.858575439453125
epoch: 63 training_loss -7.873448586463928 test_loss: -7.886341857910156
epoch: 64 training_loss -7.906124739646912 test_loss: -7.9093482971191404
epoch: 65 training_loss -7.89451801776886 test_loss: -7.861665344238281
epoch: 66 training_loss -7.890479145050048 test_loss: -7.931448364257813
epoch: 67 training_loss -7.893304138183594 test_loss: -7.8902732849121096
epoch: 68 training_loss -7.900824980735779 test_loss: -7.923515319824219
epoch: 69 training_loss -7.934996347427369 test_loss: -8.041471099853515
epoch: 70 training_loss -7.930436568260193 test_loss: -7.922334289550781
epoch: 71 training_loss -7.916372981071472 test_loss: -7.931018829345703
epoch: 72 training_loss -7.950035986900329 test_loss: -7.994474029541015
epoch: 73 training_loss -7.953771734237671 test_loss: -7.926518249511719
epoch: 74 training_loss -7.95750292301178 test_loss: -7.968166351318359
epoch: 75 training_loss -7.935074119567871 test_loss: -7.921806335449219
epoch: 76 training_loss -7.94016580581665 test_loss: -7.880012512207031
epoch: 77 training_loss -7.9480117893219 test_loss: -7.979187774658203
epoch: 78 training_loss -7.983963656425476 test_loss: -8.02962417602539
epoch: 79 training_loss -7.98000542640686 test_loss: -7.949908447265625
epoch: 80 training_loss -7.986805987358093 test_loss: -8.00829849243164
epoch: 81 training_loss -8.01650936126709 test_loss: -8.00968246459961
epoch: 82 training_loss -8.006048951148987 test_loss: -8.048603057861328
epoch: 83 training_loss -7.9883062505722044 test_loss: -8.004853820800781
epoch: 84 training_loss -8.001933698654176 test_loss: -8.007269287109375
epoch: 85 training_loss -8.01587338924408 test_loss: -7.9862213134765625
epoch: 86 training_loss -8.034130053520203 test_loss: -8.035604095458984
epoch: 87 training_loss -8.00628975391388 test_loss: -8.003240203857422
epoch: 88 training_loss -8.04472948551178 test_loss: -8.071466827392578
epoch: 89 training_loss -8.022143692970277 test_loss: -7.96057357788086
epoch: 90 training_loss -8.029383368492127 test_loss: -8.08443832397461
epoch: 91 training_loss -8.0503506565094 test_loss: -8.021744537353516
epoch: 92 training_loss -8.050801148414612 test_loss: -8.073286437988282
epoch: 93 training_loss -8.064923605918885 test_loss: -8.019051361083985
epoch: 94 training_loss -8.072915835380554 test_loss: -8.115174102783204
epoch: 95 training_loss -8.039853410720825 test_loss: -7.98974838256836
epoch: 96 training_loss -8.074054217338562 test_loss: -8.076268005371094
epoch: 97 training_loss -8.07529405117035 test_loss: -8.170822143554688
epoch: 98 training_loss -8.094251923561096 test_loss: -8.004080963134765
epoch: 99 training_loss -8.108134164810181 test_loss: -8.129511260986328
epoch: 100 training_loss -8.107466139793395 test_loss: -7.998927307128906
epoch: 101 training_loss -8.106833682060241 test_loss: -8.095052337646484
epoch: 102 training_loss -8.07773901939392 test_loss: -8.102088165283202
epoch: 103 training_loss -8.101511068344116 test_loss: -8.003597259521484
epoch: 104 training_loss -8.091618070602417 test_loss: -8.118512725830078
epoch: 105 training_loss -8.11163770198822 test_loss: -8.155645751953125
epoch: 106 training_loss -8.091129608154297 test_loss: -8.110771179199219
epoch: 107 training_loss -8.141378407478333 test_loss: -8.191829681396484
epoch: 108 training_loss -8.10812689781189 test_loss: -8.006006622314453
epoch: 109 training_loss -8.13183102607727 test_loss: -8.115693664550781
epoch: 110 training_loss -8.126950826644897 test_loss: -8.116981506347656
epoch: 111 training_loss -8.143057570457458 test_loss: -8.181145477294923
epoch: 112 training_loss -8.158023471832275 test_loss: -8.132127380371093
epoch: 113 training_loss -8.105969152450562 test_loss: -8.13974380493164
epoch: 114 training_loss -8.141992745399476 test_loss: -8.171163940429688
epoch: 115 training_loss -8.14789023399353 test_loss: -8.164300537109375
epoch: 116 training_loss -8.178539423942565 test_loss: -8.034664916992188
epoch: 117 training_loss -8.154292268753052 test_loss: -8.176682281494141
epoch: 118 training_loss -8.17113965511322 test_loss: -8.19495849609375
epoch: 119 training_loss -8.214625968933106 test_loss: -8.253011322021484
epoch: 120 training_loss -8.16493754386902 test_loss: -8.192121124267578
epoch: 121 training_loss -8.179039306640625 test_loss: -8.201874542236329
epoch: 122 training_loss -8.184707918167113 test_loss: -8.219353485107423
epoch: 123 training_loss -8.204168186187744 test_loss: -8.219835662841797
epoch: 124 training_loss -8.207569727897644 test_loss: -8.209674072265624
epoch: 125 training_loss -8.198584799766541 test_loss: -8.213534545898437
epoch: 126 training_loss -8.197413244247436 test_loss: -8.197264862060546
epoch: 127 training_loss -8.208943295478822 test_loss: -8.17948226928711
epoch: 128 training_loss -8.183098721504212 test_loss: -8.259389495849609
epoch: 129 training_loss -8.190871677398682 test_loss: -8.255076599121093
epoch: 130 training_loss -8.214223380088805 test_loss: -8.213365173339843
epoch: 131 training_loss -8.22917830467224 test_loss: -8.215228271484374
epoch: 132 training_loss -8.18644314289093 test_loss: -8.201476287841796
epoch: 133 training_loss -8.212925887107849 test_loss: -8.163132476806641
epoch: 134 training_loss -8.209206290245056 test_loss: -8.210039520263672
epoch: 135 training_loss -8.246250157356263 test_loss: -8.229421997070313
epoch: 136 training_loss -8.217243299484252 test_loss: -8.23266372680664
epoch: 137 training_loss -8.246112937927245 test_loss: -8.238934326171876
epoch: 138 training_loss -8.26571753501892 test_loss: -8.295357513427735
epoch: 139 training_loss -8.251940817832947 test_loss: -8.179100799560548
epoch: 140 training_loss -8.238418655395508 test_loss: -8.280040740966797
epoch: 141 training_loss -8.232212653160095 test_loss: -8.106085968017577
epoch: 142 training_loss -8.218250093460084 test_loss: -8.304051208496094
epoch: 143 training_loss -8.249018182754517 test_loss: -8.224962615966797
epoch: 144 training_loss -8.241166315078736 test_loss: -8.264454650878907
epoch: 145 training_loss -8.254430418014527 test_loss: -8.284513854980469
epoch: 146 training_loss -8.288599328994751 test_loss: -8.215050506591798
epoch: 147 training_loss -8.253865203857423 test_loss: -8.248062133789062
epoch: 148 training_loss -8.218410396575928 test_loss: -8.143460845947265
epoch: 149 training_loss -8.273986549377442 test_loss: -8.315890502929687
984.3455569284075
episode: 0 training return: tensor(-135664.7188, device='cuda:0')
episode: 1 training return: tensor(-727172.0625, device='cuda:0')
episode: 2 training return: tensor(-2475832.7500, device='cuda:0')
episode: 3 training return: tensor(-186772.8906, device='cuda:0')
epoch: 1 test_true_pfm: -21.436800948821006
episode: 4 training return: tensor(-95103.5156, device='cuda:0')
episode: 5 training return: tensor(-128775.6016, device='cuda:0')
episode: 6 training return: tensor(-520126.8438, device='cuda:0')
episode: 7 training return: tensor(-26930.4238, device='cuda:0')
epoch: 2 test_true_pfm: 65.98069927480175
episode: 8 training return: tensor(-10240302., device='cuda:0')
episode: 9 training return: tensor(-60934.5820, device='cuda:0')
episode: 10 training return: tensor(-226167.2188, device='cuda:0')
episode: 11 training return: tensor(-13691056., device='cuda:0')
epoch: 3 test_true_pfm: 103.56523974058395
episode: 12 training return: tensor(-3471516.2500, device='cuda:0')
episode: 13 training return: tensor(-126771.7422, device='cuda:0')
episode: 14 training return: tensor(-604366.0625, device='cuda:0')
episode: 15 training return: tensor(-964537.6875, device='cuda:0')
epoch: 4 test_true_pfm: -71.03443257653004
episode: 16 training return: tensor(-3804342., device='cuda:0')
episode: 17 training return: tensor(-239136.1562, device='cuda:0')
episode: 18 training return: tensor(-81591.9141, device='cuda:0')
episode: 19 training return: tensor(-33409.0117, device='cuda:0')
epoch: 5 test_true_pfm: -57.27959099212898
episode: 20 training return: tensor(-24224.5742, device='cuda:0')
episode: 21 training return: tensor(-43030.1641, device='cuda:0')
episode: 22 training return: tensor(-40299.4062, device='cuda:0')
episode: 23 training return: tensor(-37768.7656, device='cuda:0')
epoch: 6 test_true_pfm: -102.19458594761953
episode: 24 training return: tensor(-34920.9414, device='cuda:0')
episode: 25 training return: tensor(-37555.0234, device='cuda:0')
episode: 26 training return: tensor(-30485.8906, device='cuda:0')
episode: 27 training return: tensor(-18473.1582, device='cuda:0')
epoch: 7 test_true_pfm: -35.59525620928116
episode: 28 training return: tensor(-24794.5078, device='cuda:0')
episode: 29 training return: tensor(-20882.2109, device='cuda:0')
episode: 30 training return: tensor(-26640.8965, device='cuda:0')
episode: 31 training return: tensor(-22680.2227, device='cuda:0')
epoch: 8 test_true_pfm: -61.09586152621055
episode: 32 training return: tensor(-27503.6172, device='cuda:0')
episode: 33 training return: tensor(-20791.8965, device='cuda:0')
episode: 34 training return: tensor(-20886.0176, device='cuda:0')
episode: 35 training return: tensor(-21452.9004, device='cuda:0')
epoch: 9 test_true_pfm: -23.881166379440092
episode: 36 training return: tensor(-20725.0137, device='cuda:0')
episode: 37 training return: tensor(-22835.0605, device='cuda:0')
episode: 38 training return: tensor(-24513.3613, device='cuda:0')
episode: 39 training return: tensor(-25850.9707, device='cuda:0')
epoch: 10 test_true_pfm: -61.47488721603802
episode: 40 training return: tensor(-25492.3535, device='cuda:0')
episode: 41 training return: tensor(-23205.0410, device='cuda:0')
episode: 42 training return: tensor(-25155.8906, device='cuda:0')
episode: 43 training return: tensor(-23015.4570, device='cuda:0')
epoch: 11 test_true_pfm: -76.38285768806618
episode: 44 training return: tensor(-24667.4922, device='cuda:0')
episode: 45 training return: tensor(-22386.1895, device='cuda:0')
episode: 46 training return: tensor(-26753.0449, device='cuda:0')
episode: 47 training return: tensor(-21529.4570, device='cuda:0')
epoch: 12 test_true_pfm: -79.25152135283518
episode: 48 training return: tensor(-24677.5586, device='cuda:0')
episode: 49 training return: tensor(-25327.8379, device='cuda:0')
episode: 50 training return: tensor(-24230.3691, device='cuda:0')
episode: 51 training return: tensor(-21896.1855, device='cuda:0')
epoch: 13 test_true_pfm: -52.67431155997991
episode: 52 training return: tensor(-22544.8652, device='cuda:0')
episode: 53 training return: tensor(-22690.4805, device='cuda:0')
episode: 54 training return: tensor(-21622.0020, device='cuda:0')
episode: 55 training return: tensor(-20405.6387, device='cuda:0')
epoch: 14 test_true_pfm: -96.9640225316803
episode: 56 training return: tensor(-19387.9043, device='cuda:0')
episode: 57 training return: tensor(-21406.7812, device='cuda:0')
episode: 58 training return: tensor(-25040.6074, device='cuda:0')
episode: 59 training return: tensor(-14594.0205, device='cuda:0')
epoch: 15 test_true_pfm: -188.15373940035133
episode: 60 training return: tensor(-16750.8223, device='cuda:0')
episode: 61 training return: tensor(-15592.5488, device='cuda:0')
episode: 62 training return: tensor(-11847.8193, device='cuda:0')
episode: 63 training return: tensor(-10620.3213, device='cuda:0')
epoch: 16 test_true_pfm: -68.8904091717432
episode: 64 training return: tensor(-12296.9033, device='cuda:0')
episode: 65 training return: tensor(-15022.5342, device='cuda:0')
episode: 66 training return: tensor(-11833.2676, device='cuda:0')
episode: 67 training return: tensor(-13320.3623, device='cuda:0')
epoch: 17 test_true_pfm: 741.1495724414905
episode: 68 training return: tensor(-9001.5508, device='cuda:0')
episode: 69 training return: tensor(-16952.4395, device='cuda:0')
episode: 70 training return: tensor(-10281.1650, device='cuda:0')
episode: 71 training return: tensor(-45829.5664, device='cuda:0')
epoch: 18 test_true_pfm: 466.0102755111097
episode: 72 training return: tensor(-11796.0635, device='cuda:0')
episode: 73 training return: tensor(-63044.5859, device='cuda:0')
episode: 74 training return: tensor(-18114.7559, device='cuda:0')
episode: 75 training return: tensor(-11974.9033, device='cuda:0')
epoch: 19 test_true_pfm: -129.3298255183499
episode: 76 training return: tensor(-9361.7363, device='cuda:0')
episode: 77 training return: tensor(-13089.4131, device='cuda:0')
episode: 78 training return: tensor(-15033.4746, device='cuda:0')
episode: 79 training return: tensor(-10135.3555, device='cuda:0')
epoch: 20 test_true_pfm: -78.03050503294986
episode: 80 training return: tensor(-14141.6904, device='cuda:0')
episode: 81 training return: tensor(-19648.9141, device='cuda:0')
episode: 82 training return: tensor(-16964.3379, device='cuda:0')
episode: 83 training return: tensor(-20382.7441, device='cuda:0')
epoch: 21 test_true_pfm: -26.523534715930925
episode: 84 training return: tensor(-13096.7197, device='cuda:0')
episode: 85 training return: tensor(-16519.0801, device='cuda:0')
episode: 86 training return: tensor(-18961.6504, device='cuda:0')
episode: 87 training return: tensor(-17455.0137, device='cuda:0')
epoch: 22 test_true_pfm: -145.1277544437395
episode: 88 training return: tensor(-18761.4941, device='cuda:0')
episode: 89 training return: tensor(-16678.1484, device='cuda:0')
episode: 90 training return: tensor(-13452.3652, device='cuda:0')
episode: 91 training return: tensor(-15454.3320, device='cuda:0')
epoch: 23 test_true_pfm: -153.44939598915536
episode: 92 training return: tensor(-21481.0859, device='cuda:0')
episode: 93 training return: tensor(-16437.1465, device='cuda:0')
episode: 94 training return: tensor(-23676.4805, device='cuda:0')
episode: 95 training return: tensor(-21427.8086, device='cuda:0')
epoch: 24 test_true_pfm: -75.5929469958135
episode: 96 training return: tensor(-15507.8799, device='cuda:0')
episode: 97 training return: tensor(-17424.1621, device='cuda:0')
episode: 98 training return: tensor(-16045.9336, device='cuda:0')
episode: 99 training return: tensor(-14785.9912, device='cuda:0')
epoch: 25 test_true_pfm: -49.014869146660295
episode: 100 training return: tensor(-14390.9463, device='cuda:0')
episode: 101 training return: tensor(-13513.9238, device='cuda:0')
episode: 102 training return: tensor(-16587.9883, device='cuda:0')
episode: 103 training return: tensor(-13913.8154, device='cuda:0')
epoch: 26 test_true_pfm: -158.65634025714266
episode: 104 training return: tensor(-9873.0889, device='cuda:0')
episode: 105 training return: tensor(-9788.6328, device='cuda:0')
episode: 106 training return: tensor(-10480.3770, device='cuda:0')
episode: 107 training return: tensor(-10660.5840, device='cuda:0')
epoch: 27 test_true_pfm: -136.13313536398255
episode: 108 training return: tensor(-8910.3184, device='cuda:0')
episode: 109 training return: tensor(-8893.5547, device='cuda:0')
episode: 110 training return: tensor(-11366.9131, device='cuda:0')
episode: 111 training return: tensor(-22987.0332, device='cuda:0')
epoch: 28 test_true_pfm: -150.28120601154106
episode: 112 training return: tensor(-21055.3965, device='cuda:0')
episode: 113 training return: tensor(-20023.0801, device='cuda:0')
episode: 114 training return: tensor(-13854.5547, device='cuda:0')
episode: 115 training return: tensor(-31411.9023, device='cuda:0')
epoch: 29 test_true_pfm: -301.01170180283316
episode: 116 training return: tensor(-21164.4434, device='cuda:0')
episode: 117 training return: tensor(-22870.0508, device='cuda:0')
episode: 118 training return: tensor(-22248.0195, device='cuda:0')
episode: 119 training return: tensor(-27592.4902, device='cuda:0')
epoch: 30 test_true_pfm: -134.7570872318546
episode: 120 training return: tensor(-22984.1348, device='cuda:0')
episode: 121 training return: tensor(-23578.5391, device='cuda:0')
episode: 122 training return: tensor(-38892.2227, device='cuda:0')
episode: 123 training return: tensor(-23183.4160, device='cuda:0')
epoch: 31 test_true_pfm: -134.67730914717032
episode: 124 training return: tensor(-60548.8984, device='cuda:0')
episode: 125 training return: tensor(-10342.9639, device='cuda:0')
episode: 126 training return: tensor(-6795.8057, device='cuda:0')
episode: 127 training return: tensor(-28060.7715, device='cuda:0')
epoch: 32 test_true_pfm: -143.55567386231795
episode: 128 training return: tensor(-8778.9111, device='cuda:0')
episode: 129 training return: tensor(-8955.6797, device='cuda:0')
episode: 130 training return: tensor(-7022.0557, device='cuda:0')
episode: 131 training return: tensor(-15493.5059, device='cuda:0')
epoch: 33 test_true_pfm: -159.66580441634358
episode: 132 training return: tensor(-12884.2871, device='cuda:0')
episode: 133 training return: tensor(-18512.2500, device='cuda:0')
episode: 134 training return: tensor(-6917.1685, device='cuda:0')
episode: 135 training return: tensor(-9273.6641, device='cuda:0')
epoch: 34 test_true_pfm: -143.09754581951822
episode: 136 training return: tensor(-12564.6865, device='cuda:0')
episode: 137 training return: tensor(-7887.8623, device='cuda:0')
episode: 138 training return: tensor(-7815.9814, device='cuda:0')
episode: 139 training return: tensor(-7711.0137, device='cuda:0')
epoch: 35 test_true_pfm: -32.394275291512905
episode: 140 training return: tensor(-7012.0215, device='cuda:0')
episode: 141 training return: tensor(-9746.1982, device='cuda:0')
episode: 142 training return: tensor(-8573.0654, device='cuda:0')
episode: 143 training return: tensor(-17290.0586, device='cuda:0')
epoch: 36 test_true_pfm: 197.46878195200324
episode: 144 training return: tensor(-8651.5186, device='cuda:0')
episode: 145 training return: tensor(-9633.5732, device='cuda:0')
episode: 146 training return: tensor(-9560.2891, device='cuda:0')
episode: 147 training return: tensor(-10294.1953, device='cuda:0')
epoch: 37 test_true_pfm: -147.80960650239368
episode: 148 training return: tensor(-9114.8926, device='cuda:0')
episode: 149 training return: tensor(-9039.9521, device='cuda:0')
episode: 150 training return: tensor(-7408.8677, device='cuda:0')
episode: 151 training return: tensor(-7372.9844, device='cuda:0')
epoch: 38 test_true_pfm: -194.81732643747605
episode: 152 training return: tensor(-9620.7432, device='cuda:0')
episode: 153 training return: tensor(-7085.5371, device='cuda:0')
episode: 154 training return: tensor(-9676.2041, device='cuda:0')
episode: 155 training return: tensor(-11174.9268, device='cuda:0')
epoch: 39 test_true_pfm: -139.26808177694545
episode: 156 training return: tensor(-10424.4375, device='cuda:0')
episode: 157 training return: tensor(-9647.2686, device='cuda:0')
episode: 158 training return: tensor(-10543.2275, device='cuda:0')
episode: 159 training return: tensor(-10955.7539, device='cuda:0')
epoch: 40 test_true_pfm: -136.3594986593815
episode: 160 training return: tensor(-14748.4219, device='cuda:0')
episode: 161 training return: tensor(-12129.4033, device='cuda:0')
episode: 162 training return: tensor(-11066.6924, device='cuda:0')
episode: 163 training return: tensor(-17388.5566, device='cuda:0')
epoch: 41 test_true_pfm: -147.75533547560892
episode: 164 training return: tensor(-10411.2646, device='cuda:0')
episode: 165 training return: tensor(-16292.2266, device='cuda:0')
episode: 166 training return: tensor(-14401.9775, device='cuda:0')
episode: 167 training return: tensor(-14414.4062, device='cuda:0')
epoch: 42 test_true_pfm: -141.85974325328047
episode: 168 training return: tensor(-19910.6855, device='cuda:0')
episode: 169 training return: tensor(-12490.3779, device='cuda:0')
episode: 170 training return: tensor(-12257.3047, device='cuda:0')
episode: 171 training return: tensor(-13009.3955, device='cuda:0')
epoch: 43 test_true_pfm: -140.6033077474983
episode: 172 training return: tensor(-11366.5840, device='cuda:0')
episode: 173 training return: tensor(-15077.8232, device='cuda:0')
episode: 174 training return: tensor(-16306.7832, device='cuda:0')
episode: 175 training return: tensor(-14660.4678, device='cuda:0')
epoch: 44 test_true_pfm: -154.43678102556407
episode: 176 training return: tensor(-14760.3916, device='cuda:0')
episode: 177 training return: tensor(-21818.9414, device='cuda:0')
episode: 178 training return: tensor(-14878.6289, device='cuda:0')
episode: 179 training return: tensor(-20796.9629, device='cuda:0')
epoch: 45 test_true_pfm: -182.66695559201426
episode: 180 training return: tensor(-18151.9570, device='cuda:0')
episode: 181 training return: tensor(-17893.4414, device='cuda:0')
episode: 182 training return: tensor(-16394.5957, device='cuda:0')
episode: 183 training return: tensor(-16752.5840, device='cuda:0')
epoch: 46 test_true_pfm: -171.46062383246235
episode: 184 training return: tensor(-10100.0732, device='cuda:0')
episode: 185 training return: tensor(-19753.4570, device='cuda:0')
episode: 186 training return: tensor(-18914.3027, device='cuda:0')
episode: 187 training return: tensor(-17049.9492, device='cuda:0')
epoch: 47 test_true_pfm: -182.59233055371237
episode: 188 training return: tensor(-20951.1621, device='cuda:0')
episode: 189 training return: tensor(-15987.4980, device='cuda:0')
episode: 190 training return: tensor(-23687.7930, device='cuda:0')
episode: 191 training return: tensor(-26956.6504, device='cuda:0')
epoch: 48 test_true_pfm: -158.8577557006967
episode: 192 training return: tensor(-21287.1426, device='cuda:0')
episode: 193 training return: tensor(-21806.0742, device='cuda:0')
episode: 194 training return: tensor(-21466.5410, device='cuda:0')
episode: 195 training return: tensor(-23688.5098, device='cuda:0')
epoch: 49 test_true_pfm: -143.45777239370133
episode: 196 training return: tensor(-33231.3984, device='cuda:0')
episode: 197 training return: tensor(-20638.5195, device='cuda:0')
episode: 198 training return: tensor(-19115.9512, device='cuda:0')
episode: 199 training return: tensor(-20929.4395, device='cuda:0')
epoch: 50 test_true_pfm: -136.56974380154622
episode: 200 training return: tensor(-28170.0859, device='cuda:0')
episode: 201 training return: tensor(-22432.7285, device='cuda:0')
episode: 202 training return: tensor(-19419.4434, device='cuda:0')
episode: 203 training return: tensor(-21491.8906, device='cuda:0')
epoch: 51 test_true_pfm: -167.8772684338673
episode: 204 training return: tensor(-27329.4023, device='cuda:0')
episode: 205 training return: tensor(-28204.7188, device='cuda:0')
episode: 206 training return: tensor(-28080.4199, device='cuda:0')
episode: 207 training return: tensor(-34087.1641, device='cuda:0')
epoch: 52 test_true_pfm: -139.83653024296567
episode: 208 training return: tensor(-27600.8496, device='cuda:0')
episode: 209 training return: tensor(-79167.8906, device='cuda:0')
episode: 210 training return: tensor(-37743.8281, device='cuda:0')
episode: 211 training return: tensor(-20811.8105, device='cuda:0')
epoch: 53 test_true_pfm: -142.2253968224799
episode: 212 training return: tensor(-21679.9043, device='cuda:0')
episode: 213 training return: tensor(-35151.5039, device='cuda:0')
episode: 214 training return: tensor(-61579.7578, device='cuda:0')
episode: 215 training return: tensor(-63886.0820, device='cuda:0')
epoch: 54 test_true_pfm: -131.88785734884434
episode: 216 training return: tensor(-49995.6641, device='cuda:0')
episode: 217 training return: tensor(-58676.8828, device='cuda:0')
episode: 218 training return: tensor(-44496.9492, device='cuda:0')
episode: 219 training return: tensor(-39976.6016, device='cuda:0')
epoch: 55 test_true_pfm: -125.10553785147346
episode: 220 training return: tensor(-49519.5117, device='cuda:0')
episode: 221 training return: tensor(-33196.4219, device='cuda:0')
episode: 222 training return: tensor(-38819.9219, device='cuda:0')
episode: 223 training return: tensor(-56428.3008, device='cuda:0')
epoch: 56 test_true_pfm: -169.54357582196283
episode: 224 training return: tensor(-47179.3516, device='cuda:0')
episode: 225 training return: tensor(-37328.5352, device='cuda:0')
episode: 226 training return: tensor(-41717.8750, device='cuda:0')
episode: 227 training return: tensor(-40940.5039, device='cuda:0')
epoch: 57 test_true_pfm: -158.81981141185983
episode: 228 training return: tensor(-43542.8516, device='cuda:0')
episode: 229 training return: tensor(-68339.2422, device='cuda:0')
episode: 230 training return: tensor(-65042.3906, device='cuda:0')
episode: 231 training return: tensor(-51305.8555, device='cuda:0')
epoch: 58 test_true_pfm: -121.53476489491398
episode: 232 training return: tensor(-66189.5859, device='cuda:0')
episode: 233 training return: tensor(-46759.6055, device='cuda:0')
episode: 234 training return: tensor(-38658.2500, device='cuda:0')
episode: 235 training return: tensor(-248461.3906, device='cuda:0')
epoch: 59 test_true_pfm: -173.7031140904885
episode: 236 training return: tensor(-49445.1758, device='cuda:0')
episode: 237 training return: tensor(-46286.4844, device='cuda:0')
episode: 238 training return: tensor(-51811.5156, device='cuda:0')
episode: 239 training return: tensor(-62298.6914, device='cuda:0')
epoch: 60 test_true_pfm: -140.1134642652939
episode: 240 training return: tensor(-51864.4219, device='cuda:0')
episode: 241 training return: tensor(-54398.8945, device='cuda:0')
episode: 242 training return: tensor(-54292.3672, device='cuda:0')
episode: 243 training return: tensor(-46976.1055, device='cuda:0')
epoch: 61 test_true_pfm: -147.62268614100458
episode: 244 training return: tensor(-36292.8867, device='cuda:0')
episode: 245 training return: tensor(-51847.1094, device='cuda:0')
episode: 246 training return: tensor(-47726.4062, device='cuda:0')
episode: 247 training return: tensor(-46650.4766, device='cuda:0')
epoch: 62 test_true_pfm: -162.282850242609
episode: 248 training return: tensor(-45005.3008, device='cuda:0')
episode: 249 training return: tensor(-40570.9102, device='cuda:0')
episode: 250 training return: tensor(-26534.1484, device='cuda:0')
episode: 251 training return: tensor(-31264.1953, device='cuda:0')
epoch: 63 test_true_pfm: -166.7665926572823
episode: 252 training return: tensor(-40469.4570, device='cuda:0')
episode: 253 training return: tensor(-44157.4414, device='cuda:0')
episode: 254 training return: tensor(-49650.7227, device='cuda:0')
episode: 255 training return: tensor(-35242.1758, device='cuda:0')
epoch: 64 test_true_pfm: -131.79376634948497
episode: 256 training return: tensor(-36148.7266, device='cuda:0')
episode: 257 training return: tensor(-42189.9922, device='cuda:0')
episode: 258 training return: tensor(-52945.7422, device='cuda:0')
episode: 259 training return: tensor(-8023.5317, device='cuda:0')
epoch: 65 test_true_pfm: -117.13708666953603
episode: 260 training return: tensor(-80157.1328, device='cuda:0')
episode: 261 training return: tensor(-40733.5195, device='cuda:0')
episode: 262 training return: tensor(-55866.4414, device='cuda:0')
episode: 263 training return: tensor(-46646.5039, device='cuda:0')
epoch: 66 test_true_pfm: -107.08282046106247
episode: 264 training return: tensor(-118622.7109, device='cuda:0')
episode: 265 training return: tensor(-80491.1719, device='cuda:0')
episode: 266 training return: tensor(-36902.1055, device='cuda:0')
episode: 267 training return: tensor(-72048.2031, device='cuda:0')
epoch: 67 test_true_pfm: -102.68080265117074
episode: 268 training return: tensor(-80524.7188, device='cuda:0')
episode: 269 training return: tensor(-53121.8828, device='cuda:0')
episode: 270 training return: tensor(-45068.6211, device='cuda:0')
episode: 271 training return: tensor(-34554.0352, device='cuda:0')
epoch: 68 test_true_pfm: -110.22133399846398
episode: 272 training return: tensor(-23308.7363, device='cuda:0')
episode: 273 training return: tensor(-77494.5234, device='cuda:0')
episode: 274 training return: tensor(-32162.7227, device='cuda:0')
episode: 275 training return: tensor(-65686.2266, device='cuda:0')
epoch: 69 test_true_pfm: -104.713751602924
episode: 276 training return: tensor(-47347.6914, device='cuda:0')
episode: 277 training return: tensor(-60716.6719, device='cuda:0')
episode: 278 training return: tensor(-31225.3789, device='cuda:0')
episode: 279 training return: tensor(-60479.5273, device='cuda:0')
epoch: 70 test_true_pfm: -97.81439396949334
episode: 280 training return: tensor(-50167.8203, device='cuda:0')
episode: 281 training return: tensor(-28957.8086, device='cuda:0')
episode: 282 training return: tensor(-8406.0498, device='cuda:0')
episode: 283 training return: tensor(-82001.6094, device='cuda:0')
epoch: 71 test_true_pfm: -94.05250570704465
episode: 284 training return: tensor(-62408.2773, device='cuda:0')
episode: 285 training return: tensor(-18483.9219, device='cuda:0')
episode: 286 training return: tensor(-57093.2344, device='cuda:0')
episode: 287 training return: tensor(-71748.9375, device='cuda:0')
epoch: 72 test_true_pfm: -95.76295451683858
episode: 288 training return: tensor(-53411.8008, device='cuda:0')
episode: 289 training return: tensor(-81428.8125, device='cuda:0')
episode: 290 training return: tensor(-37969.1484, device='cuda:0')
episode: 291 training return: tensor(-53664.9805, device='cuda:0')
epoch: 73 test_true_pfm: 46.5125218727734
episode: 292 training return: tensor(-56159.9102, device='cuda:0')
episode: 293 training return: tensor(-102901.6016, device='cuda:0')
episode: 294 training return: tensor(-57707.4414, device='cuda:0')
episode: 295 training return: tensor(-635102.2500, device='cuda:0')
epoch: 74 test_true_pfm: -48.15241822343439
episode: 296 training return: tensor(-70966.9531, device='cuda:0')
episode: 297 training return: tensor(-337449.5000, device='cuda:0')
episode: 298 training return: tensor(-116143.3594, device='cuda:0')
episode: 299 training return: tensor(-69588.6797, device='cuda:0')
epoch: 75 test_true_pfm: -41.843536807511896
episode: 300 training return: tensor(-111746.7344, device='cuda:0')
episode: 301 training return: tensor(-58030.3828, device='cuda:0')
episode: 302 training return: tensor(-64436.2578, device='cuda:0')
episode: 303 training return: tensor(-131059.5000, device='cuda:0')
epoch: 76 test_true_pfm: -0.6316949915827857
episode: 304 training return: tensor(-91035.9844, device='cuda:0')
episode: 305 training return: tensor(-110891.0469, device='cuda:0')
episode: 306 training return: tensor(-124355.2031, device='cuda:0')
episode: 307 training return: tensor(-81095.5859, device='cuda:0')
epoch: 77 test_true_pfm: -272.73526424042274
episode: 308 training return: tensor(-151754.6250, device='cuda:0')
episode: 309 training return: tensor(-128901., device='cuda:0')
episode: 310 training return: tensor(-130110.5000, device='cuda:0')
episode: 311 training return: tensor(-173980.7188, device='cuda:0')
epoch: 78 test_true_pfm: -257.789644029833
episode: 312 training return: tensor(-179768.1406, device='cuda:0')
episode: 313 training return: tensor(-77602.0547, device='cuda:0')
episode: 314 training return: tensor(-77121.8281, device='cuda:0')
episode: 315 training return: tensor(-49144.6133, device='cuda:0')
epoch: 79 test_true_pfm: -129.19719115019714
episode: 316 training return: tensor(-27450.5215, device='cuda:0')
episode: 317 training return: tensor(-107861.2500, device='cuda:0')
episode: 318 training return: tensor(-63971.1641, device='cuda:0')
episode: 319 training return: tensor(-97272.8828, device='cuda:0')
epoch: 80 test_true_pfm: -134.77167813480386
episode: 320 training return: tensor(-206953.7500, device='cuda:0')
episode: 321 training return: tensor(-77253.1250, device='cuda:0')
episode: 322 training return: tensor(-113095.9609, device='cuda:0')
episode: 323 training return: tensor(-130268.8359, device='cuda:0')
epoch: 81 test_true_pfm: -74.08486658734353
episode: 324 training return: tensor(-137479.2969, device='cuda:0')
episode: 325 training return: tensor(-171255.8125, device='cuda:0')
episode: 326 training return: tensor(-71216.5625, device='cuda:0')
episode: 327 training return: tensor(-74791.1094, device='cuda:0')
epoch: 82 test_true_pfm: -87.77541204079272
episode: 328 training return: tensor(-47310.5078, device='cuda:0')
episode: 329 training return: tensor(-153064.3906, device='cuda:0')
episode: 330 training return: tensor(-22675.5215, device='cuda:0')
episode: 331 training return: tensor(-78384.4688, device='cuda:0')
epoch: 83 test_true_pfm: -74.21673278871195
episode: 332 training return: tensor(-45053.0117, device='cuda:0')
episode: 333 training return: tensor(-94174.8828, device='cuda:0')
episode: 334 training return: tensor(-148849.8281, device='cuda:0')
episode: 335 training return: tensor(-146843.6719, device='cuda:0')
epoch: 84 test_true_pfm: 57.40483276452795
episode: 336 training return: tensor(-88924.8359, device='cuda:0')
episode: 337 training return: tensor(-234946.9219, device='cuda:0')
episode: 338 training return: tensor(-75267.3438, device='cuda:0')
episode: 339 training return: tensor(-145254.1250, device='cuda:0')
epoch: 85 test_true_pfm: -92.88004478053482
episode: 340 training return: tensor(-61336.9414, device='cuda:0')
episode: 341 training return: tensor(-63955.8828, device='cuda:0')
episode: 342 training return: tensor(-75512.9375, device='cuda:0')
episode: 343 training return: tensor(-105003.4844, device='cuda:0')
epoch: 86 test_true_pfm: 44.34477061105488
episode: 344 training return: tensor(-56204.8242, device='cuda:0')
episode: 345 training return: tensor(-58968.3164, device='cuda:0')
episode: 346 training return: tensor(-109825.0547, device='cuda:0')
episode: 347 training return: tensor(-26155.5801, device='cuda:0')
epoch: 87 test_true_pfm: -66.03836071807756
episode: 348 training return: tensor(-70034.8828, device='cuda:0')
episode: 349 training return: tensor(-109080.4453, device='cuda:0')
episode: 350 training return: tensor(-30180.4629, device='cuda:0')
episode: 351 training return: tensor(-280838.0312, device='cuda:0')
epoch: 88 test_true_pfm: -16.57375692648748
episode: 352 training return: tensor(-61652.5898, device='cuda:0')
episode: 353 training return: tensor(-48787.8789, device='cuda:0')
episode: 354 training return: tensor(-32109.5273, device='cuda:0')
episode: 355 training return: tensor(-75377.9141, device='cuda:0')
epoch: 89 test_true_pfm: -16.488336908608712
episode: 356 training return: tensor(-90114.0781, device='cuda:0')
episode: 357 training return: tensor(-53082.1406, device='cuda:0')
episode: 358 training return: tensor(-44712.5156, device='cuda:0')
episode: 359 training return: tensor(-74023.6016, device='cuda:0')
epoch: 90 test_true_pfm: -54.031066919147285
episode: 360 training return: tensor(-81830.6641, device='cuda:0')
episode: 361 training return: tensor(-71339.7500, device='cuda:0')
episode: 362 training return: tensor(-51522.9023, device='cuda:0')
episode: 363 training return: tensor(-29348.5723, device='cuda:0')
epoch: 91 test_true_pfm: -103.41024522902323
episode: 364 training return: tensor(-40855.0469, device='cuda:0')
episode: 365 training return: tensor(-38885.0664, device='cuda:0')
episode: 366 training return: tensor(-62037.2188, device='cuda:0')
episode: 367 training return: tensor(-76578.9531, device='cuda:0')
epoch: 92 test_true_pfm: -48.421020134349256
episode: 368 training return: tensor(-46760.0273, device='cuda:0')
episode: 369 training return: tensor(-64990.9766, device='cuda:0')
episode: 370 training return: tensor(-35919.6367, device='cuda:0')
episode: 371 training return: tensor(-42452.6797, device='cuda:0')
epoch: 93 test_true_pfm: -133.0121354028733
episode: 372 training return: tensor(-18944.1270, device='cuda:0')
episode: 373 training return: tensor(-18875.8594, device='cuda:0')
episode: 374 training return: tensor(-21108.0312, device='cuda:0')
episode: 375 training return: tensor(-20682.4492, device='cuda:0')
epoch: 94 test_true_pfm: -133.75839711650312
episode: 376 training return: tensor(-7274.7100, device='cuda:0')
episode: 377 training return: tensor(-21150.2793, device='cuda:0')
episode: 378 training return: tensor(-10918.5322, device='cuda:0')
episode: 379 training return: tensor(-22325.1504, device='cuda:0')
epoch: 95 test_true_pfm: -107.80844559697148
episode: 380 training return: tensor(-24735.5332, device='cuda:0')
episode: 381 training return: tensor(-56813.7500, device='cuda:0')
episode: 382 training return: tensor(-34184.1602, device='cuda:0')
episode: 383 training return: tensor(-25347.1230, device='cuda:0')
epoch: 96 test_true_pfm: -42.63325739273435
episode: 384 training return: tensor(-30943.8438, device='cuda:0')
episode: 385 training return: tensor(-15666.1025, device='cuda:0')
episode: 386 training return: tensor(-41105.2930, device='cuda:0')
episode: 387 training return: tensor(-14792.8633, device='cuda:0')
epoch: 97 test_true_pfm: -22.93731161644889
episode: 388 training return: tensor(-23852.2383, device='cuda:0')
episode: 389 training return: tensor(-49664.9961, device='cuda:0')
episode: 390 training return: tensor(-14975.4951, device='cuda:0')
episode: 391 training return: tensor(-15420.3750, device='cuda:0')
epoch: 98 test_true_pfm: -29.613472684212116
episode: 392 training return: tensor(-13877.5732, device='cuda:0')
episode: 393 training return: tensor(-27273.7578, device='cuda:0')
episode: 394 training return: tensor(-15569.0410, device='cuda:0')
episode: 395 training return: tensor(-17177.7285, device='cuda:0')
epoch: 99 test_true_pfm: 10.151935148953687
episode: 396 training return: tensor(-12029.2402, device='cuda:0')
episode: 397 training return: tensor(-18597.4570, device='cuda:0')
episode: 398 training return: tensor(-19435.1152, device='cuda:0')
episode: 399 training return: tensor(-21862.8223, device='cuda:0')
epoch: 100 test_true_pfm: 13.157213658154769
episode: 400 training return: tensor(-24495.9512, device='cuda:0')
episode: 401 training return: tensor(-38546.6016, device='cuda:0')
episode: 402 training return: tensor(-218427.7031, device='cuda:0')
episode: 403 training return: tensor(-34259.5430, device='cuda:0')
epoch: 101 test_true_pfm: -112.31875254113218
episode: 404 training return: tensor(-16760.4336, device='cuda:0')
episode: 405 training return: tensor(-16537.8125, device='cuda:0')
episode: 406 training return: tensor(-24013.8711, device='cuda:0')
episode: 407 training return: tensor(-20655.9961, device='cuda:0')
epoch: 102 test_true_pfm: -96.83346061810029
episode: 408 training return: tensor(-45394.0430, device='cuda:0')
episode: 409 training return: tensor(-19996.5859, device='cuda:0')
episode: 410 training return: tensor(-19404.7070, device='cuda:0')
episode: 411 training return: tensor(-19366.4043, device='cuda:0')
epoch: 103 test_true_pfm: -18.240742345966602
episode: 412 training return: tensor(-25563.7910, device='cuda:0')
episode: 413 training return: tensor(-20347.8789, device='cuda:0')
episode: 414 training return: tensor(-49349.9766, device='cuda:0')
episode: 415 training return: tensor(-29463.0176, device='cuda:0')
epoch: 104 test_true_pfm: 14.428657258153878
episode: 416 training return: tensor(-43451.0078, device='cuda:0')
episode: 417 training return: tensor(-15055.1514, device='cuda:0')
episode: 418 training return: tensor(-23647.1582, device='cuda:0')
episode: 419 training return: tensor(-18122.1074, device='cuda:0')
epoch: 105 test_true_pfm: -26.4898785301832
episode: 420 training return: tensor(-26847.1895, device='cuda:0')
episode: 421 training return: tensor(-16613.7363, device='cuda:0')
episode: 422 training return: tensor(-17621.2559, device='cuda:0')
episode: 423 training return: tensor(-20107.8691, device='cuda:0')
epoch: 106 test_true_pfm: -2.540585937441478
episode: 424 training return: tensor(-13529.6650, device='cuda:0')
episode: 425 training return: tensor(-24313.7891, device='cuda:0')
episode: 426 training return: tensor(-17863.0996, device='cuda:0')
episode: 427 training return: tensor(-17859.6543, device='cuda:0')
epoch: 107 test_true_pfm: -32.27093257012053
episode: 428 training return: tensor(-19432.9590, device='cuda:0')
episode: 429 training return: tensor(-12401.2344, device='cuda:0')
episode: 430 training return: tensor(-17357.1875, device='cuda:0')
episode: 431 training return: tensor(-12698.2529, device='cuda:0')
epoch: 108 test_true_pfm: -66.65048765290629
episode: 432 training return: tensor(-13380.7451, device='cuda:0')
episode: 433 training return: tensor(-43389.6445, device='cuda:0')
episode: 434 training return: tensor(-18131.2617, device='cuda:0')
episode: 435 training return: tensor(-8292.5811, device='cuda:0')
epoch: 109 test_true_pfm: -96.52380983370699
episode: 436 training return: tensor(-12849.1885, device='cuda:0')
episode: 437 training return: tensor(-4725.1411, device='cuda:0')
episode: 438 training return: tensor(-10491.2646, device='cuda:0')
episode: 439 training return: tensor(-11534.4414, device='cuda:0')
epoch: 110 test_true_pfm: -124.49537346978819
episode: 440 training return: tensor(-14826.9229, device='cuda:0')
episode: 441 training return: tensor(-24183.8105, device='cuda:0')
episode: 442 training return: tensor(-35275.2227, device='cuda:0')
episode: 443 training return: tensor(-21178.8145, device='cuda:0')
epoch: 111 test_true_pfm: 3.1317407150139496
episode: 444 training return: tensor(-15039.9395, device='cuda:0')
episode: 445 training return: tensor(-8765.2979, device='cuda:0')
episode: 446 training return: tensor(-25321.0703, device='cuda:0')
episode: 447 training return: tensor(-37888.7070, device='cuda:0')
epoch: 112 test_true_pfm: -236.63571757500847
episode: 448 training return: tensor(-25807.0078, device='cuda:0')
episode: 449 training return: tensor(-28600.3340, device='cuda:0')
episode: 450 training return: tensor(-35163.7656, device='cuda:0')
episode: 451 training return: tensor(-32396.7168, device='cuda:0')
epoch: 113 test_true_pfm: -131.37733676468528
episode: 452 training return: tensor(-34341.0039, device='cuda:0')
episode: 453 training return: tensor(-32957.0898, device='cuda:0')
episode: 454 training return: tensor(-24719.3066, device='cuda:0')
episode: 455 training return: tensor(-7947.1577, device='cuda:0')
epoch: 114 test_true_pfm: 48.47624910541729
episode: 456 training return: tensor(-7288.0371, device='cuda:0')
episode: 457 training return: tensor(-7814.4512, device='cuda:0')
episode: 458 training return: tensor(-7530.1289, device='cuda:0')
episode: 459 training return: tensor(-7677.6934, device='cuda:0')
epoch: 115 test_true_pfm: -36.52316205598845
episode: 460 training return: tensor(-8149.7329, device='cuda:0')
episode: 461 training return: tensor(-9717.7637, device='cuda:0')
episode: 462 training return: tensor(-11903.7705, device='cuda:0')
episode: 463 training return: tensor(-13641.6836, device='cuda:0')
epoch: 116 test_true_pfm: -41.423337341796234
episode: 464 training return: tensor(-14573.6904, device='cuda:0')
episode: 465 training return: tensor(-13684.7246, device='cuda:0')
episode: 466 training return: tensor(-13795.8887, device='cuda:0')
episode: 467 training return: tensor(-13523.3809, device='cuda:0')
epoch: 117 test_true_pfm: -93.43312854169805
episode: 468 training return: tensor(-13457.8359, device='cuda:0')
episode: 469 training return: tensor(-13714.1602, device='cuda:0')
episode: 470 training return: tensor(-14787.7188, device='cuda:0')
episode: 471 training return: tensor(-18685.6777, device='cuda:0')
epoch: 118 test_true_pfm: -73.08517078602854
episode: 472 training return: tensor(-24227.7617, device='cuda:0')
episode: 473 training return: tensor(-17858.0645, device='cuda:0')
episode: 474 training return: tensor(-15778.1260, device='cuda:0')
episode: 475 training return: tensor(-17965.1113, device='cuda:0')
epoch: 119 test_true_pfm: -121.46510368819541
episode: 476 training return: tensor(-28215.9863, device='cuda:0')
episode: 477 training return: tensor(-24400.1953, device='cuda:0')
episode: 478 training return: tensor(-24849.1094, device='cuda:0')
episode: 479 training return: tensor(-20070.3887, device='cuda:0')
epoch: 120 test_true_pfm: -76.85392795378114
episode: 480 training return: tensor(-33405.4648, device='cuda:0')
episode: 481 training return: tensor(-29790.4863, device='cuda:0')
episode: 482 training return: tensor(-27417.5332, device='cuda:0')
episode: 483 training return: tensor(-25027.9629, device='cuda:0')
epoch: 121 test_true_pfm: -35.70550103050267
episode: 484 training return: tensor(-32042.8789, device='cuda:0')
episode: 485 training return: tensor(-29656.0234, device='cuda:0')
episode: 486 training return: tensor(-24997.0117, device='cuda:0')
episode: 487 training return: tensor(-31240.4727, device='cuda:0')
epoch: 122 test_true_pfm: 67.96424612956997
episode: 488 training return: tensor(-31515.0508, device='cuda:0')
episode: 489 training return: tensor(-31978.5039, device='cuda:0')
episode: 490 training return: tensor(-13382.8555, device='cuda:0')
episode: 491 training return: tensor(-21265.5840, device='cuda:0')
epoch: 123 test_true_pfm: -107.82983768523327
episode: 492 training return: tensor(-19514.8613, device='cuda:0')
episode: 493 training return: tensor(-14370.1211, device='cuda:0')
episode: 494 training return: tensor(-31923.0547, device='cuda:0')
episode: 495 training return: tensor(-36890.2383, device='cuda:0')
epoch: 124 test_true_pfm: 114.5594919546099
episode: 496 training return: tensor(-29690.0840, device='cuda:0')
episode: 497 training return: tensor(-35999.4805, device='cuda:0')
episode: 498 training return: tensor(-23261.3516, device='cuda:0')
episode: 499 training return: tensor(-24283.8789, device='cuda:0')
epoch: 125 test_true_pfm: -80.02215417042264
episode: 500 training return: tensor(-22513.1035, device='cuda:0')
episode: 501 training return: tensor(-30846.8105, device='cuda:0')
episode: 502 training return: tensor(-22113.7656, device='cuda:0')
episode: 503 training return: tensor(-19022.5723, device='cuda:0')
epoch: 126 test_true_pfm: -77.8740318152068
episode: 504 training return: tensor(-8905.5840, device='cuda:0')
episode: 505 training return: tensor(-9669.5732, device='cuda:0')
episode: 506 training return: tensor(-10585.6396, device='cuda:0')
episode: 507 training return: tensor(-9581.2822, device='cuda:0')
epoch: 127 test_true_pfm: -167.5779962933928
episode: 508 training return: tensor(-10142.3574, device='cuda:0')
episode: 509 training return: tensor(-8263.7002, device='cuda:0')
episode: 510 training return: tensor(-11920.4365, device='cuda:0')
episode: 511 training return: tensor(-13617.5029, device='cuda:0')
epoch: 128 test_true_pfm: -163.42532339294206
episode: 512 training return: tensor(-9034.9492, device='cuda:0')
episode: 513 training return: tensor(-11365.5342, device='cuda:0')
episode: 514 training return: tensor(-8537.0234, device='cuda:0')
episode: 515 training return: tensor(-12092.6094, device='cuda:0')
epoch: 129 test_true_pfm: -169.0373811545209
episode: 516 training return: tensor(-18954.6660, device='cuda:0')
episode: 517 training return: tensor(-1098514.5000, device='cuda:0')
episode: 518 training return: tensor(-14799.4307, device='cuda:0')
episode: 519 training return: tensor(-17264.9219, device='cuda:0')
epoch: 130 test_true_pfm: -105.50485316508967
episode: 520 training return: tensor(-18236.7227, device='cuda:0')
episode: 521 training return: tensor(-10073.7695, device='cuda:0')
episode: 522 training return: tensor(-16297.6455, device='cuda:0')
episode: 523 training return: tensor(-9697.1543, device='cuda:0')
epoch: 131 test_true_pfm: -152.3455792018558
episode: 524 training return: tensor(-10314.8604, device='cuda:0')
episode: 525 training return: tensor(-17828.2715, device='cuda:0')
episode: 526 training return: tensor(-14143.8486, device='cuda:0')
episode: 527 training return: tensor(-25861.8203, device='cuda:0')
epoch: 132 test_true_pfm: -115.20087426416531
episode: 528 training return: tensor(-18027.8906, device='cuda:0')
episode: 529 training return: tensor(-12396.3740, device='cuda:0')
episode: 530 training return: tensor(-12700.1699, device='cuda:0')
episode: 531 training return: tensor(-22306.1523, device='cuda:0')
epoch: 133 test_true_pfm: -112.07610795674442
episode: 532 training return: tensor(-19576.7773, device='cuda:0')
episode: 533 training return: tensor(-15192.9287, device='cuda:0')
episode: 534 training return: tensor(-13511.4844, device='cuda:0')
episode: 535 training return: tensor(-18197.4004, device='cuda:0')
epoch: 134 test_true_pfm: -119.47898557619607
episode: 536 training return: tensor(-16794.1973, device='cuda:0')
episode: 537 training return: tensor(-17424.3359, device='cuda:0')
episode: 538 training return: tensor(-12333.3652, device='cuda:0')
episode: 539 training return: tensor(-22739.9980, device='cuda:0')
epoch: 135 test_true_pfm: -175.9071257888323
episode: 540 training return: tensor(-12226.7754, device='cuda:0')
episode: 541 training return: tensor(-11218.9014, device='cuda:0')
episode: 542 training return: tensor(-12830.8994, device='cuda:0')
episode: 543 training return: tensor(-12207.5635, device='cuda:0')
epoch: 136 test_true_pfm: -151.27035211165312
episode: 544 training return: tensor(-14731.5049, device='cuda:0')
episode: 545 training return: tensor(-9100.0635, device='cuda:0')
episode: 546 training return: tensor(-9537.3389, device='cuda:0')
episode: 547 training return: tensor(-8953.9580, device='cuda:0')
epoch: 137 test_true_pfm: -136.75531682144347
episode: 548 training return: tensor(-9865.9541, device='cuda:0')
episode: 549 training return: tensor(-11995.7061, device='cuda:0')
episode: 550 training return: tensor(-14958.0205, device='cuda:0')
episode: 551 training return: tensor(-12835.2207, device='cuda:0')
epoch: 138 test_true_pfm: -181.96388261654
episode: 552 training return: tensor(-12086.4619, device='cuda:0')
episode: 553 training return: tensor(-13151.2705, device='cuda:0')
episode: 554 training return: tensor(-11915.3154, device='cuda:0')
episode: 555 training return: tensor(-14382.7656, device='cuda:0')
epoch: 139 test_true_pfm: -136.42234100381728
episode: 556 training return: tensor(-12971.6123, device='cuda:0')
episode: 557 training return: tensor(-11699.4883, device='cuda:0')
episode: 558 training return: tensor(-26315.4688, device='cuda:0')
episode: 559 training return: tensor(-16150.5400, device='cuda:0')
epoch: 140 test_true_pfm: -136.8031815868787
episode: 560 training return: tensor(-21727.1562, device='cuda:0')
episode: 561 training return: tensor(-14625.1289, device='cuda:0')
episode: 562 training return: tensor(-19926.1230, device='cuda:0')
episode: 563 training return: tensor(-18783.2500, device='cuda:0')
epoch: 141 test_true_pfm: -113.42847065138382
episode: 564 training return: tensor(-25252.8613, device='cuda:0')
episode: 565 training return: tensor(-20763.6875, device='cuda:0')
episode: 566 training return: tensor(-16705.2363, device='cuda:0')
episode: 567 training return: tensor(-12447.7646, device='cuda:0')
epoch: 142 test_true_pfm: -163.94490635378804
episode: 568 training return: tensor(-20115.3594, device='cuda:0')
episode: 569 training return: tensor(-12892.4639, device='cuda:0')
episode: 570 training return: tensor(-14370.3799, device='cuda:0')
episode: 571 training return: tensor(-14529.2354, device='cuda:0')
epoch: 143 test_true_pfm: -165.79341760510223
episode: 572 training return: tensor(-15121.4297, device='cuda:0')
episode: 573 training return: tensor(-15290.0195, device='cuda:0')
episode: 574 training return: tensor(-9487.5898, device='cuda:0')
episode: 575 training return: tensor(-15867.0234, device='cuda:0')
epoch: 144 test_true_pfm: -169.74870028077527
episode: 576 training return: tensor(-14282.4697, device='cuda:0')
episode: 577 training return: tensor(-18556.4902, device='cuda:0')
episode: 578 training return: tensor(-16060.4590, device='cuda:0')
episode: 579 training return: tensor(-16905.8770, device='cuda:0')
epoch: 145 test_true_pfm: -99.2128685312636
episode: 580 training return: tensor(-22122.2812, device='cuda:0')
episode: 581 training return: tensor(-15285.8193, device='cuda:0')
episode: 582 training return: tensor(-17203.9668, device='cuda:0')
episode: 583 training return: tensor(-21086.1504, device='cuda:0')
epoch: 146 test_true_pfm: 757.4359485011927
episode: 584 training return: tensor(-29869.3027, device='cuda:0')
episode: 585 training return: tensor(-32491.6172, device='cuda:0')
episode: 586 training return: tensor(-35445.3359, device='cuda:0')
episode: 587 training return: tensor(-19618.9688, device='cuda:0')
epoch: 147 test_true_pfm: -193.4010112777116
episode: 588 training return: tensor(-43129.4766, device='cuda:0')
episode: 589 training return: tensor(-18230.5176, device='cuda:0')
episode: 590 training return: tensor(-23247.5293, device='cuda:0')
episode: 591 training return: tensor(-16703.1465, device='cuda:0')
epoch: 148 test_true_pfm: -186.14282310038928
episode: 592 training return: tensor(-13088.6084, device='cuda:0')
episode: 593 training return: tensor(-10429.1104, device='cuda:0')
episode: 594 training return: tensor(-11476.0996, device='cuda:0')
episode: 595 training return: tensor(-32322.7207, device='cuda:0')
epoch: 149 test_true_pfm: -161.42746255363133
episode: 596 training return: tensor(-190499.2969, device='cuda:0')
episode: 597 training return: tensor(-326797.2500, device='cuda:0')
episode: 598 training return: tensor(-156634.7188, device='cuda:0')
episode: 599 training return: tensor(-990511.7500, device='cuda:0')
epoch: 150 test_true_pfm: 201.6733910127376
