['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.7023789929691702 test_loss: -1.0806273460388183
epoch: 1 training_loss -1.8254399847984315 test_loss: -2.730327033996582
epoch: 2 training_loss -3.4912135004997253 test_loss: -4.071864318847656
epoch: 3 training_loss -4.380601134300232 test_loss: -4.679042816162109
epoch: 4 training_loss -4.967156782150268 test_loss: -5.091933059692383
epoch: 5 training_loss -5.294662113189697 test_loss: -5.453013610839844
epoch: 6 training_loss -5.597157545089722 test_loss: -5.5864604949951175
epoch: 7 training_loss -5.892120575904846 test_loss: -5.994953155517578
epoch: 8 training_loss -6.051219921112061 test_loss: -6.0822303771972654
epoch: 9 training_loss -6.252418942451477 test_loss: -6.228068542480469
epoch: 10 training_loss -6.328698296546936 test_loss: -6.371898651123047
epoch: 11 training_loss -6.461549601554871 test_loss: -6.47601547241211
epoch: 12 training_loss -6.53677387714386 test_loss: -6.597383117675781
epoch: 13 training_loss -6.641907234191894 test_loss: -6.639011383056641
epoch: 14 training_loss -6.70878270149231 test_loss: -6.594657897949219
epoch: 15 training_loss -6.72836094379425 test_loss: -6.666020202636719
epoch: 16 training_loss -6.8389252138137815 test_loss: -6.7787925720214846
epoch: 17 training_loss -6.806420021057129 test_loss: -6.821554565429688
epoch: 18 training_loss -6.924824151992798 test_loss: -6.939545440673828
epoch: 19 training_loss -6.9461659240722655 test_loss: -6.946269226074219
epoch: 20 training_loss -6.967825160026551 test_loss: -6.943585968017578
epoch: 21 training_loss -7.008011846542359 test_loss: -6.998808288574219
epoch: 22 training_loss -7.08701449394226 test_loss: -6.930612182617187
epoch: 23 training_loss -7.034422307014466 test_loss: -7.043315887451172
epoch: 24 training_loss -7.122144947052002 test_loss: -7.163809204101563
epoch: 25 training_loss -7.159434037208557 test_loss: -7.1700386047363285
epoch: 26 training_loss -7.131460514068603 test_loss: -7.098429107666016
epoch: 27 training_loss -7.164245133399963 test_loss: -7.179556274414063
epoch: 28 training_loss -7.215366387367249 test_loss: -7.211707305908203
epoch: 29 training_loss -7.241484303474426 test_loss: -7.247225952148438
epoch: 30 training_loss -7.2578168392181395 test_loss: -7.330439758300781
epoch: 31 training_loss -7.306785078048706 test_loss: -7.301898193359375
epoch: 32 training_loss -7.320831336975098 test_loss: -7.312164306640625
epoch: 33 training_loss -7.3438350248336794 test_loss: -7.2496498107910154
epoch: 34 training_loss -7.314213237762451 test_loss: -7.388929748535157
epoch: 35 training_loss -7.363624787330627 test_loss: -7.373478698730469
epoch: 36 training_loss -7.392527203559876 test_loss: -7.384886932373047
epoch: 37 training_loss -7.398035697937011 test_loss: -7.4892120361328125
epoch: 38 training_loss -7.455381588935852 test_loss: -7.4812873840332035
epoch: 39 training_loss -7.3977468299865725 test_loss: -7.310153198242188
epoch: 40 training_loss -7.4445115756988525 test_loss: -7.469561004638672
epoch: 41 training_loss -7.466187291145324 test_loss: -7.4459228515625
epoch: 42 training_loss -7.490957808494568 test_loss: -7.449422454833984
epoch: 43 training_loss -7.489803004264831 test_loss: -7.53023681640625
epoch: 44 training_loss -7.528433947563172 test_loss: -7.48101577758789
epoch: 45 training_loss -7.5244933080673215 test_loss: -7.553081512451172
epoch: 46 training_loss -7.56051155090332 test_loss: -7.614576721191407
epoch: 47 training_loss -7.557397909164429 test_loss: -7.523644256591797
epoch: 48 training_loss -7.588449010848999 test_loss: -7.521353912353516
epoch: 49 training_loss -7.57437273979187 test_loss: -7.661502838134766
epoch: 50 training_loss -7.5942691755294796 test_loss: -7.600025939941406
epoch: 51 training_loss -7.594028840065002 test_loss: -7.552191162109375
epoch: 52 training_loss -7.622958397865295 test_loss: -7.636875152587891
epoch: 53 training_loss -7.657136168479919 test_loss: -7.580174255371094
epoch: 54 training_loss -7.652572441101074 test_loss: -7.714487457275391
epoch: 55 training_loss -7.655657310485839 test_loss: -7.581139373779297
epoch: 56 training_loss -7.605548691749573 test_loss: -7.608528137207031
epoch: 57 training_loss -7.700991640090942 test_loss: -7.650730895996094
epoch: 58 training_loss -7.708253812789917 test_loss: -7.792101287841797
epoch: 59 training_loss -7.711432018280029 test_loss: -7.673800659179688
epoch: 60 training_loss -7.698878488540649 test_loss: -7.801830291748047
epoch: 61 training_loss -7.7297908353805544 test_loss: -7.777980804443359
epoch: 62 training_loss -7.738543591499329 test_loss: -7.694347381591797
epoch: 63 training_loss -7.740207943916321 test_loss: -7.704591369628906
epoch: 64 training_loss -7.766235904693604 test_loss: -7.7677757263183596
epoch: 65 training_loss -7.751908431053161 test_loss: -7.789127349853516
epoch: 66 training_loss -7.752814130783081 test_loss: -7.832009124755859
epoch: 67 training_loss -7.79832097530365 test_loss: -7.818767547607422
epoch: 68 training_loss -7.796319918632507 test_loss: -7.8171531677246096
epoch: 69 training_loss -7.819486970901489 test_loss: -7.792137145996094
epoch: 70 training_loss -7.816500024795532 test_loss: -7.799128723144531
epoch: 71 training_loss -7.797670950889588 test_loss: -7.8108573913574215
epoch: 72 training_loss -7.811154799461365 test_loss: -7.775981903076172
epoch: 73 training_loss -7.834005479812622 test_loss: -7.7523345947265625
epoch: 74 training_loss -7.816145315170288 test_loss: -7.87471923828125
epoch: 75 training_loss -7.853803973197937 test_loss: -7.785453796386719
epoch: 76 training_loss -7.8479866743087765 test_loss: -7.858648681640625
epoch: 77 training_loss -7.850704860687256 test_loss: -7.844401550292969
epoch: 78 training_loss -7.86817877292633 test_loss: -7.87835693359375
epoch: 79 training_loss -7.888147406578064 test_loss: -7.928643035888672
epoch: 80 training_loss -7.872497067451477 test_loss: -7.930674743652344
epoch: 81 training_loss -7.906221499443054 test_loss: -7.8810272216796875
epoch: 82 training_loss -7.8869053173065184 test_loss: -7.910437774658203
epoch: 83 training_loss -7.897884759902954 test_loss: -7.915540313720703
epoch: 84 training_loss -7.922372522354126 test_loss: -7.964897918701172
epoch: 85 training_loss -7.947332220077515 test_loss: -7.969197845458984
epoch: 86 training_loss -7.9090038013458255 test_loss: -7.911553192138672
epoch: 87 training_loss -7.905869536399841 test_loss: -7.902928161621094
epoch: 88 training_loss -7.9526486444473266 test_loss: -7.9339347839355465
epoch: 89 training_loss -7.978591537475586 test_loss: -7.9986717224121096
epoch: 90 training_loss -7.9261468362808225 test_loss: -7.799442291259766
epoch: 91 training_loss -7.936317772865295 test_loss: -7.959342956542969
epoch: 92 training_loss -7.98250376701355 test_loss: -8.017940521240234
epoch: 93 training_loss -7.972200345993042 test_loss: -7.968122100830078
epoch: 94 training_loss -7.970219593048096 test_loss: -8.00228271484375
epoch: 95 training_loss -7.963690481185913 test_loss: -7.9725898742675785
epoch: 96 training_loss -7.984251642227173 test_loss: -8.012149810791016
epoch: 97 training_loss -7.977112383842468 test_loss: -8.002855682373047
epoch: 98 training_loss -8.00248718738556 test_loss: -7.942207336425781
epoch: 99 training_loss -8.032086200714112 test_loss: -8.065975189208984
epoch: 100 training_loss -8.021880960464477 test_loss: -7.9807281494140625
epoch: 101 training_loss -7.98474591255188 test_loss: -8.035773468017577
epoch: 102 training_loss -8.017014074325562 test_loss: -8.015819549560547
epoch: 103 training_loss -8.028683156967164 test_loss: -7.996951293945313
epoch: 104 training_loss -8.034984951019288 test_loss: -7.984164428710938
epoch: 105 training_loss -8.025941381454468 test_loss: -8.018108367919922
epoch: 106 training_loss -8.041563167572022 test_loss: -8.036180114746093
epoch: 107 training_loss -8.044783215522767 test_loss: -8.005956268310547
epoch: 108 training_loss -8.03434965133667 test_loss: -8.058522033691407
epoch: 109 training_loss -8.060308599472046 test_loss: -8.005250549316406
epoch: 110 training_loss -8.047137374877929 test_loss: -7.9875640869140625
epoch: 111 training_loss -8.054688997268677 test_loss: -8.102265930175781
epoch: 112 training_loss -8.051091303825379 test_loss: -8.079094696044923
epoch: 113 training_loss -8.084660787582397 test_loss: -8.086634826660156
epoch: 114 training_loss -8.061298885345458 test_loss: -8.032701873779297
epoch: 115 training_loss -8.018684177398681 test_loss: -8.119335174560547
epoch: 116 training_loss -8.113238129615784 test_loss: -7.956146240234375
epoch: 117 training_loss -8.092700381278991 test_loss: -8.08734588623047
epoch: 118 training_loss -8.094237909317016 test_loss: -8.036253356933594
epoch: 119 training_loss -8.091177678108215 test_loss: -8.105367279052734
epoch: 120 training_loss -8.098075776100158 test_loss: -8.193080139160156
epoch: 121 training_loss -8.123795242309571 test_loss: -8.076842498779296
epoch: 122 training_loss -8.087257113456726 test_loss: -8.158750915527344
epoch: 123 training_loss -8.11597580909729 test_loss: -8.128225708007813
epoch: 124 training_loss -8.137034583091737 test_loss: -8.100228881835937
epoch: 125 training_loss -8.115941953659057 test_loss: -8.055559539794922
epoch: 126 training_loss -8.08551745414734 test_loss: -8.086049652099609
epoch: 127 training_loss -8.104110112190247 test_loss: -8.153514862060547
epoch: 128 training_loss -8.14423086643219 test_loss: -8.115003967285157
epoch: 129 training_loss -8.11111949443817 test_loss: -8.073748016357422
epoch: 130 training_loss -8.122621788978577 test_loss: -8.102132415771484
epoch: 131 training_loss -8.135998930931091 test_loss: -8.087714385986327
epoch: 132 training_loss -8.123918700218201 test_loss: -8.083283233642579
epoch: 133 training_loss -8.139423241615296 test_loss: -8.050523376464843
epoch: 134 training_loss -8.107026524543762 test_loss: -8.171209716796875
epoch: 135 training_loss -8.128763966560363 test_loss: -8.208268737792968
epoch: 136 training_loss -8.15181465625763 test_loss: -8.176077270507813
epoch: 137 training_loss -8.158645133972168 test_loss: -8.13009262084961
epoch: 138 training_loss -8.178429145812988 test_loss: -8.171893310546874
epoch: 139 training_loss -8.164040060043336 test_loss: -8.214319610595703
epoch: 140 training_loss -8.157265558242798 test_loss: -8.182707214355469
epoch: 141 training_loss -8.185579495429993 test_loss: -8.169358062744141
epoch: 142 training_loss -8.181451444625855 test_loss: -8.200627899169922
epoch: 143 training_loss -8.137968244552612 test_loss: -8.159087371826171
epoch: 144 training_loss -8.170968327522278 test_loss: -8.20083999633789
epoch: 145 training_loss -8.165182890892028 test_loss: -8.18334503173828
epoch: 146 training_loss -8.184092116355895 test_loss: -8.145417022705079
epoch: 147 training_loss -8.13436641216278 test_loss: -8.184503936767578
epoch: 148 training_loss -8.203080945014953 test_loss: -8.23522720336914
epoch: 149 training_loss -8.215158171653748 test_loss: -8.201134490966798
1017.2842113682302
episode: 0 training return: tensor(-3.4115e+08, device='cuda:0')
episode: 1 training return: tensor(-1.2976e+08, device='cuda:0')
episode: 2 training return: tensor(-17516578., device='cuda:0')
episode: 3 training return: tensor(-1.0480e+08, device='cuda:0')
epoch: 1 test_true_pfm: -94.85585717799978
episode: 4 training return: tensor(-98523864., device='cuda:0')
episode: 5 training return: tensor(-2191148., device='cuda:0')
episode: 6 training return: tensor(-1388273.3750, device='cuda:0')
episode: 7 training return: tensor(-27613248., device='cuda:0')
epoch: 2 test_true_pfm: -27.28769159154385
episode: 8 training return: tensor(-67116120., device='cuda:0')
episode: 9 training return: tensor(-7744352., device='cuda:0')
episode: 10 training return: tensor(-2.7674e+10, device='cuda:0')
episode: 11 training return: tensor(-22574.6719, device='cuda:0')
epoch: 3 test_true_pfm: 96.8783596990537
episode: 12 training return: tensor(-22429.6641, device='cuda:0')
episode: 13 training return: tensor(-15464.6484, device='cuda:0')
episode: 14 training return: tensor(-20763.6367, device='cuda:0')
episode: 15 training return: tensor(-23822.2891, device='cuda:0')
epoch: 4 test_true_pfm: 92.51771633657984
episode: 16 training return: tensor(-24776.9980, device='cuda:0')
episode: 17 training return: tensor(-5401095., device='cuda:0')
episode: 18 training return: tensor(-13237756., device='cuda:0')
episode: 19 training return: tensor(-14364514., device='cuda:0')
epoch: 5 test_true_pfm: 158.32916234748805
episode: 20 training return: tensor(-16099830., device='cuda:0')
episode: 21 training return: tensor(-23328762., device='cuda:0')
episode: 22 training return: tensor(-12156020., device='cuda:0')
episode: 23 training return: tensor(-5408127., device='cuda:0')
epoch: 6 test_true_pfm: -18.582005406555496
episode: 24 training return: tensor(-3.9080e+08, device='cuda:0')
episode: 25 training return: tensor(-1444371.7500, device='cuda:0')
episode: 26 training return: tensor(-4914571.5000, device='cuda:0')
episode: 27 training return: tensor(-5.2874e+11, device='cuda:0')
epoch: 7 test_true_pfm: 358.380229864762
episode: 28 training return: tensor(-10078559., device='cuda:0')
episode: 29 training return: tensor(-12396615., device='cuda:0')
episode: 30 training return: tensor(-8079519., device='cuda:0')
episode: 31 training return: tensor(-4.4689e+10, device='cuda:0')
epoch: 8 test_true_pfm: -3.683679327547926
episode: 32 training return: tensor(-9.0123e+08, device='cuda:0')
episode: 33 training return: tensor(-1.4170e+12, device='cuda:0')
episode: 34 training return: tensor(-9.3034e+08, device='cuda:0')
episode: 35 training return: tensor(-38590896., device='cuda:0')
epoch: 9 test_true_pfm: -37.77171914425942
episode: 36 training return: tensor(-856519., device='cuda:0')
episode: 37 training return: tensor(-62589020., device='cuda:0')
episode: 38 training return: tensor(-16696560., device='cuda:0')
episode: 39 training return: tensor(-9986773., device='cuda:0')
epoch: 10 test_true_pfm: 150.8969300990518
episode: 40 training return: tensor(-2831512.2500, device='cuda:0')
episode: 41 training return: tensor(-1570883.2500, device='cuda:0')
episode: 42 training return: tensor(-213939.2500, device='cuda:0')
episode: 43 training return: tensor(-13288.1406, device='cuda:0')
epoch: 11 test_true_pfm: 184.2051358221256
episode: 44 training return: tensor(-6186.5845, device='cuda:0')
episode: 45 training return: tensor(-4577.2065, device='cuda:0')
episode: 46 training return: tensor(-3296.0381, device='cuda:0')
episode: 47 training return: tensor(-3219.4963, device='cuda:0')
epoch: 12 test_true_pfm: 232.83136981463974
episode: 48 training return: tensor(-3175.7834, device='cuda:0')
episode: 49 training return: tensor(-3038.1462, device='cuda:0')
episode: 50 training return: tensor(-3040.5947, device='cuda:0')
episode: 51 training return: tensor(-3153.7385, device='cuda:0')
epoch: 13 test_true_pfm: 217.87418912240275
episode: 52 training return: tensor(-3116.9458, device='cuda:0')
episode: 53 training return: tensor(-3120.2976, device='cuda:0')
episode: 54 training return: tensor(-3282.5078, device='cuda:0')
episode: 55 training return: tensor(-3818.6792, device='cuda:0')
epoch: 14 test_true_pfm: 218.5449891342482
episode: 56 training return: tensor(-3854.6545, device='cuda:0')
episode: 57 training return: tensor(-5732.1030, device='cuda:0')
episode: 58 training return: tensor(-5933.7778, device='cuda:0')
episode: 59 training return: tensor(-6932.5098, device='cuda:0')
epoch: 15 test_true_pfm: 199.3665925260049
episode: 60 training return: tensor(-7686.4985, device='cuda:0')
episode: 61 training return: tensor(-27699.5254, device='cuda:0')
episode: 62 training return: tensor(-10037.8799, device='cuda:0')
episode: 63 training return: tensor(-15643.0107, device='cuda:0')
epoch: 16 test_true_pfm: 190.02759435856782
episode: 64 training return: tensor(-10721.7402, device='cuda:0')
episode: 65 training return: tensor(-50034.4375, device='cuda:0')
episode: 66 training return: tensor(-29314.1094, device='cuda:0')
episode: 67 training return: tensor(-5310902., device='cuda:0')
epoch: 17 test_true_pfm: -71.70995836845321
episode: 68 training return: tensor(-3571547.5000, device='cuda:0')
episode: 69 training return: tensor(-2823908.7500, device='cuda:0')
episode: 70 training return: tensor(-5876679.5000, device='cuda:0')
episode: 71 training return: tensor(-17482088., device='cuda:0')
epoch: 18 test_true_pfm: -141.0287374199476
episode: 72 training return: tensor(-7789129., device='cuda:0')
episode: 73 training return: tensor(-18723730., device='cuda:0')
episode: 74 training return: tensor(-3438377., device='cuda:0')
episode: 75 training return: tensor(-3196534.5000, device='cuda:0')
epoch: 19 test_true_pfm: 47.448817175325395
episode: 76 training return: tensor(-3045574., device='cuda:0')
episode: 77 training return: tensor(-13255641., device='cuda:0')
episode: 78 training return: tensor(-298444., device='cuda:0')
episode: 79 training return: tensor(-196086.4844, device='cuda:0')
epoch: 20 test_true_pfm: -49.36942705698527
episode: 80 training return: tensor(-1313838.2500, device='cuda:0')
episode: 81 training return: tensor(-1916510.8750, device='cuda:0')
episode: 82 training return: tensor(-2125275.2500, device='cuda:0')
episode: 83 training return: tensor(-4344985.5000, device='cuda:0')
epoch: 21 test_true_pfm: -87.84666650626413
episode: 84 training return: tensor(-4.5878e+09, device='cuda:0')
episode: 85 training return: tensor(-6625198., device='cuda:0')
episode: 86 training return: tensor(-10335398., device='cuda:0')
episode: 87 training return: tensor(-7.5730e+08, device='cuda:0')
epoch: 22 test_true_pfm: -137.38842157847625
episode: 88 training return: tensor(-1510702.3750, device='cuda:0')
episode: 89 training return: tensor(-8033753.5000, device='cuda:0')
episode: 90 training return: tensor(-13684449., device='cuda:0')
episode: 91 training return: tensor(-7.5089e+08, device='cuda:0')
epoch: 23 test_true_pfm: -134.0853089363114
episode: 92 training return: tensor(-5981069., device='cuda:0')
episode: 93 training return: tensor(-14902072., device='cuda:0')
episode: 94 training return: tensor(-1.4147e+09, device='cuda:0')
episode: 95 training return: tensor(-1.9803e+09, device='cuda:0')
epoch: 24 test_true_pfm: -132.0245941249587
episode: 96 training return: tensor(-1.3338e+09, device='cuda:0')
episode: 97 training return: tensor(-30132002., device='cuda:0')
episode: 98 training return: tensor(-3.2661e+08, device='cuda:0')
episode: 99 training return: tensor(-11696955., device='cuda:0')
epoch: 25 test_true_pfm: -123.00544001173881
episode: 100 training return: tensor(-3147078., device='cuda:0')
episode: 101 training return: tensor(-1.4928e+09, device='cuda:0')
episode: 102 training return: tensor(-3420669., device='cuda:0')
episode: 103 training return: tensor(-8.1490e+08, device='cuda:0')
epoch: 26 test_true_pfm: -135.05338677662016
episode: 104 training return: tensor(-1.1071e+09, device='cuda:0')
episode: 105 training return: tensor(-26899782., device='cuda:0')
episode: 106 training return: tensor(-2.6644e+09, device='cuda:0')
episode: 107 training return: tensor(-2447026., device='cuda:0')
epoch: 27 test_true_pfm: -91.56294976984064
episode: 108 training return: tensor(-787923.3750, device='cuda:0')
episode: 109 training return: tensor(-12585863., device='cuda:0')
episode: 110 training return: tensor(-2987821.5000, device='cuda:0')
episode: 111 training return: tensor(-2.4534e+09, device='cuda:0')
epoch: 28 test_true_pfm: -97.65832289587668
episode: 112 training return: tensor(-9145990., device='cuda:0')
episode: 113 training return: tensor(-8548002., device='cuda:0')
episode: 114 training return: tensor(-1074247.8750, device='cuda:0')
episode: 115 training return: tensor(-1749573., device='cuda:0')
epoch: 29 test_true_pfm: -109.53966383844964
episode: 116 training return: tensor(-1421343.2500, device='cuda:0')
episode: 117 training return: tensor(-3.9177e+09, device='cuda:0')
episode: 118 training return: tensor(-1001625.5000, device='cuda:0')
episode: 119 training return: tensor(-977047.3125, device='cuda:0')
epoch: 30 test_true_pfm: -97.62478691180847
episode: 120 training return: tensor(-929473.3125, device='cuda:0')
episode: 121 training return: tensor(-1700427.2500, device='cuda:0')
episode: 122 training return: tensor(-939542.6875, device='cuda:0')
episode: 123 training return: tensor(-1317832.5000, device='cuda:0')
epoch: 31 test_true_pfm: -104.3026966034763
episode: 124 training return: tensor(-1674642.2500, device='cuda:0')
episode: 125 training return: tensor(-3.8279e+08, device='cuda:0')
episode: 126 training return: tensor(-960176.1250, device='cuda:0')
episode: 127 training return: tensor(-1652043.8750, device='cuda:0')
epoch: 32 test_true_pfm: -109.97337399870912
episode: 128 training return: tensor(-1026471.4375, device='cuda:0')
episode: 129 training return: tensor(-1110500., device='cuda:0')
episode: 130 training return: tensor(-1245126.5000, device='cuda:0')
episode: 131 training return: tensor(-9.4491e+08, device='cuda:0')
epoch: 33 test_true_pfm: -104.94427360792213
episode: 132 training return: tensor(-1332359., device='cuda:0')
episode: 133 training return: tensor(-1097872.1250, device='cuda:0')
episode: 134 training return: tensor(-1.1887e+10, device='cuda:0')
episode: 135 training return: tensor(-4.5857e+08, device='cuda:0')
epoch: 34 test_true_pfm: -102.79200059477411
episode: 136 training return: tensor(-2.1206e+10, device='cuda:0')
episode: 137 training return: tensor(-1273893.5000, device='cuda:0')
episode: 138 training return: tensor(-1314270.6250, device='cuda:0')
episode: 139 training return: tensor(-1093411.2500, device='cuda:0')
epoch: 35 test_true_pfm: -60.255888883836604
episode: 140 training return: tensor(-1591857.7500, device='cuda:0')
episode: 141 training return: tensor(-1862760.2500, device='cuda:0')
episode: 142 training return: tensor(-1106687.1250, device='cuda:0')
episode: 143 training return: tensor(-3.1467e+09, device='cuda:0')
epoch: 36 test_true_pfm: -101.53278794636377
episode: 144 training return: tensor(-3744357.7500, device='cuda:0')
episode: 145 training return: tensor(-8.4778e+09, device='cuda:0')
episode: 146 training return: tensor(-2.5600e+09, device='cuda:0')
episode: 147 training return: tensor(-996336.5000, device='cuda:0')
epoch: 37 test_true_pfm: -116.68881703212651
episode: 148 training return: tensor(-3816215.5000, device='cuda:0')
episode: 149 training return: tensor(-993368.2500, device='cuda:0')
episode: 150 training return: tensor(-3320201.2500, device='cuda:0')
episode: 151 training return: tensor(-2.1822e+09, device='cuda:0')
epoch: 38 test_true_pfm: -111.124872901676
episode: 152 training return: tensor(-1.2178e+09, device='cuda:0')
episode: 153 training return: tensor(-5.2801e+09, device='cuda:0')
episode: 154 training return: tensor(-1090120.2500, device='cuda:0')
episode: 155 training return: tensor(-6.6956e+09, device='cuda:0')
epoch: 39 test_true_pfm: -18.015189471693695
episode: 156 training return: tensor(-1.5817e+09, device='cuda:0')
episode: 157 training return: tensor(-1.0450e+09, device='cuda:0')
episode: 158 training return: tensor(-1283176.5000, device='cuda:0')
episode: 159 training return: tensor(-4.2222e+08, device='cuda:0')
epoch: 40 test_true_pfm: -103.49464105967188
episode: 160 training return: tensor(-1175002.2500, device='cuda:0')
episode: 161 training return: tensor(-1442993.7500, device='cuda:0')
episode: 162 training return: tensor(-1400015.1250, device='cuda:0')
episode: 163 training return: tensor(-7.4728e+08, device='cuda:0')
epoch: 41 test_true_pfm: -99.7856500952929
episode: 164 training return: tensor(-1026907., device='cuda:0')
episode: 165 training return: tensor(-998931.1875, device='cuda:0')
episode: 166 training return: tensor(-1169945.3750, device='cuda:0')
episode: 167 training return: tensor(-1.8498e+09, device='cuda:0')
epoch: 42 test_true_pfm: -103.75983623663448
episode: 168 training return: tensor(-978541.3750, device='cuda:0')
episode: 169 training return: tensor(-6.4790e+09, device='cuda:0')
episode: 170 training return: tensor(-6.5743e+09, device='cuda:0')
episode: 171 training return: tensor(-1465075.3750, device='cuda:0')
epoch: 43 test_true_pfm: -102.38213019492646
episode: 172 training return: tensor(-3.8479e+08, device='cuda:0')
episode: 173 training return: tensor(-3.1316e+10, device='cuda:0')
episode: 174 training return: tensor(-1.4320e+10, device='cuda:0')
episode: 175 training return: tensor(-830352.6875, device='cuda:0')
epoch: 44 test_true_pfm: 168.29884099524034
episode: 176 training return: tensor(-2.2801e+11, device='cuda:0')
episode: 177 training return: tensor(-1047651.5625, device='cuda:0')
episode: 178 training return: tensor(-7.4488e+09, device='cuda:0')
episode: 179 training return: tensor(-1080655.2500, device='cuda:0')
epoch: 45 test_true_pfm: -107.35415348698488
episode: 180 training return: tensor(-981179.8125, device='cuda:0')
episode: 181 training return: tensor(-1032878.8125, device='cuda:0')
episode: 182 training return: tensor(-805462.9375, device='cuda:0')
episode: 183 training return: tensor(-1448996.3750, device='cuda:0')
epoch: 46 test_true_pfm: -82.57318570282256
episode: 184 training return: tensor(-1811839.6250, device='cuda:0')
episode: 185 training return: tensor(-6.7792e+08, device='cuda:0')
episode: 186 training return: tensor(-990433.7500, device='cuda:0')
episode: 187 training return: tensor(-1034123.9375, device='cuda:0')
epoch: 47 test_true_pfm: 73.04612957497496
episode: 188 training return: tensor(-5.4219e+10, device='cuda:0')
episode: 189 training return: tensor(-859089.7500, device='cuda:0')
episode: 190 training return: tensor(-4.8304e+11, device='cuda:0')
episode: 191 training return: tensor(-3.7298e+08, device='cuda:0')
epoch: 48 test_true_pfm: -113.89902988391832
episode: 192 training return: tensor(-4.5920e+08, device='cuda:0')
episode: 193 training return: tensor(-4.1688e+11, device='cuda:0')
episode: 194 training return: tensor(-2.9124e+11, device='cuda:0')
episode: 195 training return: tensor(-4.1073e+10, device='cuda:0')
epoch: 49 test_true_pfm: -102.05834299284106
episode: 196 training return: tensor(-3659791.7500, device='cuda:0')
episode: 197 training return: tensor(-1467015.7500, device='cuda:0')
episode: 198 training return: tensor(-3580855.5000, device='cuda:0')
episode: 199 training return: tensor(-1906193.2500, device='cuda:0')
epoch: 50 test_true_pfm: -99.13057484501955
episode: 200 training return: tensor(-993053.7500, device='cuda:0')
episode: 201 training return: tensor(-1.4819e+10, device='cuda:0')
episode: 202 training return: tensor(-1335779.8750, device='cuda:0')
episode: 203 training return: tensor(-784896.6250, device='cuda:0')
epoch: 51 test_true_pfm: 179.0971823271157
episode: 204 training return: tensor(-9.7746e+08, device='cuda:0')
episode: 205 training return: tensor(-1.9110e+10, device='cuda:0')
episode: 206 training return: tensor(-1471256.3750, device='cuda:0')
episode: 207 training return: tensor(-1168548.6250, device='cuda:0')
epoch: 52 test_true_pfm: 532.7643069049941
episode: 208 training return: tensor(-5.3122e+11, device='cuda:0')
episode: 209 training return: tensor(-1.7729e+10, device='cuda:0')
episode: 210 training return: tensor(-2024461.3750, device='cuda:0')
episode: 211 training return: tensor(-873167.9375, device='cuda:0')
epoch: 53 test_true_pfm: -167.08987403331355
episode: 212 training return: tensor(-1.7207e+09, device='cuda:0')
episode: 213 training return: tensor(-1075817.7500, device='cuda:0')
episode: 214 training return: tensor(-1961017.1250, device='cuda:0')
episode: 215 training return: tensor(-2.5261e+11, device='cuda:0')
epoch: 54 test_true_pfm: -219.3580612573239
episode: 216 training return: tensor(-907532.1875, device='cuda:0')
episode: 217 training return: tensor(-4.1125e+10, device='cuda:0')
episode: 218 training return: tensor(-815944.5000, device='cuda:0')
episode: 219 training return: tensor(-906906.6250, device='cuda:0')
epoch: 55 test_true_pfm: -248.69325676763796
episode: 220 training return: tensor(-3.8385e+11, device='cuda:0')
episode: 221 training return: tensor(-930397.9375, device='cuda:0')
episode: 222 training return: tensor(-762310.5625, device='cuda:0')
episode: 223 training return: tensor(-4.0967e+09, device='cuda:0')
epoch: 56 test_true_pfm: -95.59434870187194
episode: 224 training return: tensor(-27871.7559, device='cuda:0')
episode: 225 training return: tensor(-1399621.7500, device='cuda:0')
episode: 226 training return: tensor(-27607.6270, device='cuda:0')
episode: 227 training return: tensor(-27450.3848, device='cuda:0')
epoch: 57 test_true_pfm: 116.31748695343815
episode: 228 training return: tensor(-26723.4238, device='cuda:0')
episode: 229 training return: tensor(-13619035., device='cuda:0')
episode: 230 training return: tensor(-22694.3867, device='cuda:0')
episode: 231 training return: tensor(-26674.9922, device='cuda:0')
epoch: 58 test_true_pfm: 109.85369484644788
episode: 232 training return: tensor(-801921.6875, device='cuda:0')
episode: 233 training return: tensor(-27103.6230, device='cuda:0')
episode: 234 training return: tensor(-28028.5879, device='cuda:0')
episode: 235 training return: tensor(-25504.6055, device='cuda:0')
epoch: 59 test_true_pfm: 138.7356549934776
episode: 236 training return: tensor(-22846.4316, device='cuda:0')
episode: 237 training return: tensor(-27865.3340, device='cuda:0')
episode: 238 training return: tensor(-27059.8340, device='cuda:0')
episode: 239 training return: tensor(-27841.4805, device='cuda:0')
epoch: 60 test_true_pfm: 117.67557325305523
episode: 240 training return: tensor(-27223.8398, device='cuda:0')
episode: 241 training return: tensor(-25437.7129, device='cuda:0')
episode: 242 training return: tensor(-26180.7598, device='cuda:0')
episode: 243 training return: tensor(-26865.2188, device='cuda:0')
epoch: 61 test_true_pfm: 118.81297201680498
episode: 244 training return: tensor(-26754.3594, device='cuda:0')
episode: 245 training return: tensor(-30088.3633, device='cuda:0')
episode: 246 training return: tensor(-26852.5332, device='cuda:0')
episode: 247 training return: tensor(-27169.9668, device='cuda:0')
epoch: 62 test_true_pfm: 117.34090421995226
episode: 248 training return: tensor(-26676.6934, device='cuda:0')
episode: 249 training return: tensor(-30466.2188, device='cuda:0')
episode: 250 training return: tensor(-27621.6191, device='cuda:0')
episode: 251 training return: tensor(-35023.9336, device='cuda:0')
epoch: 63 test_true_pfm: -175.902238273096
episode: 252 training return: tensor(-37570.9297, device='cuda:0')
episode: 253 training return: tensor(-30610.2891, device='cuda:0')
episode: 254 training return: tensor(-25914754., device='cuda:0')
episode: 255 training return: tensor(-1232500.2500, device='cuda:0')
epoch: 64 test_true_pfm: -104.69063782005247
episode: 256 training return: tensor(-1099536.7500, device='cuda:0')
episode: 257 training return: tensor(-56823.2266, device='cuda:0')
episode: 258 training return: tensor(-5477673., device='cuda:0')
episode: 259 training return: tensor(-1949844.6250, device='cuda:0')
epoch: 65 test_true_pfm: 151.48061071489636
episode: 260 training return: tensor(-30759.5273, device='cuda:0')
episode: 261 training return: tensor(-1028129.5625, device='cuda:0')
episode: 262 training return: tensor(-2.7890e+08, device='cuda:0')
episode: 263 training return: tensor(-6960993., device='cuda:0')
epoch: 66 test_true_pfm: -93.78407565517362
episode: 264 training return: tensor(-1328386.2500, device='cuda:0')
episode: 265 training return: tensor(-151908.6875, device='cuda:0')
episode: 266 training return: tensor(-1341277.1250, device='cuda:0')
episode: 267 training return: tensor(-119060.5469, device='cuda:0')
epoch: 67 test_true_pfm: -75.94295156998504
episode: 268 training return: tensor(-5457115.5000, device='cuda:0')
episode: 269 training return: tensor(-2541840.7500, device='cuda:0')
episode: 270 training return: tensor(-334342.3125, device='cuda:0')
episode: 271 training return: tensor(-5.0987e+08, device='cuda:0')
epoch: 68 test_true_pfm: -55.67804545639018
episode: 272 training return: tensor(-4140307.7500, device='cuda:0')
episode: 273 training return: tensor(-9947510., device='cuda:0')
episode: 274 training return: tensor(-22991622., device='cuda:0')
episode: 275 training return: tensor(-4250246.5000, device='cuda:0')
epoch: 69 test_true_pfm: -84.36841790033372
episode: 276 training return: tensor(-2.1256e+08, device='cuda:0')
episode: 277 training return: tensor(-1.0662e+08, device='cuda:0')
episode: 278 training return: tensor(-1.9356e+09, device='cuda:0')
episode: 279 training return: tensor(-1.8869e+08, device='cuda:0')
epoch: 70 test_true_pfm: -125.73299084435394
episode: 280 training return: tensor(-3716684.2500, device='cuda:0')
episode: 281 training return: tensor(-1.0122e+08, device='cuda:0')
episode: 282 training return: tensor(-63966488., device='cuda:0')
episode: 283 training return: tensor(-12447720., device='cuda:0')
epoch: 71 test_true_pfm: -132.2498498818293
episode: 284 training return: tensor(-1.9039e+09, device='cuda:0')
episode: 285 training return: tensor(-3754017.7500, device='cuda:0')
episode: 286 training return: tensor(-42924.9062, device='cuda:0')
episode: 287 training return: tensor(-3019837., device='cuda:0')
epoch: 72 test_true_pfm: -27.555188228267877
episode: 288 training return: tensor(-10948625., device='cuda:0')
episode: 289 training return: tensor(-27295.9648, device='cuda:0')
episode: 290 training return: tensor(-31034.9863, device='cuda:0')
episode: 291 training return: tensor(-16402718., device='cuda:0')
epoch: 73 test_true_pfm: 52.148035653331476
episode: 292 training return: tensor(-31600.0820, device='cuda:0')
episode: 293 training return: tensor(-4.0709e+09, device='cuda:0')
episode: 294 training return: tensor(-14546802., device='cuda:0')
episode: 295 training return: tensor(-20247.6816, device='cuda:0')
epoch: 74 test_true_pfm: 268.80196406684394
episode: 296 training return: tensor(-12154244., device='cuda:0')
episode: 297 training return: tensor(-19086.6094, device='cuda:0')
episode: 298 training return: tensor(-16941.4395, device='cuda:0')
episode: 299 training return: tensor(-14312.9453, device='cuda:0')
epoch: 75 test_true_pfm: 164.19981387764344
episode: 300 training return: tensor(-5518019., device='cuda:0')
episode: 301 training return: tensor(-17858.4805, device='cuda:0')
episode: 302 training return: tensor(-1732742.8750, device='cuda:0')
episode: 303 training return: tensor(-14669.4805, device='cuda:0')
epoch: 76 test_true_pfm: -79.20462480854692
episode: 304 training return: tensor(-9.3902e+08, device='cuda:0')
episode: 305 training return: tensor(-16614.3398, device='cuda:0')
episode: 306 training return: tensor(-9772708., device='cuda:0')
episode: 307 training return: tensor(-1532912.1250, device='cuda:0')
epoch: 77 test_true_pfm: -28.623497353644307
episode: 308 training return: tensor(-19814.7324, device='cuda:0')
episode: 309 training return: tensor(-8587365., device='cuda:0')
episode: 310 training return: tensor(-7.1247e+08, device='cuda:0')
episode: 311 training return: tensor(-5278649.5000, device='cuda:0')
epoch: 78 test_true_pfm: 7.320354351928027
episode: 312 training return: tensor(-542511.0625, device='cuda:0')
episode: 313 training return: tensor(-3467648.2500, device='cuda:0')
episode: 314 training return: tensor(-1287451.6250, device='cuda:0')
episode: 315 training return: tensor(-19245232., device='cuda:0')
epoch: 79 test_true_pfm: -263.79293014686374
episode: 316 training return: tensor(-779622.9375, device='cuda:0')
episode: 317 training return: tensor(-22210.8027, device='cuda:0')
episode: 318 training return: tensor(-7990194.5000, device='cuda:0')
episode: 319 training return: tensor(-6268647.5000, device='cuda:0')
epoch: 80 test_true_pfm: 198.34770449626845
episode: 320 training return: tensor(-5306.1348, device='cuda:0')
episode: 321 training return: tensor(-19334658., device='cuda:0')
episode: 322 training return: tensor(-10101.0625, device='cuda:0')
episode: 323 training return: tensor(-5992.9019, device='cuda:0')
epoch: 81 test_true_pfm: 206.3423941660028
episode: 324 training return: tensor(-13828777., device='cuda:0')
episode: 325 training return: tensor(-20349196., device='cuda:0')
episode: 326 training return: tensor(-3925838.2500, device='cuda:0')
episode: 327 training return: tensor(-5542.7983, device='cuda:0')
epoch: 82 test_true_pfm: 318.03902111349186
episode: 328 training return: tensor(-18053932., device='cuda:0')
episode: 329 training return: tensor(-7953048.5000, device='cuda:0')
episode: 330 training return: tensor(-722290.1875, device='cuda:0')
episode: 331 training return: tensor(-14087637., device='cuda:0')
epoch: 83 test_true_pfm: 474.0524248249682
episode: 332 training return: tensor(-8243.6465, device='cuda:0')
episode: 333 training return: tensor(-9848.8838, device='cuda:0')
episode: 334 training return: tensor(-4760076., device='cuda:0')
episode: 335 training return: tensor(-8049.0410, device='cuda:0')
epoch: 84 test_true_pfm: 68.35664450108618
episode: 336 training return: tensor(-1697450.7500, device='cuda:0')
episode: 337 training return: tensor(-561390.2500, device='cuda:0')
episode: 338 training return: tensor(-1489194.3750, device='cuda:0')
episode: 339 training return: tensor(-3755207.7500, device='cuda:0')
epoch: 85 test_true_pfm: -179.06106559121022
episode: 340 training return: tensor(-548895.1250, device='cuda:0')
episode: 341 training return: tensor(-13054443., device='cuda:0')
episode: 342 training return: tensor(-632240.6875, device='cuda:0')
episode: 343 training return: tensor(-610496.6875, device='cuda:0')
epoch: 86 test_true_pfm: -177.11749603922644
episode: 344 training return: tensor(-1352491.1250, device='cuda:0')
episode: 345 training return: tensor(-711501.5625, device='cuda:0')
episode: 346 training return: tensor(-4444722., device='cuda:0')
episode: 347 training return: tensor(-19936780., device='cuda:0')
epoch: 87 test_true_pfm: -253.5760500746662
episode: 348 training return: tensor(-1462890.6250, device='cuda:0')
episode: 349 training return: tensor(-1881285.3750, device='cuda:0')
episode: 350 training return: tensor(-7614.1641, device='cuda:0')
episode: 351 training return: tensor(-4446256.5000, device='cuda:0')
epoch: 88 test_true_pfm: -293.08290375174374
episode: 352 training return: tensor(-1051969.7500, device='cuda:0')
episode: 353 training return: tensor(-55497796., device='cuda:0')
episode: 354 training return: tensor(-558980.6875, device='cuda:0')
episode: 355 training return: tensor(-1319121.6250, device='cuda:0')
epoch: 89 test_true_pfm: -27.174981566430347
episode: 356 training return: tensor(-14789906., device='cuda:0')
episode: 357 training return: tensor(-8136668.5000, device='cuda:0')
episode: 358 training return: tensor(-8617430., device='cuda:0')
episode: 359 training return: tensor(-26119686., device='cuda:0')
epoch: 90 test_true_pfm: 755.234733034009
episode: 360 training return: tensor(-23125200., device='cuda:0')
episode: 361 training return: tensor(-5188.4155, device='cuda:0')
episode: 362 training return: tensor(-7230402., device='cuda:0')
episode: 363 training return: tensor(-15100754., device='cuda:0')
epoch: 91 test_true_pfm: 677.1239363382409
episode: 364 training return: tensor(-14123196., device='cuda:0')
episode: 365 training return: tensor(-13782849., device='cuda:0')
episode: 366 training return: tensor(-7840673., device='cuda:0')
episode: 367 training return: tensor(-11249891., device='cuda:0')
epoch: 92 test_true_pfm: 342.08663170244535
episode: 368 training return: tensor(-5941.5508, device='cuda:0')
episode: 369 training return: tensor(-7627.9575, device='cuda:0')
episode: 370 training return: tensor(-5509.3457, device='cuda:0')
episode: 371 training return: tensor(-29602190., device='cuda:0')
epoch: 93 test_true_pfm: 622.8177903765812
episode: 372 training return: tensor(-11696942., device='cuda:0')
episode: 373 training return: tensor(-18367822., device='cuda:0')
episode: 374 training return: tensor(-6393.2495, device='cuda:0')
episode: 375 training return: tensor(-7812254.5000, device='cuda:0')
epoch: 94 test_true_pfm: -112.67034519506831
episode: 376 training return: tensor(-12246979., device='cuda:0')
episode: 377 training return: tensor(-1482341.2500, device='cuda:0')
episode: 378 training return: tensor(-4630971., device='cuda:0')
episode: 379 training return: tensor(-23692090., device='cuda:0')
epoch: 95 test_true_pfm: 682.3453761244194
episode: 380 training return: tensor(-17008430., device='cuda:0')
episode: 381 training return: tensor(-15061119., device='cuda:0')
episode: 382 training return: tensor(-10224816., device='cuda:0')
episode: 383 training return: tensor(-9245198., device='cuda:0')
epoch: 96 test_true_pfm: 1017.9534722567895
episode: 384 training return: tensor(-12130957., device='cuda:0')
episode: 385 training return: tensor(-14990184., device='cuda:0')
episode: 386 training return: tensor(-16060757., device='cuda:0')
episode: 387 training return: tensor(-15044869., device='cuda:0')
epoch: 97 test_true_pfm: 664.2587251131326
episode: 388 training return: tensor(-7303136., device='cuda:0')
episode: 389 training return: tensor(-14213702., device='cuda:0')
episode: 390 training return: tensor(-11253983., device='cuda:0')
episode: 391 training return: tensor(-14784326., device='cuda:0')
epoch: 98 test_true_pfm: 575.6015777001477
episode: 392 training return: tensor(-15830689., device='cuda:0')
episode: 393 training return: tensor(-10936622., device='cuda:0')
episode: 394 training return: tensor(-8468883., device='cuda:0')
episode: 395 training return: tensor(-8478765., device='cuda:0')
epoch: 99 test_true_pfm: -301.46491366977403
episode: 396 training return: tensor(-6282839.5000, device='cuda:0')
episode: 397 training return: tensor(-17231496., device='cuda:0')
episode: 398 training return: tensor(-16201025., device='cuda:0')
episode: 399 training return: tensor(-6858416.5000, device='cuda:0')
epoch: 100 test_true_pfm: 1155.692692192653
episode: 400 training return: tensor(-5919714.5000, device='cuda:0')
episode: 401 training return: tensor(-4933767., device='cuda:0')
episode: 402 training return: tensor(-16032102., device='cuda:0')
episode: 403 training return: tensor(-16327448., device='cuda:0')
epoch: 101 test_true_pfm: 577.1191470919215
episode: 404 training return: tensor(-13740052., device='cuda:0')
episode: 405 training return: tensor(-13919607., device='cuda:0')
episode: 406 training return: tensor(-13953564., device='cuda:0')
episode: 407 training return: tensor(-16067594., device='cuda:0')
epoch: 102 test_true_pfm: 693.7701996754935
episode: 408 training return: tensor(-12054518., device='cuda:0')
episode: 409 training return: tensor(-8898787., device='cuda:0')
episode: 410 training return: tensor(-19488116., device='cuda:0')
episode: 411 training return: tensor(-9135100., device='cuda:0')
epoch: 103 test_true_pfm: 850.1403305536606
episode: 412 training return: tensor(-14656105., device='cuda:0')
episode: 413 training return: tensor(-42711392., device='cuda:0')
episode: 414 training return: tensor(-19970736., device='cuda:0')
episode: 415 training return: tensor(-15473937., device='cuda:0')
epoch: 104 test_true_pfm: 1081.757367845011
episode: 416 training return: tensor(-22988172., device='cuda:0')
episode: 417 training return: tensor(-12081900., device='cuda:0')
episode: 418 training return: tensor(-26880024., device='cuda:0')
episode: 419 training return: tensor(-16437721., device='cuda:0')
epoch: 105 test_true_pfm: 1038.979688474192
episode: 420 training return: tensor(-12537352., device='cuda:0')
episode: 421 training return: tensor(-14353969., device='cuda:0')
episode: 422 training return: tensor(-13169634., device='cuda:0')
episode: 423 training return: tensor(-7930889., device='cuda:0')
epoch: 106 test_true_pfm: 705.9963583462913
episode: 424 training return: tensor(-8109893.5000, device='cuda:0')
episode: 425 training return: tensor(-12593590., device='cuda:0')
episode: 426 training return: tensor(-6846397., device='cuda:0')
episode: 427 training return: tensor(-9024034., device='cuda:0')
epoch: 107 test_true_pfm: 1373.6844773962537
episode: 428 training return: tensor(-17057160., device='cuda:0')
episode: 429 training return: tensor(-15516332., device='cuda:0')
episode: 430 training return: tensor(-9500276., device='cuda:0')
episode: 431 training return: tensor(-9106399., device='cuda:0')
epoch: 108 test_true_pfm: 1004.085156599103
episode: 432 training return: tensor(-12565281., device='cuda:0')
episode: 433 training return: tensor(-6132139.5000, device='cuda:0')
episode: 434 training return: tensor(-11951841., device='cuda:0')
episode: 435 training return: tensor(-10844518., device='cuda:0')
epoch: 109 test_true_pfm: 773.0866158702376
episode: 436 training return: tensor(-5579452., device='cuda:0')
episode: 437 training return: tensor(-8532855., device='cuda:0')
episode: 438 training return: tensor(-13284087., device='cuda:0')
episode: 439 training return: tensor(-8284562.5000, device='cuda:0')
epoch: 110 test_true_pfm: 590.0486823854974
episode: 440 training return: tensor(-2303994.7500, device='cuda:0')
episode: 441 training return: tensor(-24310936., device='cuda:0')
episode: 442 training return: tensor(-10917274., device='cuda:0')
episode: 443 training return: tensor(-7623278., device='cuda:0')
epoch: 111 test_true_pfm: -238.00470883278862
episode: 444 training return: tensor(-5374923.5000, device='cuda:0')
episode: 445 training return: tensor(-2592459.7500, device='cuda:0')
episode: 446 training return: tensor(-9598100., device='cuda:0')
episode: 447 training return: tensor(-1283256.6250, device='cuda:0')
epoch: 112 test_true_pfm: -391.82437272307817
episode: 448 training return: tensor(-1751326.1250, device='cuda:0')
episode: 449 training return: tensor(-1363481., device='cuda:0')
episode: 450 training return: tensor(-1235265.6250, device='cuda:0')
episode: 451 training return: tensor(-951112.0625, device='cuda:0')
epoch: 113 test_true_pfm: 1015.1173498258139
episode: 452 training return: tensor(-7297190., device='cuda:0')
episode: 453 training return: tensor(-11289677., device='cuda:0')
episode: 454 training return: tensor(-974800.5000, device='cuda:0')
episode: 455 training return: tensor(-6544745.5000, device='cuda:0')
epoch: 114 test_true_pfm: -257.5878265998972
episode: 456 training return: tensor(-8110390., device='cuda:0')
episode: 457 training return: tensor(-824924.7500, device='cuda:0')
episode: 458 training return: tensor(-993957.1875, device='cuda:0')
episode: 459 training return: tensor(-2048200.2500, device='cuda:0')
epoch: 115 test_true_pfm: -261.7946567387023
episode: 460 training return: tensor(-2822195., device='cuda:0')
episode: 461 training return: tensor(-12240928., device='cuda:0')
episode: 462 training return: tensor(-832006.3750, device='cuda:0')
episode: 463 training return: tensor(-3719682.2500, device='cuda:0')
epoch: 116 test_true_pfm: 827.115500680229
episode: 464 training return: tensor(-10829421., device='cuda:0')
episode: 465 training return: tensor(-4424176.5000, device='cuda:0')
episode: 466 training return: tensor(-7384486.5000, device='cuda:0')
episode: 467 training return: tensor(-7733464.5000, device='cuda:0')
epoch: 117 test_true_pfm: -196.96880026852034
episode: 468 training return: tensor(-1507898.7500, device='cuda:0')
episode: 469 training return: tensor(-23277376., device='cuda:0')
episode: 470 training return: tensor(-4740985., device='cuda:0')
episode: 471 training return: tensor(-986237.6875, device='cuda:0')
epoch: 118 test_true_pfm: -243.9045567004081
episode: 472 training return: tensor(-673621., device='cuda:0')
episode: 473 training return: tensor(-4910809., device='cuda:0')
episode: 474 training return: tensor(-5218434., device='cuda:0')
episode: 475 training return: tensor(-8777842., device='cuda:0')
epoch: 119 test_true_pfm: -198.53440817589907
episode: 476 training return: tensor(-1103508.6250, device='cuda:0')
episode: 477 training return: tensor(-1054809.8750, device='cuda:0')
episode: 478 training return: tensor(-9891967., device='cuda:0')
episode: 479 training return: tensor(-612916.9375, device='cuda:0')
epoch: 120 test_true_pfm: -186.69114091132448
episode: 480 training return: tensor(-645886.7500, device='cuda:0')
episode: 481 training return: tensor(-10880860., device='cuda:0')
episode: 482 training return: tensor(-2402667., device='cuda:0')
episode: 483 training return: tensor(-7405355.5000, device='cuda:0')
epoch: 121 test_true_pfm: -214.83278364543972
episode: 484 training return: tensor(-1271090.5000, device='cuda:0')
episode: 485 training return: tensor(-732397.3125, device='cuda:0')
episode: 486 training return: tensor(-5381270., device='cuda:0')
episode: 487 training return: tensor(-2370023.5000, device='cuda:0')
epoch: 122 test_true_pfm: -268.71381379259884
episode: 488 training return: tensor(-1088805.6250, device='cuda:0')
episode: 489 training return: tensor(-972445.8750, device='cuda:0')
episode: 490 training return: tensor(-921701.6250, device='cuda:0')
episode: 491 training return: tensor(-904659.4375, device='cuda:0')
epoch: 123 test_true_pfm: -403.3119001885796
episode: 492 training return: tensor(-1310874.7500, device='cuda:0')
episode: 493 training return: tensor(-677068.9375, device='cuda:0')
episode: 494 training return: tensor(-7151779.5000, device='cuda:0')
episode: 495 training return: tensor(-1507633.5000, device='cuda:0')
epoch: 124 test_true_pfm: -211.86709810145362
episode: 496 training return: tensor(-811044.0625, device='cuda:0')
episode: 497 training return: tensor(-1086577., device='cuda:0')
episode: 498 training return: tensor(-5691380., device='cuda:0')
episode: 499 training return: tensor(-782824.4375, device='cuda:0')
epoch: 125 test_true_pfm: -274.40655504784974
episode: 500 training return: tensor(-874212.5000, device='cuda:0')
episode: 501 training return: tensor(-5113238.5000, device='cuda:0')
episode: 502 training return: tensor(-3820256.7500, device='cuda:0')
episode: 503 training return: tensor(-1354766.6250, device='cuda:0')
epoch: 126 test_true_pfm: -182.57557284862253
episode: 504 training return: tensor(-1826800.2500, device='cuda:0')
episode: 505 training return: tensor(-6165067., device='cuda:0')
episode: 506 training return: tensor(-1549970.5000, device='cuda:0')
episode: 507 training return: tensor(-1411739.6250, device='cuda:0')
epoch: 127 test_true_pfm: -175.0780498851409
episode: 508 training return: tensor(-1820054.5000, device='cuda:0')
episode: 509 training return: tensor(-749400.4375, device='cuda:0')
episode: 510 training return: tensor(-2571261.5000, device='cuda:0')
episode: 511 training return: tensor(-981071.6250, device='cuda:0')
epoch: 128 test_true_pfm: -163.4682315672175
episode: 512 training return: tensor(-1350451.3750, device='cuda:0')
episode: 513 training return: tensor(-1057074.3750, device='cuda:0')
episode: 514 training return: tensor(-3074387.7500, device='cuda:0')
episode: 515 training return: tensor(-1313760.7500, device='cuda:0')
epoch: 129 test_true_pfm: -188.9824905033846
episode: 516 training return: tensor(-4948751., device='cuda:0')
episode: 517 training return: tensor(-1152823.8750, device='cuda:0')
episode: 518 training return: tensor(-2891396.5000, device='cuda:0')
episode: 519 training return: tensor(-1005876.2500, device='cuda:0')
epoch: 130 test_true_pfm: -174.69607380257932
episode: 520 training return: tensor(-1092513.2500, device='cuda:0')
episode: 521 training return: tensor(-1583938., device='cuda:0')
episode: 522 training return: tensor(-1475506.5000, device='cuda:0')
episode: 523 training return: tensor(-805976.3125, device='cuda:0')
epoch: 131 test_true_pfm: -119.33206455207171
episode: 524 training return: tensor(-606609.8750, device='cuda:0')
episode: 525 training return: tensor(-898202.1250, device='cuda:0')
episode: 526 training return: tensor(-2469754., device='cuda:0')
episode: 527 training return: tensor(-1281205.6250, device='cuda:0')
epoch: 132 test_true_pfm: -148.3593554916771
episode: 528 training return: tensor(-500332.4062, device='cuda:0')
episode: 529 training return: tensor(-911512.6250, device='cuda:0')
episode: 530 training return: tensor(-659445.6875, device='cuda:0')
episode: 531 training return: tensor(-548373.2500, device='cuda:0')
epoch: 133 test_true_pfm: -205.53588040315057
episode: 532 training return: tensor(-1207197.6250, device='cuda:0')
episode: 533 training return: tensor(-379243., device='cuda:0')
episode: 534 training return: tensor(-656215.9375, device='cuda:0')
episode: 535 training return: tensor(-500786.2500, device='cuda:0')
epoch: 134 test_true_pfm: -179.23598705156738
episode: 536 training return: tensor(-661351.1250, device='cuda:0')
episode: 537 training return: tensor(-1228158.3750, device='cuda:0')
episode: 538 training return: tensor(-1529886., device='cuda:0')
episode: 539 training return: tensor(-912858.5000, device='cuda:0')
epoch: 135 test_true_pfm: -130.785491282347
episode: 540 training return: tensor(-783698.5625, device='cuda:0')
episode: 541 training return: tensor(-1077835.2500, device='cuda:0')
episode: 542 training return: tensor(-869306.6250, device='cuda:0')
episode: 543 training return: tensor(-1378201.8750, device='cuda:0')
epoch: 136 test_true_pfm: -125.9842257144611
episode: 544 training return: tensor(-662433.9375, device='cuda:0')
episode: 545 training return: tensor(-727091.1875, device='cuda:0')
episode: 546 training return: tensor(-1225375.5000, device='cuda:0')
episode: 547 training return: tensor(-993868.9375, device='cuda:0')
epoch: 137 test_true_pfm: -145.90368240037554
episode: 548 training return: tensor(-1353084.5000, device='cuda:0')
episode: 549 training return: tensor(-983417.8750, device='cuda:0')
episode: 550 training return: tensor(-907967.6250, device='cuda:0')
episode: 551 training return: tensor(-588230., device='cuda:0')
epoch: 138 test_true_pfm: -241.5106151069443
episode: 552 training return: tensor(-1182849.8750, device='cuda:0')
episode: 553 training return: tensor(-870072.8750, device='cuda:0')
episode: 554 training return: tensor(-1316116.3750, device='cuda:0')
episode: 555 training return: tensor(-999992.7500, device='cuda:0')
epoch: 139 test_true_pfm: -187.76561024982436
episode: 556 training return: tensor(-711464.0625, device='cuda:0')
episode: 557 training return: tensor(-651591.2500, device='cuda:0')
episode: 558 training return: tensor(-898571.7500, device='cuda:0')
episode: 559 training return: tensor(-701120.1250, device='cuda:0')
epoch: 140 test_true_pfm: -119.60021868315441
episode: 560 training return: tensor(-582223.3125, device='cuda:0')
episode: 561 training return: tensor(-523905.6250, device='cuda:0')
episode: 562 training return: tensor(-584564.8750, device='cuda:0')
episode: 563 training return: tensor(-1310207.5000, device='cuda:0')
epoch: 141 test_true_pfm: -176.63028707606884
episode: 564 training return: tensor(-634882.8750, device='cuda:0')
episode: 565 training return: tensor(-1025562.6875, device='cuda:0')
episode: 566 training return: tensor(-2893251., device='cuda:0')
episode: 567 training return: tensor(-68206.8594, device='cuda:0')
epoch: 142 test_true_pfm: -184.9975116771263
episode: 568 training return: tensor(-693865.8750, device='cuda:0')
episode: 569 training return: tensor(-283036.5312, device='cuda:0')
episode: 570 training return: tensor(-745926.4375, device='cuda:0')
episode: 571 training return: tensor(-1907181.1250, device='cuda:0')
epoch: 143 test_true_pfm: -167.94824639657665
episode: 572 training return: tensor(-902413.7500, device='cuda:0')
episode: 573 training return: tensor(-702973.1250, device='cuda:0')
episode: 574 training return: tensor(-2143186.2500, device='cuda:0')
episode: 575 training return: tensor(-549067.8125, device='cuda:0')
epoch: 144 test_true_pfm: -142.32873576730822
episode: 576 training return: tensor(-969000., device='cuda:0')
episode: 577 training return: tensor(-586144.2500, device='cuda:0')
episode: 578 training return: tensor(-472386.2188, device='cuda:0')
episode: 579 training return: tensor(-145062.8125, device='cuda:0')
epoch: 145 test_true_pfm: 822.1281453784914
episode: 580 training return: tensor(-56807.9688, device='cuda:0')
episode: 581 training return: tensor(-703219.2500, device='cuda:0')
episode: 582 training return: tensor(-723488.8750, device='cuda:0')
episode: 583 training return: tensor(-575303.5000, device='cuda:0')
epoch: 146 test_true_pfm: -167.69275440701463
episode: 584 training return: tensor(-885649.6250, device='cuda:0')
episode: 585 training return: tensor(-1258355.5000, device='cuda:0')
episode: 586 training return: tensor(-414976.7812, device='cuda:0')
episode: 587 training return: tensor(-819990.5000, device='cuda:0')
epoch: 147 test_true_pfm: -191.82277927016062
episode: 588 training return: tensor(-1715299.2500, device='cuda:0')
episode: 589 training return: tensor(-1375464., device='cuda:0')
episode: 590 training return: tensor(-949822.9375, device='cuda:0')
episode: 591 training return: tensor(-843986.9375, device='cuda:0')
epoch: 148 test_true_pfm: -125.96497256192248
episode: 592 training return: tensor(-1263675., device='cuda:0')
episode: 593 training return: tensor(-1258844.5000, device='cuda:0')
episode: 594 training return: tensor(-536589.3125, device='cuda:0')
episode: 595 training return: tensor(-719152.7500, device='cuda:0')
epoch: 149 test_true_pfm: -186.16974292031628
episode: 596 training return: tensor(-861567.5000, device='cuda:0')
episode: 597 training return: tensor(-495839.5625, device='cuda:0')
episode: 598 training return: tensor(-636105.5625, device='cuda:0')
episode: 599 training return: tensor(-629681.6875, device='cuda:0')
epoch: 150 test_true_pfm: 211.84592978680885
