['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 2.14520699352026 test_loss: 0.3486933708190918
epoch: 1 training_loss -0.07185619752854108 test_loss: -0.46185812950134275
epoch: 2 training_loss -0.70705958542414 test_loss: -0.931765079498291
epoch: 3 training_loss -1.29290456533432 test_loss: -1.1344643592834474
epoch: 4 training_loss -1.532438304424286 test_loss: -1.720199203491211
epoch: 5 training_loss -1.8714886856079103 test_loss: -1.914327049255371
epoch: 6 training_loss -2.0984368753433227 test_loss: -2.0844757080078127
epoch: 7 training_loss -2.3777296900749207 test_loss: -2.3439361572265627
epoch: 8 training_loss -2.5036709570884703 test_loss: -2.579486274719238
epoch: 9 training_loss -2.6882125902175904 test_loss: -2.808888816833496
epoch: 10 training_loss -2.7585988807678223 test_loss: -2.874064636230469
epoch: 11 training_loss -2.8426922369003296 test_loss: -3.056199836730957
epoch: 12 training_loss -2.963265743255615 test_loss: -3.0653894424438475
epoch: 13 training_loss -3.0422125244140625 test_loss: -2.9147993087768556
epoch: 14 training_loss -3.134165573120117 test_loss: -3.1575267791748045
epoch: 15 training_loss -3.195269238948822 test_loss: -3.1903125762939455
epoch: 16 training_loss -3.311581676006317 test_loss: -3.2421524047851564
epoch: 17 training_loss -3.3179277420043944 test_loss: -3.438329315185547
epoch: 18 training_loss -3.4567709279060366 test_loss: -3.4176315307617187
epoch: 19 training_loss -3.4389950823783875 test_loss: -3.463010787963867
epoch: 20 training_loss -3.482508418560028 test_loss: -3.463174057006836
epoch: 21 training_loss -3.5299660897254945 test_loss: -3.4197021484375
epoch: 22 training_loss -3.636422755718231 test_loss: -3.695268249511719
epoch: 23 training_loss -3.6615809082984923 test_loss: -3.6685508728027343
epoch: 24 training_loss -3.6876583647727967 test_loss: -3.6353733062744142
epoch: 25 training_loss -3.7528723526000975 test_loss: -3.8431316375732423
epoch: 26 training_loss -3.7981012988090517 test_loss: -3.9334224700927733
epoch: 27 training_loss -3.819005801677704 test_loss: -3.870810699462891
epoch: 28 training_loss -3.8118739914894104 test_loss: -3.8864681243896486
epoch: 29 training_loss -3.9171616721153257 test_loss: -4.041937255859375
epoch: 30 training_loss -3.944972269535065 test_loss: -3.933076858520508
epoch: 31 training_loss -3.9633906626701356 test_loss: -4.083834457397461
epoch: 32 training_loss -4.043737478256226 test_loss: -4.071973037719727
epoch: 33 training_loss -4.097130813598633 test_loss: -4.046134185791016
epoch: 34 training_loss -4.040198166370391 test_loss: -4.057088470458984
epoch: 35 training_loss -4.099605190753937 test_loss: -4.098440551757813
epoch: 36 training_loss -4.149058048725128 test_loss: -4.184614562988282
epoch: 37 training_loss -4.219416058063507 test_loss: -4.29063835144043
epoch: 38 training_loss -4.202255539894104 test_loss: -4.286035537719727
epoch: 39 training_loss -4.215471258163452 test_loss: -4.289945220947265
epoch: 40 training_loss -4.242948977947235 test_loss: -4.2449485778808596
epoch: 41 training_loss -4.3164450812339785 test_loss: -4.2209117889404295
epoch: 42 training_loss -4.395512187480927 test_loss: -4.364554977416992
epoch: 43 training_loss -4.331046617031097 test_loss: -4.371203231811523
epoch: 44 training_loss -4.3720130443573 test_loss: -4.370019912719727
epoch: 45 training_loss -4.414340901374817 test_loss: -4.3357391357421875
epoch: 46 training_loss -4.367812433242798 test_loss: -4.373397445678711
epoch: 47 training_loss -4.414338927268982 test_loss: -4.4563720703125
epoch: 48 training_loss -4.410991230010986 test_loss: -4.235665130615234
epoch: 49 training_loss -4.469870209693909 test_loss: -4.437165451049805
epoch: 50 training_loss -4.490381798744202 test_loss: -4.59013786315918
epoch: 51 training_loss -4.534486112594604 test_loss: -4.560139083862305
epoch: 52 training_loss -4.4823699378967286 test_loss: -4.474292373657226
epoch: 53 training_loss -4.623530588150024 test_loss: -4.630387878417968
epoch: 54 training_loss -4.591286270618439 test_loss: -4.6453502655029295
epoch: 55 training_loss -4.592756285667419 test_loss: -4.6644432067871096
epoch: 56 training_loss -4.621885237693786 test_loss: -4.3844749450683596
epoch: 57 training_loss -4.522833766937256 test_loss: -4.618479537963867
epoch: 58 training_loss -4.591844983100891 test_loss: -4.616802978515625
epoch: 59 training_loss -4.673172650337219 test_loss: -4.8018146514892575
epoch: 60 training_loss -4.709615688323975 test_loss: -4.576393127441406
epoch: 61 training_loss -4.682487387657165 test_loss: -4.781033325195312
epoch: 62 training_loss -4.693048205375671 test_loss: -4.719482040405273
epoch: 63 training_loss -4.691237001419068 test_loss: -4.775968933105469
epoch: 64 training_loss -4.680270938873291 test_loss: -4.726448440551758
epoch: 65 training_loss -4.7350058841705325 test_loss: -4.743231201171875
epoch: 66 training_loss -4.747500791549682 test_loss: -4.816352844238281
epoch: 67 training_loss -4.763535213470459 test_loss: -4.805776596069336
epoch: 68 training_loss -4.767348828315735 test_loss: -4.744222640991211
epoch: 69 training_loss -4.749862127304077 test_loss: -4.793743515014649
epoch: 70 training_loss -4.843568959236145 test_loss: -4.792744827270508
epoch: 71 training_loss -4.830760226249695 test_loss: -4.715113830566406
epoch: 72 training_loss -4.802149786949157 test_loss: -4.943773651123047
epoch: 73 training_loss -4.840216269493103 test_loss: -4.8247016906738285
epoch: 74 training_loss -4.865863375663757 test_loss: -4.954800796508789
epoch: 75 training_loss -4.900352878570557 test_loss: -4.940213394165039
epoch: 76 training_loss -4.903758578300476 test_loss: -4.916209030151367
epoch: 77 training_loss -4.869303011894226 test_loss: -4.897460174560547
epoch: 78 training_loss -4.849453234672547 test_loss: -4.891061401367187
epoch: 79 training_loss -4.894411506652832 test_loss: -4.994396209716797
epoch: 80 training_loss -4.916613087654114 test_loss: -4.977863693237305
epoch: 81 training_loss -4.954539933204651 test_loss: -4.916138458251953
epoch: 82 training_loss -4.922651586532592 test_loss: -4.950762939453125
epoch: 83 training_loss -4.9297997188568115 test_loss: -4.955964660644531
epoch: 84 training_loss -4.95939932346344 test_loss: -5.022120666503906
epoch: 85 training_loss -5.0040100574493405 test_loss: -5.033820724487304
epoch: 86 training_loss -4.956741414070129 test_loss: -5.013119125366211
epoch: 87 training_loss -4.983445439338684 test_loss: -5.008941268920898
epoch: 88 training_loss -5.001963820457458 test_loss: -5.106995010375977
epoch: 89 training_loss -5.030667562484741 test_loss: -4.936935806274414
epoch: 90 training_loss -5.031206750869751 test_loss: -5.077420806884765
epoch: 91 training_loss -5.035124168395996 test_loss: -5.133622741699218
epoch: 92 training_loss -5.048268084526062 test_loss: -5.059477233886719
epoch: 93 training_loss -5.021042618751526 test_loss: -4.860078048706055
epoch: 94 training_loss -5.046597061157226 test_loss: -5.13213882446289
epoch: 95 training_loss -5.006075315475464 test_loss: -5.061819076538086
epoch: 96 training_loss -5.106264390945435 test_loss: -5.1210472106933596
epoch: 97 training_loss -5.129107742309571 test_loss: -5.0901233673095705
epoch: 98 training_loss -5.091606779098511 test_loss: -5.181389617919922
epoch: 99 training_loss -5.113281745910644 test_loss: -5.171572875976563
epoch: 100 training_loss -5.1334921169281005 test_loss: -4.965396118164063
epoch: 101 training_loss -5.181687488555908 test_loss: -5.078275299072265
epoch: 102 training_loss -5.15710141658783 test_loss: -5.160956192016601
epoch: 103 training_loss -5.124197235107422 test_loss: -5.2088874816894535
epoch: 104 training_loss -5.149112548828125 test_loss: -5.270351409912109
epoch: 105 training_loss -5.210804643630982 test_loss: -5.276649475097656
epoch: 106 training_loss -5.201821041107178 test_loss: -5.292315673828125
epoch: 107 training_loss -5.125538182258606 test_loss: -5.256996154785156
epoch: 108 training_loss -5.200215311050415 test_loss: -5.228153228759766
epoch: 109 training_loss -5.233448061943054 test_loss: -5.300429153442383
epoch: 110 training_loss -5.230900692939758 test_loss: -5.116752243041992
epoch: 111 training_loss -5.277675580978394 test_loss: -5.157059097290039
epoch: 112 training_loss -5.179416770935059 test_loss: -5.253900146484375
epoch: 113 training_loss -5.254253339767456 test_loss: -5.294660186767578
epoch: 114 training_loss -5.26086046218872 test_loss: -5.318646240234375
epoch: 115 training_loss -5.225157928466797 test_loss: -5.312787246704102
epoch: 116 training_loss -5.271514353752136 test_loss: -5.329905319213867
epoch: 117 training_loss -5.281127882003784 test_loss: -5.2819572448730465
epoch: 118 training_loss -5.286134805679321 test_loss: -5.313907623291016
epoch: 119 training_loss -5.320161814689636 test_loss: -5.281693649291992
epoch: 120 training_loss -5.3123013782501225 test_loss: -5.352951431274414
epoch: 121 training_loss -5.334207696914673 test_loss: -5.393533325195312
epoch: 122 training_loss -5.336505661010742 test_loss: -5.3921630859375
epoch: 123 training_loss -5.323470516204834 test_loss: -5.235806655883789
epoch: 124 training_loss -5.326112112998962 test_loss: -5.382908630371094
epoch: 125 training_loss -5.296287198066711 test_loss: -5.317456436157227
epoch: 126 training_loss -5.335711946487427 test_loss: -5.307863998413086
epoch: 127 training_loss -5.34157961845398 test_loss: -5.456078720092774
epoch: 128 training_loss -5.360444002151489 test_loss: -5.423921966552735
epoch: 129 training_loss -5.369475688934326 test_loss: -5.376752471923828
epoch: 130 training_loss -5.3879933929443355 test_loss: -5.418080139160156
epoch: 131 training_loss -5.394420404434204 test_loss: -5.354197692871094
epoch: 132 training_loss -5.375058598518372 test_loss: -5.3708240509033205
epoch: 133 training_loss -5.370192656517029 test_loss: -5.420176315307617
epoch: 134 training_loss -5.402345690727234 test_loss: -5.488905715942383
epoch: 135 training_loss -5.432749195098877 test_loss: -5.516009521484375
epoch: 136 training_loss -5.383149876594543 test_loss: -5.479388427734375
epoch: 137 training_loss -5.450772476196289 test_loss: -5.279012680053711
epoch: 138 training_loss -5.361372623443604 test_loss: -5.52929573059082
epoch: 139 training_loss -5.483598170280456 test_loss: -5.468765640258789
epoch: 140 training_loss -5.471368670463562 test_loss: -5.533569717407227
epoch: 141 training_loss -5.423426990509033 test_loss: -5.336849594116211
epoch: 142 training_loss -5.450216636657715 test_loss: -5.571311950683594
epoch: 143 training_loss -5.4857665014266965 test_loss: -5.510702514648438
epoch: 144 training_loss -5.496008248329162 test_loss: -5.277346420288086
epoch: 145 training_loss -5.441022834777832 test_loss: -5.529162216186523
epoch: 146 training_loss -5.490674443244934 test_loss: -5.431257629394532
epoch: 147 training_loss -5.479997005462646 test_loss: -5.4927314758300785
epoch: 148 training_loss -5.4735810613632205 test_loss: -5.3299816131591795
epoch: 149 training_loss -5.502089648246765 test_loss: -5.5713237762451175
108.01224561229019
episode: 0 training return: tensor(-5.3171e+16, device='cuda:0')
episode: 1 training return: tensor(-4.5780e+17, device='cuda:0')
episode: 2 training return: tensor(-2.5480e+18, device='cuda:0')
episode: 3 training return: tensor(-3.4540e+17, device='cuda:0')
epoch: 1 test_true_pfm: -8.92114939708507
episode: 4 training return: tensor(-6.0442e+17, device='cuda:0')
episode: 5 training return: tensor(-7.0539e+15, device='cuda:0')
episode: 6 training return: tensor(-5.5190e+17, device='cuda:0')
episode: 7 training return: tensor(-6.0700e+11, device='cuda:0')
epoch: 2 test_true_pfm: -10.950505851813116
episode: 8 training return: tensor(-1.0123e+17, device='cuda:0')
episode: 9 training return: tensor(-6.0839e+10, device='cuda:0')
episode: 10 training return: tensor(-4.2256e+08, device='cuda:0')
episode: 11 training return: tensor(-6.7446e+08, device='cuda:0')
epoch: 3 test_true_pfm: -10.249227168950771
episode: 12 training return: tensor(-1.5324e+09, device='cuda:0')
episode: 13 training return: tensor(-7.2753e+08, device='cuda:0')
episode: 14 training return: tensor(-1.1259e+11, device='cuda:0')
episode: 15 training return: tensor(-1.3486e+09, device='cuda:0')
epoch: 4 test_true_pfm: -9.668962168896812
episode: 16 training return: tensor(-2.4158e+12, device='cuda:0')
episode: 17 training return: tensor(-1.1768e+09, device='cuda:0')
episode: 18 training return: tensor(-5.0236e+09, device='cuda:0')
episode: 19 training return: tensor(-1.2954e+09, device='cuda:0')
epoch: 5 test_true_pfm: -10.56465497106952
episode: 20 training return: tensor(-8.7032e+09, device='cuda:0')
episode: 21 training return: tensor(-5.2758e+10, device='cuda:0')
episode: 22 training return: tensor(-1.2514e+09, device='cuda:0')
episode: 23 training return: tensor(-5.7656e+10, device='cuda:0')
epoch: 6 test_true_pfm: -10.836580735506468
episode: 24 training return: tensor(-2.9034e+09, device='cuda:0')
episode: 25 training return: tensor(-1.8727e+09, device='cuda:0')
episode: 26 training return: tensor(-1.4274e+09, device='cuda:0')
episode: 27 training return: tensor(-1.0316e+10, device='cuda:0')
epoch: 7 test_true_pfm: -11.175529846615706
episode: 28 training return: tensor(-1.9022e+09, device='cuda:0')
episode: 29 training return: tensor(-5.7944e+08, device='cuda:0')
episode: 30 training return: tensor(-4.8407e+09, device='cuda:0')
episode: 31 training return: tensor(-2.4343e+10, device='cuda:0')
epoch: 8 test_true_pfm: -10.559228988197717
episode: 32 training return: tensor(-4.2683e+09, device='cuda:0')
episode: 33 training return: tensor(-8.3311e+08, device='cuda:0')
episode: 34 training return: tensor(-1.5645e+09, device='cuda:0')
episode: 35 training return: tensor(-5.6988e+09, device='cuda:0')
epoch: 9 test_true_pfm: -12.77401783564984
episode: 36 training return: tensor(-5.4525e+09, device='cuda:0')
episode: 37 training return: tensor(-3.2688e+09, device='cuda:0')
episode: 38 training return: tensor(-1.5112e+09, device='cuda:0')
episode: 39 training return: tensor(-2.2545e+12, device='cuda:0')
epoch: 10 test_true_pfm: -11.671372766928027
episode: 40 training return: tensor(-5.6394e+08, device='cuda:0')
episode: 41 training return: tensor(-1.5744e+11, device='cuda:0')
episode: 42 training return: tensor(-3.6734e+09, device='cuda:0')
episode: 43 training return: tensor(-4.4000e+10, device='cuda:0')
epoch: 11 test_true_pfm: -12.064882545305206
episode: 44 training return: tensor(-3.0553e+10, device='cuda:0')
episode: 45 training return: tensor(-1.0906e+11, device='cuda:0')
episode: 46 training return: tensor(-1.9404e+09, device='cuda:0')
episode: 47 training return: tensor(-1.6490e+09, device='cuda:0')
epoch: 12 test_true_pfm: -11.171806951495189
episode: 48 training return: tensor(-2.7754e+09, device='cuda:0')
episode: 49 training return: tensor(-2.1465e+08, device='cuda:0')
episode: 50 training return: tensor(-5.1712e+09, device='cuda:0')
episode: 51 training return: tensor(-6.0648e+09, device='cuda:0')
epoch: 13 test_true_pfm: -11.599700997793521
episode: 52 training return: tensor(-6.4452e+08, device='cuda:0')
episode: 53 training return: tensor(-9.5162e+08, device='cuda:0')
episode: 54 training return: tensor(-1.2198e+09, device='cuda:0')
episode: 55 training return: tensor(-4.5557e+08, device='cuda:0')
epoch: 14 test_true_pfm: -11.89626606859449
episode: 56 training return: tensor(-1.8914e+09, device='cuda:0')
episode: 57 training return: tensor(-2.8686e+09, device='cuda:0')
episode: 58 training return: tensor(-5.7149e+08, device='cuda:0')
episode: 59 training return: tensor(-5.8534e+10, device='cuda:0')
epoch: 15 test_true_pfm: -11.909832307039691
episode: 60 training return: tensor(-7.1837e+08, device='cuda:0')
episode: 61 training return: tensor(-2.8805e+11, device='cuda:0')
episode: 62 training return: tensor(-6.1296e+10, device='cuda:0')
episode: 63 training return: tensor(-2.7472e+09, device='cuda:0')
epoch: 16 test_true_pfm: -11.994258360935461
episode: 64 training return: tensor(-8.0126e+08, device='cuda:0')
episode: 65 training return: tensor(-1.3696e+09, device='cuda:0')
episode: 66 training return: tensor(-2.1078e+10, device='cuda:0')
episode: 67 training return: tensor(-2.8349e+09, device='cuda:0')
epoch: 17 test_true_pfm: -11.451341518749809
episode: 68 training return: tensor(-2.6050e+09, device='cuda:0')
episode: 69 training return: tensor(-4.8393e+09, device='cuda:0')
episode: 70 training return: tensor(-2.1967e+10, device='cuda:0')
episode: 71 training return: tensor(-3.6735e+09, device='cuda:0')
epoch: 18 test_true_pfm: -11.436440750639237
episode: 72 training return: tensor(-1.3518e+09, device='cuda:0')
episode: 73 training return: tensor(-1.6416e+09, device='cuda:0')
episode: 74 training return: tensor(-3.9655e+09, device='cuda:0')
episode: 75 training return: tensor(-2.9628e+09, device='cuda:0')
epoch: 19 test_true_pfm: -12.841560174779
episode: 76 training return: tensor(-1.1760e+09, device='cuda:0')
episode: 77 training return: tensor(-3.7825e+09, device='cuda:0')
episode: 78 training return: tensor(-1.0732e+10, device='cuda:0')
episode: 79 training return: tensor(-1.4356e+09, device='cuda:0')
epoch: 20 test_true_pfm: -11.033038420352415
episode: 80 training return: tensor(-2.6365e+09, device='cuda:0')
episode: 81 training return: tensor(-1.9354e+09, device='cuda:0')
episode: 82 training return: tensor(-1.7856e+09, device='cuda:0')
episode: 83 training return: tensor(-1.2492e+09, device='cuda:0')
epoch: 21 test_true_pfm: -12.205167547827774
episode: 84 training return: tensor(-1.7018e+10, device='cuda:0')
episode: 85 training return: tensor(-2.2265e+09, device='cuda:0')
episode: 86 training return: tensor(-4.3119e+09, device='cuda:0')
episode: 87 training return: tensor(-1.0136e+10, device='cuda:0')
epoch: 22 test_true_pfm: -12.075615424654128
episode: 88 training return: tensor(-1.6460e+09, device='cuda:0')
episode: 89 training return: tensor(-1.0791e+09, device='cuda:0')
episode: 90 training return: tensor(-1.5149e+09, device='cuda:0')
episode: 91 training return: tensor(-7.1410e+08, device='cuda:0')
epoch: 23 test_true_pfm: -12.309591946915258
episode: 92 training return: tensor(-1.7797e+09, device='cuda:0')
episode: 93 training return: tensor(-5.3989e+08, device='cuda:0')
episode: 94 training return: tensor(-1.3924e+09, device='cuda:0')
episode: 95 training return: tensor(-5.8749e+08, device='cuda:0')
epoch: 24 test_true_pfm: -11.341900626682943
episode: 96 training return: tensor(-9.1196e+08, device='cuda:0')
episode: 97 training return: tensor(-1.8052e+09, device='cuda:0')
episode: 98 training return: tensor(-1.6685e+09, device='cuda:0')
episode: 99 training return: tensor(-6.3312e+09, device='cuda:0')
epoch: 25 test_true_pfm: -10.832614340875796
episode: 100 training return: tensor(-1.0367e+09, device='cuda:0')
episode: 101 training return: tensor(-1.8056e+09, device='cuda:0')
episode: 102 training return: tensor(-1.8812e+10, device='cuda:0')
episode: 103 training return: tensor(-5.7635e+11, device='cuda:0')
epoch: 26 test_true_pfm: -13.234151142384096
episode: 104 training return: tensor(-3.1849e+09, device='cuda:0')
episode: 105 training return: tensor(-1.1957e+09, device='cuda:0')
episode: 106 training return: tensor(-5.8960e+08, device='cuda:0')
episode: 107 training return: tensor(-7.4871e+09, device='cuda:0')
epoch: 27 test_true_pfm: -12.008817308899205
episode: 108 training return: tensor(-1.5009e+09, device='cuda:0')
episode: 109 training return: tensor(-5.0862e+09, device='cuda:0')
episode: 110 training return: tensor(-3.2104e+09, device='cuda:0')
episode: 111 training return: tensor(-1.1630e+09, device='cuda:0')
epoch: 28 test_true_pfm: -11.768165666550786
episode: 112 training return: tensor(-4.7763e+09, device='cuda:0')
episode: 113 training return: tensor(-5.9097e+09, device='cuda:0')
episode: 114 training return: tensor(-3.7912e+08, device='cuda:0')
episode: 115 training return: tensor(-1.9341e+09, device='cuda:0')
epoch: 29 test_true_pfm: -12.977362253988247
episode: 116 training return: tensor(-3.2808e+10, device='cuda:0')
episode: 117 training return: tensor(-1.0480e+11, device='cuda:0')
episode: 118 training return: tensor(-1.2909e+09, device='cuda:0')
episode: 119 training return: tensor(-8.8899e+08, device='cuda:0')
epoch: 30 test_true_pfm: -12.007130230352873
episode: 120 training return: tensor(-2.2938e+09, device='cuda:0')
episode: 121 training return: tensor(-1.5185e+11, device='cuda:0')
episode: 122 training return: tensor(-4.8466e+08, device='cuda:0')
episode: 123 training return: tensor(-5.3861e+08, device='cuda:0')
epoch: 31 test_true_pfm: -11.912046107916924
episode: 124 training return: tensor(-7.9478e+11, device='cuda:0')
episode: 125 training return: tensor(-3.8160e+10, device='cuda:0')
episode: 126 training return: tensor(-4.4653e+10, device='cuda:0')
episode: 127 training return: tensor(-3.5531e+10, device='cuda:0')
epoch: 32 test_true_pfm: -11.993676404846022
episode: 128 training return: tensor(-8.0294e+08, device='cuda:0')
episode: 129 training return: tensor(-4.5903e+08, device='cuda:0')
episode: 130 training return: tensor(-7.3100e+10, device='cuda:0')
episode: 131 training return: tensor(-1.3128e+09, device='cuda:0')
epoch: 33 test_true_pfm: -10.629959674695822
episode: 132 training return: tensor(-2.9441e+11, device='cuda:0')
episode: 133 training return: tensor(-3.6503e+08, device='cuda:0')
episode: 134 training return: tensor(-7.9654e+08, device='cuda:0')
episode: 135 training return: tensor(-1.0053e+09, device='cuda:0')
epoch: 34 test_true_pfm: -11.919993108630893
episode: 136 training return: tensor(-1.8054e+09, device='cuda:0')
episode: 137 training return: tensor(-8.0034e+08, device='cuda:0')
episode: 138 training return: tensor(-8.0830e+08, device='cuda:0')
episode: 139 training return: tensor(-7.9103e+08, device='cuda:0')
epoch: 35 test_true_pfm: -11.989261798257228
episode: 140 training return: tensor(-1.8097e+10, device='cuda:0')
episode: 141 training return: tensor(-1.5580e+09, device='cuda:0')
episode: 142 training return: tensor(-6.7716e+08, device='cuda:0')
episode: 143 training return: tensor(-3.5012e+10, device='cuda:0')
epoch: 36 test_true_pfm: -12.445030994512727
episode: 144 training return: tensor(-4.4913e+09, device='cuda:0')
episode: 145 training return: tensor(-1.2925e+09, device='cuda:0')
episode: 146 training return: tensor(-3.1231e+09, device='cuda:0')
episode: 147 training return: tensor(-2.8348e+09, device='cuda:0')
epoch: 37 test_true_pfm: -11.250133015326357
episode: 148 training return: tensor(-5.7844e+09, device='cuda:0')
episode: 149 training return: tensor(-3.2274e+09, device='cuda:0')
episode: 150 training return: tensor(-3.6759e+09, device='cuda:0')
episode: 151 training return: tensor(-1.4981e+09, device='cuda:0')
epoch: 38 test_true_pfm: -12.309582269339746
episode: 152 training return: tensor(-3.8199e+09, device='cuda:0')
episode: 153 training return: tensor(-1.4385e+09, device='cuda:0')
episode: 154 training return: tensor(-5.6159e+09, device='cuda:0')
episode: 155 training return: tensor(-8.3421e+08, device='cuda:0')
epoch: 39 test_true_pfm: -12.177202431910672
episode: 156 training return: tensor(-6.0424e+09, device='cuda:0')
episode: 157 training return: tensor(-7.8521e+09, device='cuda:0')
episode: 158 training return: tensor(-8.6988e+08, device='cuda:0')
episode: 159 training return: tensor(-4.3336e+10, device='cuda:0')
epoch: 40 test_true_pfm: -11.941719514615281
episode: 160 training return: tensor(-4.6164e+08, device='cuda:0')
episode: 161 training return: tensor(-1.6546e+09, device='cuda:0')
episode: 162 training return: tensor(-6.1214e+11, device='cuda:0')
episode: 163 training return: tensor(-3.6982e+09, device='cuda:0')
epoch: 41 test_true_pfm: -12.810509853689108
episode: 164 training return: tensor(-4.7383e+10, device='cuda:0')
episode: 165 training return: tensor(-2.1842e+09, device='cuda:0')
episode: 166 training return: tensor(-6.1920e+08, device='cuda:0')
episode: 167 training return: tensor(-4.1777e+09, device='cuda:0')
epoch: 42 test_true_pfm: -12.205453274964452
episode: 168 training return: tensor(-8.8279e+08, device='cuda:0')
episode: 169 training return: tensor(-1.7228e+09, device='cuda:0')
episode: 170 training return: tensor(-1.9050e+09, device='cuda:0')
episode: 171 training return: tensor(-1.6710e+09, device='cuda:0')
epoch: 43 test_true_pfm: -11.731033631705653
episode: 172 training return: tensor(-5.5876e+08, device='cuda:0')
episode: 173 training return: tensor(-2.1171e+09, device='cuda:0')
episode: 174 training return: tensor(-1.6348e+09, device='cuda:0')
episode: 175 training return: tensor(-6.6817e+08, device='cuda:0')
epoch: 44 test_true_pfm: -11.854495925163425
episode: 176 training return: tensor(-1.2481e+09, device='cuda:0')
episode: 177 training return: tensor(-1.9346e+09, device='cuda:0')
episode: 178 training return: tensor(-3.8282e+08, device='cuda:0')
episode: 179 training return: tensor(-7.5914e+08, device='cuda:0')
epoch: 45 test_true_pfm: -12.018877760873433
episode: 180 training return: tensor(-1.1768e+10, device='cuda:0')
episode: 181 training return: tensor(-3.2160e+10, device='cuda:0')
episode: 182 training return: tensor(-6.6315e+08, device='cuda:0')
episode: 183 training return: tensor(-1.1102e+12, device='cuda:0')
epoch: 46 test_true_pfm: -11.334192491025679
episode: 184 training return: tensor(-5.6700e+09, device='cuda:0')
episode: 185 training return: tensor(-6.1285e+10, device='cuda:0')
episode: 186 training return: tensor(-3.9964e+09, device='cuda:0')
episode: 187 training return: tensor(-1.2692e+09, device='cuda:0')
epoch: 47 test_true_pfm: -12.011140914883036
episode: 188 training return: tensor(-1.0535e+09, device='cuda:0')
episode: 189 training return: tensor(-2.8728e+09, device='cuda:0')
episode: 190 training return: tensor(-1.2808e+09, device='cuda:0')
episode: 191 training return: tensor(-3.5227e+11, device='cuda:0')
epoch: 48 test_true_pfm: -11.677510106191395
episode: 192 training return: tensor(-1.5328e+09, device='cuda:0')
episode: 193 training return: tensor(-4.0489e+09, device='cuda:0')
episode: 194 training return: tensor(-1.9842e+09, device='cuda:0')
episode: 195 training return: tensor(-2.4652e+09, device='cuda:0')
epoch: 49 test_true_pfm: -12.484634701754477
episode: 196 training return: tensor(-7.2254e+10, device='cuda:0')
episode: 197 training return: tensor(-4.8805e+09, device='cuda:0')
episode: 198 training return: tensor(-9.0282e+09, device='cuda:0')
episode: 199 training return: tensor(-1.8647e+09, device='cuda:0')
epoch: 50 test_true_pfm: -10.81555108127296
episode: 200 training return: tensor(-4.2130e+08, device='cuda:0')
episode: 201 training return: tensor(-2.9982e+11, device='cuda:0')
episode: 202 training return: tensor(-2.3692e+09, device='cuda:0')
episode: 203 training return: tensor(-1.1561e+09, device='cuda:0')
epoch: 51 test_true_pfm: -12.136063210993424
episode: 204 training return: tensor(-8.4750e+09, device='cuda:0')
episode: 205 training return: tensor(-4.5044e+09, device='cuda:0')
episode: 206 training return: tensor(-1.9726e+09, device='cuda:0')
episode: 207 training return: tensor(-2.7195e+09, device='cuda:0')
epoch: 52 test_true_pfm: -10.8203218734804
episode: 208 training return: tensor(-7.1983e+08, device='cuda:0')
episode: 209 training return: tensor(-6.8854e+09, device='cuda:0')
episode: 210 training return: tensor(-1.2630e+09, device='cuda:0')
episode: 211 training return: tensor(-9.2116e+10, device='cuda:0')
epoch: 53 test_true_pfm: -11.031378251365354
episode: 212 training return: tensor(-2.5759e+10, device='cuda:0')
episode: 213 training return: tensor(-8.0145e+08, device='cuda:0')
episode: 214 training return: tensor(-1.3635e+09, device='cuda:0')
episode: 215 training return: tensor(-1.4852e+09, device='cuda:0')
epoch: 54 test_true_pfm: -11.779321149920532
episode: 216 training return: tensor(-3.9097e+11, device='cuda:0')
episode: 217 training return: tensor(-2.1576e+09, device='cuda:0')
episode: 218 training return: tensor(-4.9415e+08, device='cuda:0')
episode: 219 training return: tensor(-1.4388e+09, device='cuda:0')
epoch: 55 test_true_pfm: -12.268030511981468
episode: 220 training return: tensor(-7.8862e+08, device='cuda:0')
episode: 221 training return: tensor(-3.1940e+08, device='cuda:0')
episode: 222 training return: tensor(-3.5142e+09, device='cuda:0')
episode: 223 training return: tensor(-7.0239e+08, device='cuda:0')
epoch: 56 test_true_pfm: -11.664706384474012
episode: 224 training return: tensor(-1.1093e+09, device='cuda:0')
episode: 225 training return: tensor(-1.4322e+10, device='cuda:0')
episode: 226 training return: tensor(-2.5941e+10, device='cuda:0')
episode: 227 training return: tensor(-8.5860e+08, device='cuda:0')
epoch: 57 test_true_pfm: -11.29010683694386
episode: 228 training return: tensor(-1.6049e+09, device='cuda:0')
episode: 229 training return: tensor(-2.1651e+11, device='cuda:0')
episode: 230 training return: tensor(-4.9493e+09, device='cuda:0')
episode: 231 training return: tensor(-4.4952e+09, device='cuda:0')
epoch: 58 test_true_pfm: -12.433053859143252
episode: 232 training return: tensor(-3.0971e+09, device='cuda:0')
episode: 233 training return: tensor(-1.0629e+09, device='cuda:0')
episode: 234 training return: tensor(-1.7597e+09, device='cuda:0')
episode: 235 training return: tensor(-3.0134e+09, device='cuda:0')
epoch: 59 test_true_pfm: -11.789721199538281
episode: 236 training return: tensor(-2.9407e+11, device='cuda:0')
episode: 237 training return: tensor(-5.9383e+09, device='cuda:0')
episode: 238 training return: tensor(-1.5707e+09, device='cuda:0')
episode: 239 training return: tensor(-1.7742e+09, device='cuda:0')
epoch: 60 test_true_pfm: -12.180811691405573
episode: 240 training return: tensor(-8.6195e+09, device='cuda:0')
episode: 241 training return: tensor(-3.8971e+09, device='cuda:0')
episode: 242 training return: tensor(-1.1672e+10, device='cuda:0')
episode: 243 training return: tensor(-7.2877e+08, device='cuda:0')
epoch: 61 test_true_pfm: -12.230524291147322
episode: 244 training return: tensor(-9.4895e+08, device='cuda:0')
episode: 245 training return: tensor(-9.9715e+08, device='cuda:0')
episode: 246 training return: tensor(-5.7533e+08, device='cuda:0')
episode: 247 training return: tensor(-1.4780e+09, device='cuda:0')
epoch: 62 test_true_pfm: -10.77784659133218
episode: 248 training return: tensor(-1.8983e+09, device='cuda:0')
episode: 249 training return: tensor(-6.4362e+08, device='cuda:0')
episode: 250 training return: tensor(-1.1851e+09, device='cuda:0')
episode: 251 training return: tensor(-8.6347e+09, device='cuda:0')
epoch: 63 test_true_pfm: -12.267903900274778
episode: 252 training return: tensor(-1.2922e+09, device='cuda:0')
episode: 253 training return: tensor(-5.1886e+08, device='cuda:0')
episode: 254 training return: tensor(-2.0746e+09, device='cuda:0')
episode: 255 training return: tensor(-6.5921e+09, device='cuda:0')
epoch: 64 test_true_pfm: -12.87091584562987
episode: 256 training return: tensor(-3.0277e+09, device='cuda:0')
episode: 257 training return: tensor(-7.0726e+08, device='cuda:0')
episode: 258 training return: tensor(-8.9491e+08, device='cuda:0')
episode: 259 training return: tensor(-2.4642e+09, device='cuda:0')
epoch: 65 test_true_pfm: -10.168057194688718
episode: 260 training return: tensor(-1.0012e+09, device='cuda:0')
episode: 261 training return: tensor(-5.7342e+09, device='cuda:0')
episode: 262 training return: tensor(-1.5185e+09, device='cuda:0')
episode: 263 training return: tensor(-3.1400e+09, device='cuda:0')
epoch: 66 test_true_pfm: -13.019776527683456
episode: 264 training return: tensor(-6.5746e+08, device='cuda:0')
episode: 265 training return: tensor(-3.3913e+10, device='cuda:0')
episode: 266 training return: tensor(-1.3365e+09, device='cuda:0')
episode: 267 training return: tensor(-6.4476e+09, device='cuda:0')
epoch: 67 test_true_pfm: -11.325939434868562
episode: 268 training return: tensor(-3.4708e+09, device='cuda:0')
episode: 269 training return: tensor(-8.6571e+08, device='cuda:0')
episode: 270 training return: tensor(-1.5365e+09, device='cuda:0')
episode: 271 training return: tensor(-2.5507e+09, device='cuda:0')
epoch: 68 test_true_pfm: -11.523076771885174
episode: 272 training return: tensor(-4.1077e+11, device='cuda:0')
episode: 273 training return: tensor(-1.6463e+09, device='cuda:0')
episode: 274 training return: tensor(-6.8914e+08, device='cuda:0')
episode: 275 training return: tensor(-4.6589e+08, device='cuda:0')
epoch: 69 test_true_pfm: -11.786718882326648
episode: 276 training return: tensor(-1.6273e+10, device='cuda:0')
episode: 277 training return: tensor(-9.8671e+08, device='cuda:0')
episode: 278 training return: tensor(-8.2323e+08, device='cuda:0')
episode: 279 training return: tensor(-2.8498e+09, device='cuda:0')
epoch: 70 test_true_pfm: -11.23649838475992
episode: 280 training return: tensor(-2.0525e+10, device='cuda:0')
episode: 281 training return: tensor(-9.9373e+09, device='cuda:0')
episode: 282 training return: tensor(-2.8639e+09, device='cuda:0')
episode: 283 training return: tensor(-1.0201e+12, device='cuda:0')
epoch: 71 test_true_pfm: -12.892325066563874
episode: 284 training return: tensor(-1.0263e+10, device='cuda:0')
episode: 285 training return: tensor(-1.6770e+09, device='cuda:0')
episode: 286 training return: tensor(-1.8179e+09, device='cuda:0')
episode: 287 training return: tensor(-6.4336e+08, device='cuda:0')
epoch: 72 test_true_pfm: -11.807989330241947
episode: 288 training return: tensor(-2.1439e+10, device='cuda:0')
episode: 289 training return: tensor(-2.8001e+08, device='cuda:0')
episode: 290 training return: tensor(-2.9426e+09, device='cuda:0')
episode: 291 training return: tensor(-7.7658e+09, device='cuda:0')
epoch: 73 test_true_pfm: -11.452284981368617
episode: 292 training return: tensor(-1.8996e+09, device='cuda:0')
episode: 293 training return: tensor(-7.1309e+08, device='cuda:0')
episode: 294 training return: tensor(-3.5476e+09, device='cuda:0')
episode: 295 training return: tensor(-1.2862e+09, device='cuda:0')
epoch: 74 test_true_pfm: -11.474876551645707
episode: 296 training return: tensor(-7.4888e+08, device='cuda:0')
episode: 297 training return: tensor(-7.8848e+09, device='cuda:0')
episode: 298 training return: tensor(-1.0249e+09, device='cuda:0')
episode: 299 training return: tensor(-1.6149e+09, device='cuda:0')
epoch: 75 test_true_pfm: -12.675055154517974
episode: 300 training return: tensor(-2.3094e+09, device='cuda:0')
episode: 301 training return: tensor(-3.9189e+09, device='cuda:0')
episode: 302 training return: tensor(-7.6980e+08, device='cuda:0')
episode: 303 training return: tensor(-3.4047e+09, device='cuda:0')
epoch: 76 test_true_pfm: -12.176292616622991
episode: 304 training return: tensor(-1.1143e+10, device='cuda:0')
episode: 305 training return: tensor(-1.2202e+09, device='cuda:0')
episode: 306 training return: tensor(-2.9901e+11, device='cuda:0')
episode: 307 training return: tensor(-1.1425e+10, device='cuda:0')
epoch: 77 test_true_pfm: -10.77303098450684
episode: 308 training return: tensor(-7.2927e+08, device='cuda:0')
episode: 309 training return: tensor(-6.2037e+08, device='cuda:0')
episode: 310 training return: tensor(-2.1053e+09, device='cuda:0')
episode: 311 training return: tensor(-1.7929e+09, device='cuda:0')
epoch: 78 test_true_pfm: -11.66424731243943
episode: 312 training return: tensor(-1.0069e+09, device='cuda:0')
episode: 313 training return: tensor(-4.4766e+09, device='cuda:0')
episode: 314 training return: tensor(-7.1080e+09, device='cuda:0')
episode: 315 training return: tensor(-3.3731e+09, device='cuda:0')
epoch: 79 test_true_pfm: -12.112921394999816
episode: 316 training return: tensor(-7.4642e+10, device='cuda:0')
episode: 317 training return: tensor(-2.1348e+09, device='cuda:0')
episode: 318 training return: tensor(-7.4149e+09, device='cuda:0')
episode: 319 training return: tensor(-8.2564e+10, device='cuda:0')
epoch: 80 test_true_pfm: -11.832714020339287
episode: 320 training return: tensor(-4.3470e+09, device='cuda:0')
episode: 321 training return: tensor(-7.6594e+08, device='cuda:0')
episode: 322 training return: tensor(-9.5370e+08, device='cuda:0')
episode: 323 training return: tensor(-4.2188e+09, device='cuda:0')
epoch: 81 test_true_pfm: -11.203125003728019
episode: 324 training return: tensor(-3.1958e+09, device='cuda:0')
episode: 325 training return: tensor(-2.1110e+11, device='cuda:0')
episode: 326 training return: tensor(-4.3937e+09, device='cuda:0')
episode: 327 training return: tensor(-2.8814e+09, device='cuda:0')
epoch: 82 test_true_pfm: -11.799760107398345
episode: 328 training return: tensor(-1.2886e+09, device='cuda:0')
episode: 329 training return: tensor(-2.1332e+09, device='cuda:0')
episode: 330 training return: tensor(-7.0550e+08, device='cuda:0')
episode: 331 training return: tensor(-7.2784e+10, device='cuda:0')
epoch: 83 test_true_pfm: -11.944743928379784
episode: 332 training return: tensor(-6.0221e+08, device='cuda:0')
episode: 333 training return: tensor(-3.7885e+11, device='cuda:0')
episode: 334 training return: tensor(-6.1488e+08, device='cuda:0')
episode: 335 training return: tensor(-9.2150e+08, device='cuda:0')
epoch: 84 test_true_pfm: -12.784157144716287
episode: 336 training return: tensor(-6.6536e+08, device='cuda:0')
episode: 337 training return: tensor(-4.8761e+11, device='cuda:0')
episode: 338 training return: tensor(-2.1751e+09, device='cuda:0')
episode: 339 training return: tensor(-1.9139e+11, device='cuda:0')
epoch: 85 test_true_pfm: -11.382967457551594
episode: 340 training return: tensor(-3.2570e+10, device='cuda:0')
episode: 341 training return: tensor(-2.5302e+09, device='cuda:0')
episode: 342 training return: tensor(-1.1029e+09, device='cuda:0')
episode: 343 training return: tensor(-3.2474e+10, device='cuda:0')
epoch: 86 test_true_pfm: -12.500674958943698
episode: 344 training return: tensor(-2.3455e+09, device='cuda:0')
episode: 345 training return: tensor(-4.4983e+09, device='cuda:0')
episode: 346 training return: tensor(-2.5024e+11, device='cuda:0')
episode: 347 training return: tensor(-1.4702e+09, device='cuda:0')
epoch: 87 test_true_pfm: -12.254859521381118
episode: 348 training return: tensor(-2.3457e+09, device='cuda:0')
episode: 349 training return: tensor(-6.3403e+08, device='cuda:0')
episode: 350 training return: tensor(-5.9548e+08, device='cuda:0')
episode: 351 training return: tensor(-2.0050e+11, device='cuda:0')
epoch: 88 test_true_pfm: -12.38533374835978
episode: 352 training return: tensor(-1.9222e+10, device='cuda:0')
episode: 353 training return: tensor(-8.9679e+08, device='cuda:0')
episode: 354 training return: tensor(-1.8906e+12, device='cuda:0')
episode: 355 training return: tensor(-1.1642e+10, device='cuda:0')
epoch: 89 test_true_pfm: -11.67851008946067
episode: 356 training return: tensor(-2.3239e+09, device='cuda:0')
episode: 357 training return: tensor(-3.4284e+11, device='cuda:0')
episode: 358 training return: tensor(-1.1664e+09, device='cuda:0')
episode: 359 training return: tensor(-3.6718e+09, device='cuda:0')
epoch: 90 test_true_pfm: -11.47604031787709
episode: 360 training return: tensor(-3.2568e+10, device='cuda:0')
episode: 361 training return: tensor(-5.1275e+09, device='cuda:0')
episode: 362 training return: tensor(-1.1026e+09, device='cuda:0')
episode: 363 training return: tensor(-7.3697e+08, device='cuda:0')
epoch: 91 test_true_pfm: -11.64427842923335
episode: 364 training return: tensor(-1.9551e+09, device='cuda:0')
episode: 365 training return: tensor(-1.3925e+09, device='cuda:0')
episode: 366 training return: tensor(-5.9888e+08, device='cuda:0')
episode: 367 training return: tensor(-2.8695e+09, device='cuda:0')
epoch: 92 test_true_pfm: -12.148262809055158
episode: 368 training return: tensor(-3.9469e+09, device='cuda:0')
episode: 369 training return: tensor(-1.5451e+11, device='cuda:0')
episode: 370 training return: tensor(-1.1412e+09, device='cuda:0')
episode: 371 training return: tensor(-4.3185e+09, device='cuda:0')
epoch: 93 test_true_pfm: -12.592627580696018
episode: 372 training return: tensor(-1.0719e+09, device='cuda:0')
episode: 373 training return: tensor(-1.1274e+09, device='cuda:0')
episode: 374 training return: tensor(-8.6889e+09, device='cuda:0')
episode: 375 training return: tensor(-1.0318e+10, device='cuda:0')
epoch: 94 test_true_pfm: -12.325074846144549
episode: 376 training return: tensor(-3.1049e+10, device='cuda:0')
episode: 377 training return: tensor(-1.4622e+11, device='cuda:0')
episode: 378 training return: tensor(-4.9367e+09, device='cuda:0')
episode: 379 training return: tensor(-3.8037e+09, device='cuda:0')
epoch: 95 test_true_pfm: -13.38264424951935
episode: 380 training return: tensor(-9.1302e+08, device='cuda:0')
episode: 381 training return: tensor(-2.4221e+09, device='cuda:0')
episode: 382 training return: tensor(-1.1804e+09, device='cuda:0')
episode: 383 training return: tensor(-1.8184e+09, device='cuda:0')
epoch: 96 test_true_pfm: -12.817578119454751
episode: 384 training return: tensor(-3.8847e+09, device='cuda:0')
episode: 385 training return: tensor(-2.1582e+09, device='cuda:0')
episode: 386 training return: tensor(-6.8928e+09, device='cuda:0')
episode: 387 training return: tensor(-5.3618e+09, device='cuda:0')
epoch: 97 test_true_pfm: -12.00368680246235
episode: 388 training return: tensor(-1.6743e+10, device='cuda:0')
episode: 389 training return: tensor(-5.0300e+09, device='cuda:0')
episode: 390 training return: tensor(-2.1749e+10, device='cuda:0')
episode: 391 training return: tensor(-9.6545e+08, device='cuda:0')
epoch: 98 test_true_pfm: -10.33063915299619
episode: 392 training return: tensor(-4.8152e+08, device='cuda:0')
episode: 393 training return: tensor(-8.2442e+10, device='cuda:0')
episode: 394 training return: tensor(-1.8720e+09, device='cuda:0')
episode: 395 training return: tensor(-1.3462e+09, device='cuda:0')
epoch: 99 test_true_pfm: -12.083286394269006
episode: 396 training return: tensor(-1.2102e+09, device='cuda:0')
episode: 397 training return: tensor(-6.8320e+09, device='cuda:0')
episode: 398 training return: tensor(-3.1844e+09, device='cuda:0')
episode: 399 training return: tensor(-4.0962e+11, device='cuda:0')
epoch: 100 test_true_pfm: -12.186954868659353
episode: 400 training return: tensor(-1.2143e+09, device='cuda:0')
episode: 401 training return: tensor(-3.7571e+09, device='cuda:0')
episode: 402 training return: tensor(-1.5134e+09, device='cuda:0')
episode: 403 training return: tensor(-3.3112e+11, device='cuda:0')
epoch: 101 test_true_pfm: -11.803058761052002
episode: 404 training return: tensor(-1.0672e+09, device='cuda:0')
episode: 405 training return: tensor(-3.7407e+11, device='cuda:0')
episode: 406 training return: tensor(-1.9595e+09, device='cuda:0')
episode: 407 training return: tensor(-9.6501e+08, device='cuda:0')
epoch: 102 test_true_pfm: -11.856004732832144
episode: 408 training return: tensor(-2.6757e+10, device='cuda:0')
episode: 409 training return: tensor(-4.1450e+09, device='cuda:0')
episode: 410 training return: tensor(-8.1824e+08, device='cuda:0')
episode: 411 training return: tensor(-1.4457e+11, device='cuda:0')
epoch: 103 test_true_pfm: -12.046540044775655
episode: 412 training return: tensor(-7.4858e+09, device='cuda:0')
episode: 413 training return: tensor(-2.6982e+09, device='cuda:0')
episode: 414 training return: tensor(-6.5536e+08, device='cuda:0')
episode: 415 training return: tensor(-1.2473e+11, device='cuda:0')
epoch: 104 test_true_pfm: -12.480202962652896
episode: 416 training return: tensor(-1.2959e+11, device='cuda:0')
episode: 417 training return: tensor(-7.4322e+08, device='cuda:0')
episode: 418 training return: tensor(-2.5490e+09, device='cuda:0')
episode: 419 training return: tensor(-6.0952e+08, device='cuda:0')
epoch: 105 test_true_pfm: -11.369653227033941
episode: 420 training return: tensor(-7.3855e+08, device='cuda:0')
episode: 421 training return: tensor(-2.3915e+10, device='cuda:0')
episode: 422 training return: tensor(-1.1450e+09, device='cuda:0')
episode: 423 training return: tensor(-1.8133e+09, device='cuda:0')
epoch: 106 test_true_pfm: -12.260471117723444
episode: 424 training return: tensor(-1.7380e+09, device='cuda:0')
episode: 425 training return: tensor(-3.6393e+09, device='cuda:0')
episode: 426 training return: tensor(-1.2289e+10, device='cuda:0')
episode: 427 training return: tensor(-8.6214e+08, device='cuda:0')
epoch: 107 test_true_pfm: -11.95977108045103
episode: 428 training return: tensor(-2.2971e+10, device='cuda:0')
episode: 429 training return: tensor(-9.6289e+08, device='cuda:0')
episode: 430 training return: tensor(-6.3133e+08, device='cuda:0')
episode: 431 training return: tensor(-1.8348e+10, device='cuda:0')
epoch: 108 test_true_pfm: -11.467519747205621
episode: 432 training return: tensor(-6.4037e+10, device='cuda:0')
episode: 433 training return: tensor(-1.1642e+09, device='cuda:0')
episode: 434 training return: tensor(-9.5604e+09, device='cuda:0')
episode: 435 training return: tensor(-1.8371e+09, device='cuda:0')
epoch: 109 test_true_pfm: -11.093020477303792
episode: 436 training return: tensor(-3.1817e+11, device='cuda:0')
episode: 437 training return: tensor(-1.0662e+10, device='cuda:0')
episode: 438 training return: tensor(-7.3148e+10, device='cuda:0')
episode: 439 training return: tensor(-9.6844e+08, device='cuda:0')
epoch: 110 test_true_pfm: -11.604956040777608
episode: 440 training return: tensor(-1.0300e+10, device='cuda:0')
episode: 441 training return: tensor(-7.0878e+08, device='cuda:0')
episode: 442 training return: tensor(-1.2717e+09, device='cuda:0')
episode: 443 training return: tensor(-1.6121e+11, device='cuda:0')
epoch: 111 test_true_pfm: -12.307494091157563
episode: 444 training return: tensor(-9.2114e+09, device='cuda:0')
episode: 445 training return: tensor(-4.6838e+08, device='cuda:0')
episode: 446 training return: tensor(-3.9904e+11, device='cuda:0')
episode: 447 training return: tensor(-1.9479e+09, device='cuda:0')
epoch: 112 test_true_pfm: -11.444391617619548
episode: 448 training return: tensor(-3.4305e+08, device='cuda:0')
episode: 449 training return: tensor(-8.5687e+08, device='cuda:0')
episode: 450 training return: tensor(-7.4475e+08, device='cuda:0')
episode: 451 training return: tensor(-3.4304e+10, device='cuda:0')
epoch: 113 test_true_pfm: -10.92751210339189
episode: 452 training return: tensor(-1.1576e+09, device='cuda:0')
episode: 453 training return: tensor(-9.2930e+08, device='cuda:0')
episode: 454 training return: tensor(-1.7962e+09, device='cuda:0')
episode: 455 training return: tensor(-1.8382e+10, device='cuda:0')
epoch: 114 test_true_pfm: -12.261791024810961
episode: 456 training return: tensor(-2.3348e+09, device='cuda:0')
episode: 457 training return: tensor(-1.9930e+09, device='cuda:0')
episode: 458 training return: tensor(-7.3955e+08, device='cuda:0')
episode: 459 training return: tensor(-3.1462e+08, device='cuda:0')
epoch: 115 test_true_pfm: -11.246105712065306
episode: 460 training return: tensor(-1.4955e+11, device='cuda:0')
episode: 461 training return: tensor(-4.3900e+09, device='cuda:0')
episode: 462 training return: tensor(-1.2657e+09, device='cuda:0')
episode: 463 training return: tensor(-1.0869e+09, device='cuda:0')
epoch: 116 test_true_pfm: -12.111472505877607
episode: 464 training return: tensor(-3.2468e+10, device='cuda:0')
episode: 465 training return: tensor(-3.0663e+09, device='cuda:0')
episode: 466 training return: tensor(-3.3194e+09, device='cuda:0')
episode: 467 training return: tensor(-2.0438e+09, device='cuda:0')
epoch: 117 test_true_pfm: -12.005903348155568
episode: 468 training return: tensor(-9.3188e+09, device='cuda:0')
episode: 469 training return: tensor(-2.2490e+09, device='cuda:0')
episode: 470 training return: tensor(-2.3876e+11, device='cuda:0')
episode: 471 training return: tensor(-2.2484e+11, device='cuda:0')
epoch: 118 test_true_pfm: -11.616679642197479
episode: 472 training return: tensor(-1.0979e+09, device='cuda:0')
episode: 473 training return: tensor(-4.9096e+09, device='cuda:0')
episode: 474 training return: tensor(-1.8958e+09, device='cuda:0')
episode: 475 training return: tensor(-2.5016e+11, device='cuda:0')
epoch: 119 test_true_pfm: -11.64110749277522
episode: 476 training return: tensor(-2.1621e+09, device='cuda:0')
episode: 477 training return: tensor(-8.0798e+09, device='cuda:0')
episode: 478 training return: tensor(-2.9945e+09, device='cuda:0')
episode: 479 training return: tensor(-6.2524e+08, device='cuda:0')
epoch: 120 test_true_pfm: -12.719644107454505
episode: 480 training return: tensor(-9.7985e+08, device='cuda:0')
episode: 481 training return: tensor(-1.1822e+11, device='cuda:0')
episode: 482 training return: tensor(-3.1375e+09, device='cuda:0')
episode: 483 training return: tensor(-4.5751e+08, device='cuda:0')
epoch: 121 test_true_pfm: -11.987207534510343
episode: 484 training return: tensor(-7.9189e+10, device='cuda:0')
episode: 485 training return: tensor(-1.6428e+09, device='cuda:0')
episode: 486 training return: tensor(-8.8442e+09, device='cuda:0')
episode: 487 training return: tensor(-4.5171e+09, device='cuda:0')
epoch: 122 test_true_pfm: -12.028982440068214
episode: 488 training return: tensor(-3.3041e+08, device='cuda:0')
episode: 489 training return: tensor(-1.4372e+10, device='cuda:0')
episode: 490 training return: tensor(-9.5417e+09, device='cuda:0')
episode: 491 training return: tensor(-1.1216e+10, device='cuda:0')
epoch: 123 test_true_pfm: -12.220598204807697
episode: 492 training return: tensor(-1.0198e+10, device='cuda:0')
episode: 493 training return: tensor(-1.6337e+10, device='cuda:0')
episode: 494 training return: tensor(-1.4641e+09, device='cuda:0')
episode: 495 training return: tensor(-9.9093e+08, device='cuda:0')
epoch: 124 test_true_pfm: -10.610808684432763
episode: 496 training return: tensor(-2.7412e+09, device='cuda:0')
episode: 497 training return: tensor(-4.6645e+08, device='cuda:0')
episode: 498 training return: tensor(-1.9901e+08, device='cuda:0')
episode: 499 training return: tensor(-9.3137e+08, device='cuda:0')
epoch: 125 test_true_pfm: -11.857293644565226
episode: 500 training return: tensor(-2.5431e+09, device='cuda:0')
episode: 501 training return: tensor(-8.8898e+08, device='cuda:0')
episode: 502 training return: tensor(-1.5188e+09, device='cuda:0')
episode: 503 training return: tensor(-3.8490e+09, device='cuda:0')
epoch: 126 test_true_pfm: -10.93148374454364
episode: 504 training return: tensor(-1.2596e+09, device='cuda:0')
episode: 505 training return: tensor(-2.7665e+09, device='cuda:0')
episode: 506 training return: tensor(-1.2714e+10, device='cuda:0')
episode: 507 training return: tensor(-4.8572e+09, device='cuda:0')
epoch: 127 test_true_pfm: -12.033586614817192
episode: 508 training return: tensor(-3.9918e+08, device='cuda:0')
episode: 509 training return: tensor(-7.9132e+09, device='cuda:0')
episode: 510 training return: tensor(-3.8837e+08, device='cuda:0')
episode: 511 training return: tensor(-1.4523e+09, device='cuda:0')
epoch: 128 test_true_pfm: -12.2840538320338
episode: 512 training return: tensor(-9.1783e+08, device='cuda:0')
episode: 513 training return: tensor(-6.2391e+08, device='cuda:0')
episode: 514 training return: tensor(-6.7681e+08, device='cuda:0')
episode: 515 training return: tensor(-2.6003e+12, device='cuda:0')
epoch: 129 test_true_pfm: -12.596828142985018
episode: 516 training return: tensor(-1.9020e+12, device='cuda:0')
episode: 517 training return: tensor(-1.3073e+09, device='cuda:0')
episode: 518 training return: tensor(-8.7262e+08, device='cuda:0')
episode: 519 training return: tensor(-1.3294e+10, device='cuda:0')
epoch: 130 test_true_pfm: -12.56370824628716
episode: 520 training return: tensor(-9.7621e+08, device='cuda:0')
episode: 521 training return: tensor(-4.2348e+09, device='cuda:0')
episode: 522 training return: tensor(-7.0726e+10, device='cuda:0')
episode: 523 training return: tensor(-1.9128e+09, device='cuda:0')
epoch: 131 test_true_pfm: -12.070161631662192
episode: 524 training return: tensor(-1.9673e+09, device='cuda:0')
episode: 525 training return: tensor(-6.5116e+08, device='cuda:0')
episode: 526 training return: tensor(-1.3259e+10, device='cuda:0')
episode: 527 training return: tensor(-1.0594e+10, device='cuda:0')
epoch: 132 test_true_pfm: -11.767522396231259
episode: 528 training return: tensor(-1.1616e+11, device='cuda:0')
episode: 529 training return: tensor(-1.4582e+09, device='cuda:0')
episode: 530 training return: tensor(-6.7514e+11, device='cuda:0')
episode: 531 training return: tensor(-6.3512e+10, device='cuda:0')
epoch: 133 test_true_pfm: -12.034734345920983
episode: 532 training return: tensor(-1.4992e+09, device='cuda:0')
episode: 533 training return: tensor(-7.2535e+09, device='cuda:0')
episode: 534 training return: tensor(-2.7947e+09, device='cuda:0')
episode: 535 training return: tensor(-3.5898e+09, device='cuda:0')
epoch: 134 test_true_pfm: -11.682415951025018
episode: 536 training return: tensor(-1.0181e+09, device='cuda:0')
episode: 537 training return: tensor(-1.5497e+09, device='cuda:0')
episode: 538 training return: tensor(-4.2105e+08, device='cuda:0')
episode: 539 training return: tensor(-7.6789e+08, device='cuda:0')
epoch: 135 test_true_pfm: -12.161133005658048
episode: 540 training return: tensor(-2.6412e+08, device='cuda:0')
episode: 541 training return: tensor(-7.0123e+10, device='cuda:0')
episode: 542 training return: tensor(-1.9935e+09, device='cuda:0')
episode: 543 training return: tensor(-1.7974e+09, device='cuda:0')
epoch: 136 test_true_pfm: -12.215333807460718
episode: 544 training return: tensor(-1.7918e+09, device='cuda:0')
episode: 545 training return: tensor(-2.6742e+09, device='cuda:0')
episode: 546 training return: tensor(-6.2019e+08, device='cuda:0')
episode: 547 training return: tensor(-3.4278e+09, device='cuda:0')
epoch: 137 test_true_pfm: -11.001337491836086
episode: 548 training return: tensor(-7.6713e+08, device='cuda:0')
episode: 549 training return: tensor(-1.0188e+11, device='cuda:0')
episode: 550 training return: tensor(-4.2958e+11, device='cuda:0')
episode: 551 training return: tensor(-1.2951e+09, device='cuda:0')
epoch: 138 test_true_pfm: -12.204880197245549
episode: 552 training return: tensor(-1.4615e+09, device='cuda:0')
episode: 553 training return: tensor(-4.6411e+09, device='cuda:0')
episode: 554 training return: tensor(-3.0788e+09, device='cuda:0')
episode: 555 training return: tensor(-7.6486e+08, device='cuda:0')
epoch: 139 test_true_pfm: -12.048626920587154
episode: 556 training return: tensor(-8.1510e+09, device='cuda:0')
episode: 557 training return: tensor(-5.3955e+09, device='cuda:0')
episode: 558 training return: tensor(-6.0784e+09, device='cuda:0')
episode: 559 training return: tensor(-1.7504e+10, device='cuda:0')
epoch: 140 test_true_pfm: -11.975779256537198
episode: 560 training return: tensor(-1.7635e+10, device='cuda:0')
episode: 561 training return: tensor(-1.5492e+11, device='cuda:0')
episode: 562 training return: tensor(-5.5892e+08, device='cuda:0')
episode: 563 training return: tensor(-1.7879e+10, device='cuda:0')
epoch: 141 test_true_pfm: -11.761526343837003
episode: 564 training return: tensor(-5.9082e+09, device='cuda:0')
episode: 565 training return: tensor(-9.3877e+08, device='cuda:0')
episode: 566 training return: tensor(-3.8888e+10, device='cuda:0')
episode: 567 training return: tensor(-2.0936e+09, device='cuda:0')
epoch: 142 test_true_pfm: -11.906766214831736
episode: 568 training return: tensor(-5.8087e+08, device='cuda:0')
episode: 569 training return: tensor(-9.7575e+09, device='cuda:0')
episode: 570 training return: tensor(-2.4034e+10, device='cuda:0')
episode: 571 training return: tensor(-7.9617e+08, device='cuda:0')
epoch: 143 test_true_pfm: -11.387190813060187
episode: 572 training return: tensor(-2.2299e+09, device='cuda:0')
episode: 573 training return: tensor(-9.0554e+08, device='cuda:0')
episode: 574 training return: tensor(-9.4186e+08, device='cuda:0')
episode: 575 training return: tensor(-1.4426e+09, device='cuda:0')
epoch: 144 test_true_pfm: -12.201185498855224
episode: 576 training return: tensor(-1.9055e+09, device='cuda:0')
episode: 577 training return: tensor(-3.8327e+08, device='cuda:0')
episode: 578 training return: tensor(-9.4077e+08, device='cuda:0')
episode: 579 training return: tensor(-2.0098e+09, device='cuda:0')
epoch: 145 test_true_pfm: -11.22522067654226
episode: 580 training return: tensor(-2.0637e+09, device='cuda:0')
episode: 581 training return: tensor(-1.2912e+10, device='cuda:0')
episode: 582 training return: tensor(-2.0347e+10, device='cuda:0')
episode: 583 training return: tensor(-1.3492e+09, device='cuda:0')
epoch: 146 test_true_pfm: -11.240879102759473
episode: 584 training return: tensor(-5.3617e+10, device='cuda:0')
episode: 585 training return: tensor(-2.8932e+10, device='cuda:0')
episode: 586 training return: tensor(-1.0622e+09, device='cuda:0')
episode: 587 training return: tensor(-9.9511e+08, device='cuda:0')
epoch: 147 test_true_pfm: -11.539725579812362
episode: 588 training return: tensor(-3.1887e+08, device='cuda:0')
episode: 589 training return: tensor(-6.8280e+09, device='cuda:0')
episode: 590 training return: tensor(-1.2826e+09, device='cuda:0')
episode: 591 training return: tensor(-2.0327e+09, device='cuda:0')
epoch: 148 test_true_pfm: -12.249429240201142
episode: 592 training return: tensor(-8.5104e+08, device='cuda:0')
episode: 593 training return: tensor(-6.9745e+09, device='cuda:0')
episode: 594 training return: tensor(-3.2978e+09, device='cuda:0')
episode: 595 training return: tensor(-1.4778e+10, device='cuda:0')
epoch: 149 test_true_pfm: -11.836596275451075
episode: 596 training return: tensor(-9.4856e+11, device='cuda:0')
episode: 597 training return: tensor(-3.6032e+11, device='cuda:0')
episode: 598 training return: tensor(-6.5738e+12, device='cuda:0')
episode: 599 training return: tensor(-4.2307e+10, device='cuda:0')
epoch: 150 test_true_pfm: -12.142357052008125
