['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 2.269294042214751 test_loss: 0.5984089374542236
epoch: 1 training_loss -0.09499626049771905 test_loss: -0.4865473747253418
epoch: 2 training_loss -0.7876862119883299 test_loss: -1.0164037704467774
epoch: 3 training_loss -1.3528045696020126 test_loss: -1.5892261505126952
epoch: 4 training_loss -1.7071680688858033 test_loss: -1.90819091796875
epoch: 5 training_loss -2.009264885187149 test_loss: -2.1825546264648437
epoch: 6 training_loss -2.2564018249511717 test_loss: -2.366186332702637
epoch: 7 training_loss -2.4792253959178923 test_loss: -2.5841768264770506
epoch: 8 training_loss -2.658699288368225 test_loss: -2.803970527648926
epoch: 9 training_loss -2.8870653343200683 test_loss: -2.930245780944824
epoch: 10 training_loss -3.0107500314712525 test_loss: -2.9140256881713866
epoch: 11 training_loss -3.0089180159568785 test_loss: -3.1394279479980467
epoch: 12 training_loss -3.174776566028595 test_loss: -3.0898876190185547
epoch: 13 training_loss -3.275113742351532 test_loss: -3.307847595214844
epoch: 14 training_loss -3.2781636548042297 test_loss: -3.494015121459961
epoch: 15 training_loss -3.3946252942085264 test_loss: -3.504975128173828
epoch: 16 training_loss -3.4799166560173034 test_loss: -3.4847957611083986
epoch: 17 training_loss -3.580674338340759 test_loss: -3.6249786376953126
epoch: 18 training_loss -3.7006427025794983 test_loss: -3.6781757354736326
epoch: 19 training_loss -3.7247696685791016 test_loss: -3.8891460418701174
epoch: 20 training_loss -3.8004485416412352 test_loss: -3.874150848388672
epoch: 21 training_loss -3.759096305370331 test_loss: -3.8841697692871096
epoch: 22 training_loss -3.8523442053794863 test_loss: -3.8225780487060548
epoch: 23 training_loss -3.9821696949005125 test_loss: -4.060499954223633
epoch: 24 training_loss -3.968476674556732 test_loss: -3.9687831878662108
epoch: 25 training_loss -4.00586677312851 test_loss: -4.097761917114258
epoch: 26 training_loss -4.096680567264557 test_loss: -4.052883911132812
epoch: 27 training_loss -4.11689218044281 test_loss: -4.195649719238281
epoch: 28 training_loss -4.160468804836273 test_loss: -4.285216522216797
epoch: 29 training_loss -4.188438494205474 test_loss: -4.291258239746094
epoch: 30 training_loss -4.207627620697021 test_loss: -4.333660125732422
epoch: 31 training_loss -4.288578794002533 test_loss: -4.394765090942383
epoch: 32 training_loss -4.301317028999328 test_loss: -4.363061141967774
epoch: 33 training_loss -4.293572332859039 test_loss: -4.421195983886719
epoch: 34 training_loss -4.330220775604248 test_loss: -4.358365631103515
epoch: 35 training_loss -4.426510047912598 test_loss: -4.4993431091308596
epoch: 36 training_loss -4.400017395019531 test_loss: -4.2392230987548825
epoch: 37 training_loss -4.386404650211334 test_loss: -4.446030044555664
epoch: 38 training_loss -4.484780220985413 test_loss: -4.601399230957031
epoch: 39 training_loss -4.435761089324951 test_loss: -4.426192855834961
epoch: 40 training_loss -4.525398564338684 test_loss: -4.470479965209961
epoch: 41 training_loss -4.545965795516968 test_loss: -4.571251296997071
epoch: 42 training_loss -4.547507467269898 test_loss: -4.618734359741211
epoch: 43 training_loss -4.551938991546631 test_loss: -4.594513320922852
epoch: 44 training_loss -4.575894725322724 test_loss: -4.594244766235351
epoch: 45 training_loss -4.571307864189148 test_loss: -4.768653869628906
epoch: 46 training_loss -4.615526757240295 test_loss: -4.64456787109375
epoch: 47 training_loss -4.662844634056091 test_loss: -4.695700073242188
epoch: 48 training_loss -4.702816243171692 test_loss: -4.702410507202148
epoch: 49 training_loss -4.6506726360321045 test_loss: -4.712449264526367
epoch: 50 training_loss -4.708033595085144 test_loss: -4.708554077148437
epoch: 51 training_loss -4.733757410049439 test_loss: -4.8082019805908205
epoch: 52 training_loss -4.735669984817505 test_loss: -4.809603118896485
epoch: 53 training_loss -4.721283073425293 test_loss: -4.698513031005859
epoch: 54 training_loss -4.7245638465881346 test_loss: -4.901773452758789
epoch: 55 training_loss -4.749604501724243 test_loss: -4.827667236328125
epoch: 56 training_loss -4.7691419982910155 test_loss: -4.844749069213867
epoch: 57 training_loss -4.721105837821961 test_loss: -4.819518661499023
epoch: 58 training_loss -4.819227876663208 test_loss: -4.7707164764404295
epoch: 59 training_loss -4.882709493637085 test_loss: -4.823703002929688
epoch: 60 training_loss -4.87641948223114 test_loss: -4.809517288208008
epoch: 61 training_loss -4.855895161628723 test_loss: -4.869447326660156
epoch: 62 training_loss -4.859363265037537 test_loss: -4.9895679473876955
epoch: 63 training_loss -4.873781790733338 test_loss: -5.031044006347656
epoch: 64 training_loss -4.912673034667969 test_loss: -4.757882308959961
epoch: 65 training_loss -4.916952209472656 test_loss: -4.983924865722656
epoch: 66 training_loss -4.935056972503662 test_loss: -4.897462463378906
epoch: 67 training_loss -4.925941791534424 test_loss: -5.07942008972168
epoch: 68 training_loss -4.979762954711914 test_loss: -5.00804443359375
epoch: 69 training_loss -4.958436102867126 test_loss: -4.944496154785156
epoch: 70 training_loss -4.950365495681763 test_loss: -4.943769073486328
epoch: 71 training_loss -4.965814738273621 test_loss: -5.051544952392578
epoch: 72 training_loss -4.959073572158814 test_loss: -5.003421401977539
epoch: 73 training_loss -4.978245568275452 test_loss: -5.034453582763672
epoch: 74 training_loss -5.044402718544006 test_loss: -5.121751403808593
epoch: 75 training_loss -4.93179536819458 test_loss: -5.140703964233398
epoch: 76 training_loss -5.045173416137695 test_loss: -4.913699340820313
epoch: 77 training_loss -5.02886085987091 test_loss: -4.922270584106445
epoch: 78 training_loss -5.056030483245849 test_loss: -4.956130599975586
epoch: 79 training_loss -5.069993691444397 test_loss: -5.144865417480469
epoch: 80 training_loss -5.067107276916504 test_loss: -5.047650527954102
epoch: 81 training_loss -5.115742907524109 test_loss: -5.254555511474609
epoch: 82 training_loss -5.1346354579925535 test_loss: -5.1316078186035154
epoch: 83 training_loss -5.124019536972046 test_loss: -5.160617828369141
epoch: 84 training_loss -5.1335344409942625 test_loss: -5.226562118530273
epoch: 85 training_loss -5.128681364059449 test_loss: -5.162674331665039
epoch: 86 training_loss -5.1472166681289675 test_loss: -5.088971328735352
epoch: 87 training_loss -5.11652407169342 test_loss: -5.192160415649414
epoch: 88 training_loss -5.146730985641479 test_loss: -5.071385192871094
epoch: 89 training_loss -5.1589292764663695 test_loss: -5.260993194580078
epoch: 90 training_loss -5.160545353889465 test_loss: -5.234572219848633
epoch: 91 training_loss -5.203375391960144 test_loss: -5.237397384643555
epoch: 92 training_loss -5.238347578048706 test_loss: -5.1689300537109375
epoch: 93 training_loss -5.2017276048660275 test_loss: -5.209857940673828
epoch: 94 training_loss -5.176384553909302 test_loss: -5.302918243408203
epoch: 95 training_loss -5.239394173622132 test_loss: -5.357765579223633
epoch: 96 training_loss -5.169736208915711 test_loss: -4.971767425537109
epoch: 97 training_loss -5.250885853767395 test_loss: -5.2843677520751955
epoch: 98 training_loss -5.254880013465882 test_loss: -5.351764297485351
epoch: 99 training_loss -5.263697724342347 test_loss: -5.325476837158203
epoch: 100 training_loss -5.229796571731567 test_loss: -5.148656845092773
epoch: 101 training_loss -5.2912095928192135 test_loss: -5.2893016815185545
epoch: 102 training_loss -5.262992358207702 test_loss: -5.279748916625977
epoch: 103 training_loss -5.309933829307556 test_loss: -5.370227432250976
epoch: 104 training_loss -5.277821831703186 test_loss: -5.28618278503418
epoch: 105 training_loss -5.274760217666626 test_loss: -5.1527446746826175
epoch: 106 training_loss -5.316695938110351 test_loss: -5.296987152099609
epoch: 107 training_loss -5.297964057922363 test_loss: -5.361886978149414
epoch: 108 training_loss -5.322101135253906 test_loss: -5.388244247436523
epoch: 109 training_loss -5.352370219230652 test_loss: -5.439430618286133
epoch: 110 training_loss -5.361078000068664 test_loss: -5.4136096954345705
epoch: 111 training_loss -5.294357805252075 test_loss: -5.342620849609375
epoch: 112 training_loss -5.344165649414062 test_loss: -5.40632209777832
epoch: 113 training_loss -5.38891562461853 test_loss: -5.25659294128418
epoch: 114 training_loss -5.334501085281372 test_loss: -5.370622634887695
epoch: 115 training_loss -5.391509962081909 test_loss: -5.484004974365234
epoch: 116 training_loss -5.398832831382752 test_loss: -5.457547760009765
epoch: 117 training_loss -5.399693284034729 test_loss: -5.497285842895508
epoch: 118 training_loss -5.406541666984558 test_loss: -5.458065414428711
epoch: 119 training_loss -5.394284791946411 test_loss: -5.590135192871093
epoch: 120 training_loss -5.443540601730347 test_loss: -5.490210342407226
epoch: 121 training_loss -5.3960086584091185 test_loss: -5.397204208374023
epoch: 122 training_loss -5.407353301048278 test_loss: -5.347098159790039
epoch: 123 training_loss -5.370133986473084 test_loss: -5.506024932861328
epoch: 124 training_loss -5.444143085479737 test_loss: -5.369459533691407
epoch: 125 training_loss -5.474563517570496 test_loss: -5.526559066772461
epoch: 126 training_loss -5.488419799804688 test_loss: -5.5033222198486325
epoch: 127 training_loss -5.460417284965515 test_loss: -5.497146987915039
epoch: 128 training_loss -5.481219806671143 test_loss: -5.559311294555664
epoch: 129 training_loss -5.451303968429565 test_loss: -5.545150375366211
epoch: 130 training_loss -5.46137357711792 test_loss: -5.520491027832032
epoch: 131 training_loss -5.480079689025879 test_loss: -5.4879508972167965
epoch: 132 training_loss -5.494739627838134 test_loss: -5.6049846649169925
epoch: 133 training_loss -5.418048338890076 test_loss: -5.5472663879394535
epoch: 134 training_loss -5.504153246879578 test_loss: -5.396636199951172
epoch: 135 training_loss -5.525374140739441 test_loss: -5.478980636596679
epoch: 136 training_loss -5.497907495498657 test_loss: -5.486888885498047
epoch: 137 training_loss -5.518455982208252 test_loss: -5.5270130157470705
epoch: 138 training_loss -5.541415915489197 test_loss: -5.546654891967774
epoch: 139 training_loss -5.5115106439590456 test_loss: -5.505519104003906
epoch: 140 training_loss -5.518354325294495 test_loss: -5.562688827514648
epoch: 141 training_loss -5.474250802993774 test_loss: -5.466744995117187
epoch: 142 training_loss -5.5399561834335325 test_loss: -5.541916275024414
epoch: 143 training_loss -5.5174974346160885 test_loss: -5.5066581726074215
epoch: 144 training_loss -5.613106789588929 test_loss: -5.475918960571289
epoch: 145 training_loss -5.542883439064026 test_loss: -5.604419708251953
epoch: 146 training_loss -5.546081147193909 test_loss: -5.531084060668945
epoch: 147 training_loss -5.582699956893921 test_loss: -5.599158096313476
epoch: 148 training_loss -5.57848475933075 test_loss: -5.615852737426758
epoch: 149 training_loss -5.594350090026856 test_loss: -5.477170944213867
131.5566494971491
episode: 0 training return: tensor(-1.1568e+13, device='cuda:0')
episode: 1 training return: tensor(-2.1013e+10, device='cuda:0')
episode: 2 training return: tensor(-2.7045e+11, device='cuda:0')
episode: 3 training return: tensor(-5.2334e+13, device='cuda:0')
epoch: 1 test_true_pfm: 19.30101233930828
episode: 4 training return: tensor(-3.7553e+12, device='cuda:0')
episode: 5 training return: tensor(-6.6517e+12, device='cuda:0')
episode: 6 training return: tensor(-6.4593e+11, device='cuda:0')
episode: 7 training return: tensor(-7.6761e+10, device='cuda:0')
epoch: 2 test_true_pfm: 17.78831937869541
episode: 8 training return: tensor(-4.0515e+12, device='cuda:0')
episode: 9 training return: tensor(-1.4749e+12, device='cuda:0')
episode: 10 training return: tensor(-4.8050e+09, device='cuda:0')
episode: 11 training return: tensor(-3.3161e+08, device='cuda:0')
epoch: 3 test_true_pfm: 23.118135851118122
episode: 12 training return: tensor(-8.2993e+09, device='cuda:0')
episode: 13 training return: tensor(-1.8991e+09, device='cuda:0')
episode: 14 training return: tensor(-7.7449e+10, device='cuda:0')
episode: 15 training return: tensor(-7.8578e+10, device='cuda:0')
epoch: 4 test_true_pfm: 26.534580019368214
episode: 16 training return: tensor(-6.4216e+11, device='cuda:0')
episode: 17 training return: tensor(-2.9766e+16, device='cuda:0')
episode: 18 training return: tensor(-3.7136e+09, device='cuda:0')
episode: 19 training return: tensor(-1.3997e+09, device='cuda:0')
epoch: 5 test_true_pfm: 10.047918087830508
episode: 20 training return: tensor(-13340878., device='cuda:0')
episode: 21 training return: tensor(-13308389., device='cuda:0')
episode: 22 training return: tensor(-13425986., device='cuda:0')
episode: 23 training return: tensor(-12897436., device='cuda:0')
epoch: 6 test_true_pfm: 19.918259565384023
episode: 24 training return: tensor(-10404993., device='cuda:0')
episode: 25 training return: tensor(-11402467., device='cuda:0')
episode: 26 training return: tensor(-10054611., device='cuda:0')
episode: 27 training return: tensor(-13141689., device='cuda:0')
epoch: 7 test_true_pfm: 10.406231529556706
episode: 28 training return: tensor(-13041454., device='cuda:0')
episode: 29 training return: tensor(-12988498., device='cuda:0')
episode: 30 training return: tensor(-13046368., device='cuda:0')
episode: 31 training return: tensor(-13379810., device='cuda:0')
epoch: 8 test_true_pfm: 7.243530693928986
episode: 32 training return: tensor(-12975694., device='cuda:0')
episode: 33 training return: tensor(-12899626., device='cuda:0')
episode: 34 training return: tensor(-13108692., device='cuda:0')
episode: 35 training return: tensor(-11057885., device='cuda:0')
epoch: 9 test_true_pfm: 13.851323819496539
episode: 36 training return: tensor(-13428767., device='cuda:0')
episode: 37 training return: tensor(-13445259., device='cuda:0')
episode: 38 training return: tensor(-13290845., device='cuda:0')
episode: 39 training return: tensor(-12691561., device='cuda:0')
epoch: 10 test_true_pfm: 15.08173338940448
episode: 40 training return: tensor(-13657626., device='cuda:0')
episode: 41 training return: tensor(-13387845., device='cuda:0')
episode: 42 training return: tensor(-13150263., device='cuda:0')
episode: 43 training return: tensor(-11860414., device='cuda:0')
epoch: 11 test_true_pfm: 14.193355083711015
episode: 44 training return: tensor(-13244824., device='cuda:0')
episode: 45 training return: tensor(-13087763., device='cuda:0')
episode: 46 training return: tensor(-12919738., device='cuda:0')
episode: 47 training return: tensor(-13449833., device='cuda:0')
epoch: 12 test_true_pfm: 11.377880825421077
episode: 48 training return: tensor(-13146945., device='cuda:0')
episode: 49 training return: tensor(-13532985., device='cuda:0')
episode: 50 training return: tensor(-9342773., device='cuda:0')
episode: 51 training return: tensor(-13282995., device='cuda:0')
epoch: 13 test_true_pfm: 10.406358015691342
episode: 52 training return: tensor(-12454280., device='cuda:0')
episode: 53 training return: tensor(-9310048., device='cuda:0')
episode: 54 training return: tensor(-13380714., device='cuda:0')
episode: 55 training return: tensor(-13316862., device='cuda:0')
epoch: 14 test_true_pfm: 8.357640744530713
episode: 56 training return: tensor(-12339919., device='cuda:0')
episode: 57 training return: tensor(-13568544., device='cuda:0')
episode: 58 training return: tensor(-12098135., device='cuda:0')
episode: 59 training return: tensor(-13214860., device='cuda:0')
epoch: 15 test_true_pfm: 12.836308132114757
episode: 60 training return: tensor(-12607728., device='cuda:0')
episode: 61 training return: tensor(-13150310., device='cuda:0')
episode: 62 training return: tensor(-13349726., device='cuda:0')
episode: 63 training return: tensor(-10324884., device='cuda:0')
epoch: 16 test_true_pfm: 14.251124039351714
episode: 64 training return: tensor(-13229244., device='cuda:0')
episode: 65 training return: tensor(-10132616., device='cuda:0')
episode: 66 training return: tensor(-13324723., device='cuda:0')
episode: 67 training return: tensor(-12191089., device='cuda:0')
epoch: 17 test_true_pfm: 11.853946395299712
episode: 68 training return: tensor(-13466811., device='cuda:0')
episode: 69 training return: tensor(-11725877., device='cuda:0')
episode: 70 training return: tensor(-12235550., device='cuda:0')
episode: 71 training return: tensor(-13473815., device='cuda:0')
epoch: 18 test_true_pfm: 15.071135224659361
episode: 72 training return: tensor(-10109662., device='cuda:0')
episode: 73 training return: tensor(-13617332., device='cuda:0')
episode: 74 training return: tensor(-12978237., device='cuda:0')
episode: 75 training return: tensor(-13269272., device='cuda:0')
epoch: 19 test_true_pfm: 12.403930079949738
episode: 76 training return: tensor(-13052028., device='cuda:0')
episode: 77 training return: tensor(-13474280., device='cuda:0')
episode: 78 training return: tensor(-13035732., device='cuda:0')
episode: 79 training return: tensor(-13277780., device='cuda:0')
epoch: 20 test_true_pfm: 15.199324295372374
episode: 80 training return: tensor(-13216652., device='cuda:0')
episode: 81 training return: tensor(-13511848., device='cuda:0')
episode: 82 training return: tensor(-12416907., device='cuda:0')
episode: 83 training return: tensor(-13362794., device='cuda:0')
epoch: 21 test_true_pfm: 14.18553751519163
episode: 84 training return: tensor(-13311855., device='cuda:0')
episode: 85 training return: tensor(-13277957., device='cuda:0')
episode: 86 training return: tensor(-13027881., device='cuda:0')
episode: 87 training return: tensor(-12297988., device='cuda:0')
epoch: 22 test_true_pfm: 5.684749095677423
episode: 88 training return: tensor(-11987929., device='cuda:0')
episode: 89 training return: tensor(-13064963., device='cuda:0')
episode: 90 training return: tensor(-10119942., device='cuda:0')
episode: 91 training return: tensor(-13708725., device='cuda:0')
epoch: 23 test_true_pfm: 11.34378571953032
episode: 92 training return: tensor(-11739399., device='cuda:0')
episode: 93 training return: tensor(-11300712., device='cuda:0')
episode: 94 training return: tensor(-13092186., device='cuda:0')
episode: 95 training return: tensor(-11137776., device='cuda:0')
epoch: 24 test_true_pfm: 14.823322370323876
episode: 96 training return: tensor(-13489219., device='cuda:0')
episode: 97 training return: tensor(-13038483., device='cuda:0')
episode: 98 training return: tensor(-13645899., device='cuda:0')
episode: 99 training return: tensor(-13216505., device='cuda:0')
epoch: 25 test_true_pfm: 15.178027976423781
episode: 100 training return: tensor(-13287553., device='cuda:0')
episode: 101 training return: tensor(-12909053., device='cuda:0')
episode: 102 training return: tensor(-10419250., device='cuda:0')
episode: 103 training return: tensor(-13393740., device='cuda:0')
epoch: 26 test_true_pfm: 15.24583366518874
episode: 104 training return: tensor(-13101095., device='cuda:0')
episode: 105 training return: tensor(-13103694., device='cuda:0')
episode: 106 training return: tensor(-10190914., device='cuda:0')
episode: 107 training return: tensor(-12942607., device='cuda:0')
epoch: 27 test_true_pfm: 12.283704714138858
episode: 108 training return: tensor(-13451845., device='cuda:0')
episode: 109 training return: tensor(-13156450., device='cuda:0')
episode: 110 training return: tensor(-13091378., device='cuda:0')
episode: 111 training return: tensor(-13097111., device='cuda:0')
epoch: 28 test_true_pfm: 20.233217695675584
episode: 112 training return: tensor(-13501832., device='cuda:0')
episode: 113 training return: tensor(-12433915., device='cuda:0')
episode: 114 training return: tensor(-12923345., device='cuda:0')
episode: 115 training return: tensor(-13462782., device='cuda:0')
epoch: 29 test_true_pfm: 8.545947713261045
episode: 116 training return: tensor(-13288867., device='cuda:0')
episode: 117 training return: tensor(-10160346., device='cuda:0')
episode: 118 training return: tensor(-13429461., device='cuda:0')
episode: 119 training return: tensor(-13488823., device='cuda:0')
epoch: 30 test_true_pfm: 14.272159361825203
episode: 120 training return: tensor(-13082596., device='cuda:0')
episode: 121 training return: tensor(-12857165., device='cuda:0')
episode: 122 training return: tensor(-13164009., device='cuda:0')
episode: 123 training return: tensor(-12913783., device='cuda:0')
epoch: 31 test_true_pfm: 11.346879842382894
episode: 124 training return: tensor(-10044072., device='cuda:0')
episode: 125 training return: tensor(-13126405., device='cuda:0')
episode: 126 training return: tensor(-12777253., device='cuda:0')
episode: 127 training return: tensor(-9527978., device='cuda:0')
epoch: 32 test_true_pfm: 15.56573579417138
episode: 128 training return: tensor(-12960883., device='cuda:0')
episode: 129 training return: tensor(-13438465., device='cuda:0')
episode: 130 training return: tensor(-13200124., device='cuda:0')
episode: 131 training return: tensor(-13132317., device='cuda:0')
epoch: 33 test_true_pfm: 13.24363774522046
episode: 132 training return: tensor(-13440855., device='cuda:0')
episode: 133 training return: tensor(-13225819., device='cuda:0')
episode: 134 training return: tensor(-13246841., device='cuda:0')
episode: 135 training return: tensor(-13004147., device='cuda:0')
epoch: 34 test_true_pfm: 10.513966047057385
episode: 136 training return: tensor(-9393610., device='cuda:0')
episode: 137 training return: tensor(-13460691., device='cuda:0')
episode: 138 training return: tensor(-13374628., device='cuda:0')
episode: 139 training return: tensor(-12771830., device='cuda:0')
epoch: 35 test_true_pfm: 7.957981490260224
episode: 140 training return: tensor(-12084429., device='cuda:0')
episode: 141 training return: tensor(-9844636., device='cuda:0')
episode: 142 training return: tensor(-13033369., device='cuda:0')
episode: 143 training return: tensor(-10729808., device='cuda:0')
epoch: 36 test_true_pfm: 22.310386716174712
episode: 144 training return: tensor(-12939204., device='cuda:0')
episode: 145 training return: tensor(-13260262., device='cuda:0')
episode: 146 training return: tensor(-10469193., device='cuda:0')
episode: 147 training return: tensor(-11928242., device='cuda:0')
epoch: 37 test_true_pfm: 9.268472797434516
episode: 148 training return: tensor(-11524291., device='cuda:0')
episode: 149 training return: tensor(-12539669., device='cuda:0')
episode: 150 training return: tensor(-11040199., device='cuda:0')
episode: 151 training return: tensor(-13155603., device='cuda:0')
epoch: 38 test_true_pfm: 11.454824199008266
episode: 152 training return: tensor(-11799803., device='cuda:0')
episode: 153 training return: tensor(-13105042., device='cuda:0')
episode: 154 training return: tensor(-11966308., device='cuda:0')
episode: 155 training return: tensor(-11609677., device='cuda:0')
epoch: 39 test_true_pfm: 9.18135727494327
episode: 156 training return: tensor(-13321158., device='cuda:0')
episode: 157 training return: tensor(-13183988., device='cuda:0')
episode: 158 training return: tensor(-13100292., device='cuda:0')
episode: 159 training return: tensor(-12411316., device='cuda:0')
epoch: 40 test_true_pfm: 13.370631005488397
episode: 160 training return: tensor(-13408443., device='cuda:0')
episode: 161 training return: tensor(-12999584., device='cuda:0')
episode: 162 training return: tensor(-13126287., device='cuda:0')
episode: 163 training return: tensor(-13384195., device='cuda:0')
epoch: 41 test_true_pfm: 19.427844519418834
episode: 164 training return: tensor(-11570252., device='cuda:0')
episode: 165 training return: tensor(-13251985., device='cuda:0')
episode: 166 training return: tensor(-13201677., device='cuda:0')
episode: 167 training return: tensor(-13154380., device='cuda:0')
epoch: 42 test_true_pfm: 15.759236435414612
episode: 168 training return: tensor(-12658545., device='cuda:0')
episode: 169 training return: tensor(-13233907., device='cuda:0')
episode: 170 training return: tensor(-13365664., device='cuda:0')
episode: 171 training return: tensor(-9836881., device='cuda:0')
epoch: 43 test_true_pfm: 8.78588249297688
episode: 172 training return: tensor(-13458421., device='cuda:0')
episode: 173 training return: tensor(-13442621., device='cuda:0')
episode: 174 training return: tensor(-13210818., device='cuda:0')
episode: 175 training return: tensor(-11613323., device='cuda:0')
epoch: 44 test_true_pfm: 12.747622569593592
episode: 176 training return: tensor(-13011391., device='cuda:0')
episode: 177 training return: tensor(-13356495., device='cuda:0')
episode: 178 training return: tensor(-11457185., device='cuda:0')
episode: 179 training return: tensor(-13006244., device='cuda:0')
epoch: 45 test_true_pfm: 11.060609549308044
episode: 180 training return: tensor(-13084290., device='cuda:0')
episode: 181 training return: tensor(-13459064., device='cuda:0')
episode: 182 training return: tensor(-13510672., device='cuda:0')
episode: 183 training return: tensor(-13322786., device='cuda:0')
epoch: 46 test_true_pfm: 19.745350709313747
episode: 184 training return: tensor(-13345223., device='cuda:0')
episode: 185 training return: tensor(-13461936., device='cuda:0')
episode: 186 training return: tensor(-13035923., device='cuda:0')
episode: 187 training return: tensor(-13072573., device='cuda:0')
epoch: 47 test_true_pfm: 16.486321407448113
episode: 188 training return: tensor(-13274207., device='cuda:0')
episode: 189 training return: tensor(-13165052., device='cuda:0')
episode: 190 training return: tensor(-13249203., device='cuda:0')
episode: 191 training return: tensor(-10177402., device='cuda:0')
epoch: 48 test_true_pfm: 16.65188070214905
episode: 192 training return: tensor(-11320106., device='cuda:0')
episode: 193 training return: tensor(-13026250., device='cuda:0')
episode: 194 training return: tensor(-12063677., device='cuda:0')
episode: 195 training return: tensor(-13132591., device='cuda:0')
epoch: 49 test_true_pfm: 6.409244170508089
episode: 196 training return: tensor(-12897079., device='cuda:0')
episode: 197 training return: tensor(-13153934., device='cuda:0')
episode: 198 training return: tensor(-13501512., device='cuda:0')
episode: 199 training return: tensor(-13289094., device='cuda:0')
epoch: 50 test_true_pfm: 16.340994955528238
episode: 200 training return: tensor(-10130880., device='cuda:0')
episode: 201 training return: tensor(-10758055., device='cuda:0')
episode: 202 training return: tensor(-12871816., device='cuda:0')
episode: 203 training return: tensor(-12609717., device='cuda:0')
epoch: 51 test_true_pfm: 16.65745787938611
episode: 204 training return: tensor(-13107356., device='cuda:0')
episode: 205 training return: tensor(-13432490., device='cuda:0')
episode: 206 training return: tensor(-13315300., device='cuda:0')
episode: 207 training return: tensor(-12520573., device='cuda:0')
epoch: 52 test_true_pfm: 18.43881534004002
episode: 208 training return: tensor(-13100195., device='cuda:0')
episode: 209 training return: tensor(-12923699., device='cuda:0')
episode: 210 training return: tensor(-13159307., device='cuda:0')
episode: 211 training return: tensor(-13588425., device='cuda:0')
epoch: 53 test_true_pfm: 15.844062124430923
episode: 212 training return: tensor(-13510774., device='cuda:0')
episode: 213 training return: tensor(-13351943., device='cuda:0')
episode: 214 training return: tensor(-13308986., device='cuda:0')
episode: 215 training return: tensor(-13288626., device='cuda:0')
epoch: 54 test_true_pfm: 12.854753257076936
episode: 216 training return: tensor(-13402367., device='cuda:0')
episode: 217 training return: tensor(-13396871., device='cuda:0')
episode: 218 training return: tensor(-13525550., device='cuda:0')
episode: 219 training return: tensor(-13213446., device='cuda:0')
epoch: 55 test_true_pfm: 16.891274485380464
episode: 220 training return: tensor(-11559866., device='cuda:0')
episode: 221 training return: tensor(-13412531., device='cuda:0')
episode: 222 training return: tensor(-12443783., device='cuda:0')
episode: 223 training return: tensor(-11138149., device='cuda:0')
epoch: 56 test_true_pfm: 19.867921739725524
episode: 224 training return: tensor(-12872162., device='cuda:0')
episode: 225 training return: tensor(-11657155., device='cuda:0')
episode: 226 training return: tensor(-12263422., device='cuda:0')
episode: 227 training return: tensor(-11502562., device='cuda:0')
epoch: 57 test_true_pfm: 9.986489332654624
episode: 228 training return: tensor(-13084487., device='cuda:0')
episode: 229 training return: tensor(-13470799., device='cuda:0')
episode: 230 training return: tensor(-13622579., device='cuda:0')
episode: 231 training return: tensor(-13431787., device='cuda:0')
epoch: 58 test_true_pfm: 14.350055434664203
episode: 232 training return: tensor(-9355479., device='cuda:0')
episode: 233 training return: tensor(-13005524., device='cuda:0')
episode: 234 training return: tensor(-11685383., device='cuda:0')
episode: 235 training return: tensor(-13209374., device='cuda:0')
epoch: 59 test_true_pfm: 11.785800996231753
episode: 236 training return: tensor(-13705683., device='cuda:0')
episode: 237 training return: tensor(-13120878., device='cuda:0')
episode: 238 training return: tensor(-13003628., device='cuda:0')
episode: 239 training return: tensor(-13402338., device='cuda:0')
epoch: 60 test_true_pfm: 11.047708166695447
episode: 240 training return: tensor(-13203930., device='cuda:0')
episode: 241 training return: tensor(-13055814., device='cuda:0')
episode: 242 training return: tensor(-12947144., device='cuda:0')
episode: 243 training return: tensor(-13517172., device='cuda:0')
epoch: 61 test_true_pfm: 11.067041744570348
episode: 244 training return: tensor(-10119498., device='cuda:0')
episode: 245 training return: tensor(-12774998., device='cuda:0')
episode: 246 training return: tensor(-13200310., device='cuda:0')
episode: 247 training return: tensor(-13285203., device='cuda:0')
epoch: 62 test_true_pfm: 15.64548400880194
episode: 248 training return: tensor(-13441360., device='cuda:0')
episode: 249 training return: tensor(-9510141., device='cuda:0')
episode: 250 training return: tensor(-13145308., device='cuda:0')
episode: 251 training return: tensor(-13659009., device='cuda:0')
epoch: 63 test_true_pfm: 12.664217365793785
episode: 252 training return: tensor(-9698405., device='cuda:0')
episode: 253 training return: tensor(-8595481., device='cuda:0')
episode: 254 training return: tensor(-12998190., device='cuda:0')
episode: 255 training return: tensor(-11369462., device='cuda:0')
epoch: 64 test_true_pfm: 8.905403651404168
episode: 256 training return: tensor(-13100739., device='cuda:0')
episode: 257 training return: tensor(-13147679., device='cuda:0')
episode: 258 training return: tensor(-12991264., device='cuda:0')
episode: 259 training return: tensor(-13406279., device='cuda:0')
epoch: 65 test_true_pfm: 12.636453155576316
episode: 260 training return: tensor(-13383909., device='cuda:0')
episode: 261 training return: tensor(-12979910., device='cuda:0')
episode: 262 training return: tensor(-13010936., device='cuda:0')
episode: 263 training return: tensor(-12066971., device='cuda:0')
epoch: 66 test_true_pfm: 17.05016231082598
episode: 264 training return: tensor(-13233727., device='cuda:0')
episode: 265 training return: tensor(-12805291., device='cuda:0')
episode: 266 training return: tensor(-13063555., device='cuda:0')
episode: 267 training return: tensor(-13283871., device='cuda:0')
epoch: 67 test_true_pfm: 8.255313248389786
episode: 268 training return: tensor(-13288746., device='cuda:0')
episode: 269 training return: tensor(-13189988., device='cuda:0')
episode: 270 training return: tensor(-13515888., device='cuda:0')
episode: 271 training return: tensor(-13172474., device='cuda:0')
epoch: 68 test_true_pfm: 15.974918008167773
episode: 272 training return: tensor(-13250554., device='cuda:0')
episode: 273 training return: tensor(-13017339., device='cuda:0')
episode: 274 training return: tensor(-13434065., device='cuda:0')
episode: 275 training return: tensor(-13141876., device='cuda:0')
epoch: 69 test_true_pfm: 13.26245169322064
episode: 276 training return: tensor(-9339637., device='cuda:0')
episode: 277 training return: tensor(-13303208., device='cuda:0')
episode: 278 training return: tensor(-9532388., device='cuda:0')
episode: 279 training return: tensor(-13249350., device='cuda:0')
epoch: 70 test_true_pfm: 17.17308783872462
episode: 280 training return: tensor(-13321116., device='cuda:0')
episode: 281 training return: tensor(-10258006., device='cuda:0')
episode: 282 training return: tensor(-13162195., device='cuda:0')
episode: 283 training return: tensor(-13556901., device='cuda:0')
epoch: 71 test_true_pfm: 10.58507531905665
episode: 284 training return: tensor(-13239725., device='cuda:0')
episode: 285 training return: tensor(-12959718., device='cuda:0')
episode: 286 training return: tensor(-10527621., device='cuda:0')
episode: 287 training return: tensor(-13535822., device='cuda:0')
epoch: 72 test_true_pfm: 17.615662537617908
episode: 288 training return: tensor(-13433540., device='cuda:0')
episode: 289 training return: tensor(-13417168., device='cuda:0')
episode: 290 training return: tensor(-13595815., device='cuda:0')
episode: 291 training return: tensor(-12967893., device='cuda:0')
epoch: 73 test_true_pfm: 13.869621318186384
episode: 292 training return: tensor(-13526695., device='cuda:0')
episode: 293 training return: tensor(-13420585., device='cuda:0')
episode: 294 training return: tensor(-12915296., device='cuda:0')
episode: 295 training return: tensor(-13133982., device='cuda:0')
epoch: 74 test_true_pfm: 14.911018205763426
episode: 296 training return: tensor(-13343299., device='cuda:0')
episode: 297 training return: tensor(-10045853., device='cuda:0')
episode: 298 training return: tensor(-13231690., device='cuda:0')
episode: 299 training return: tensor(-13443103., device='cuda:0')
epoch: 75 test_true_pfm: 12.41782459975352
episode: 300 training return: tensor(-13066900., device='cuda:0')
episode: 301 training return: tensor(-13161670., device='cuda:0')
episode: 302 training return: tensor(-13518138., device='cuda:0')
episode: 303 training return: tensor(-13081869., device='cuda:0')
epoch: 76 test_true_pfm: 21.5838689489346
episode: 304 training return: tensor(-11761872., device='cuda:0')
episode: 305 training return: tensor(-13123491., device='cuda:0')
episode: 306 training return: tensor(-12915996., device='cuda:0')
episode: 307 training return: tensor(-13319610., device='cuda:0')
epoch: 77 test_true_pfm: 18.50803294116556
episode: 308 training return: tensor(-12961799., device='cuda:0')
episode: 309 training return: tensor(-9525417., device='cuda:0')
episode: 310 training return: tensor(-9143190., device='cuda:0')
episode: 311 training return: tensor(-13189323., device='cuda:0')
epoch: 78 test_true_pfm: 20.297478750687347
episode: 312 training return: tensor(-13347475., device='cuda:0')
episode: 313 training return: tensor(-12900156., device='cuda:0')
episode: 314 training return: tensor(-12332001., device='cuda:0')
episode: 315 training return: tensor(-13146398., device='cuda:0')
epoch: 79 test_true_pfm: 14.015704350259051
episode: 316 training return: tensor(-13035612., device='cuda:0')
episode: 317 training return: tensor(-13228099., device='cuda:0')
episode: 318 training return: tensor(-13519477., device='cuda:0')
episode: 319 training return: tensor(-13281104., device='cuda:0')
epoch: 80 test_true_pfm: 10.04726238534204
episode: 320 training return: tensor(-10724889., device='cuda:0')
episode: 321 training return: tensor(-13191374., device='cuda:0')
episode: 322 training return: tensor(-9651932., device='cuda:0')
episode: 323 training return: tensor(-13283232., device='cuda:0')
epoch: 81 test_true_pfm: 13.939475389624203
episode: 324 training return: tensor(-13337453., device='cuda:0')
episode: 325 training return: tensor(-13411955., device='cuda:0')
episode: 326 training return: tensor(-12673058., device='cuda:0')
episode: 327 training return: tensor(-13239399., device='cuda:0')
epoch: 82 test_true_pfm: 17.324728662312655
episode: 328 training return: tensor(-13289045., device='cuda:0')
episode: 329 training return: tensor(-12433321., device='cuda:0')
episode: 330 training return: tensor(-13254463., device='cuda:0')
episode: 331 training return: tensor(-12934286., device='cuda:0')
epoch: 83 test_true_pfm: 14.412676524038599
episode: 332 training return: tensor(-13489669., device='cuda:0')
episode: 333 training return: tensor(-10690640., device='cuda:0')
episode: 334 training return: tensor(-13401512., device='cuda:0')
episode: 335 training return: tensor(-13429327., device='cuda:0')
epoch: 84 test_true_pfm: 18.854305514557794
episode: 336 training return: tensor(-9779307., device='cuda:0')
episode: 337 training return: tensor(-13074500., device='cuda:0')
episode: 338 training return: tensor(-12069330., device='cuda:0')
episode: 339 training return: tensor(-12982412., device='cuda:0')
epoch: 85 test_true_pfm: 15.074870193188213
episode: 340 training return: tensor(-11222538., device='cuda:0')
episode: 341 training return: tensor(-12660196., device='cuda:0')
episode: 342 training return: tensor(-13243528., device='cuda:0')
episode: 343 training return: tensor(-13062918., device='cuda:0')
epoch: 86 test_true_pfm: 23.17579160602128
episode: 344 training return: tensor(-10480381., device='cuda:0')
episode: 345 training return: tensor(-13353925., device='cuda:0')
episode: 346 training return: tensor(-10026856., device='cuda:0')
episode: 347 training return: tensor(-12915336., device='cuda:0')
epoch: 87 test_true_pfm: 18.343427618719165
episode: 348 training return: tensor(-13359931., device='cuda:0')
episode: 349 training return: tensor(-12911888., device='cuda:0')
episode: 350 training return: tensor(-13135269., device='cuda:0')
episode: 351 training return: tensor(-13492203., device='cuda:0')
epoch: 88 test_true_pfm: 9.818471396366476
episode: 352 training return: tensor(-13647683., device='cuda:0')
episode: 353 training return: tensor(-12994078., device='cuda:0')
episode: 354 training return: tensor(-13557523., device='cuda:0')
episode: 355 training return: tensor(-13375865., device='cuda:0')
epoch: 89 test_true_pfm: 17.664548374309827
episode: 356 training return: tensor(-13068436., device='cuda:0')
episode: 357 training return: tensor(-11245979., device='cuda:0')
episode: 358 training return: tensor(-9278457., device='cuda:0')
episode: 359 training return: tensor(-12570953., device='cuda:0')
epoch: 90 test_true_pfm: 9.269907210473225
episode: 360 training return: tensor(-11640126., device='cuda:0')
episode: 361 training return: tensor(-12704850., device='cuda:0')
episode: 362 training return: tensor(-13282464., device='cuda:0')
episode: 363 training return: tensor(-13494444., device='cuda:0')
epoch: 91 test_true_pfm: 13.429051922373754
episode: 364 training return: tensor(-13303628., device='cuda:0')
episode: 365 training return: tensor(-10870073., device='cuda:0')
episode: 366 training return: tensor(-13180725., device='cuda:0')
episode: 367 training return: tensor(-13310452., device='cuda:0')
epoch: 92 test_true_pfm: 18.274230536013313
episode: 368 training return: tensor(-11188444., device='cuda:0')
episode: 369 training return: tensor(-13337403., device='cuda:0')
episode: 370 training return: tensor(-13117375., device='cuda:0')
episode: 371 training return: tensor(-11080667., device='cuda:0')
epoch: 93 test_true_pfm: 13.255202918791998
episode: 372 training return: tensor(-13332587., device='cuda:0')
episode: 373 training return: tensor(-13372292., device='cuda:0')
episode: 374 training return: tensor(-13236877., device='cuda:0')
episode: 375 training return: tensor(-13083749., device='cuda:0')
epoch: 94 test_true_pfm: 12.489614076154604
episode: 376 training return: tensor(-13413668., device='cuda:0')
episode: 377 training return: tensor(-12989057., device='cuda:0')
episode: 378 training return: tensor(-10423733., device='cuda:0')
episode: 379 training return: tensor(-12789287., device='cuda:0')
epoch: 95 test_true_pfm: 13.717837660838763
episode: 380 training return: tensor(-13198868., device='cuda:0')
episode: 381 training return: tensor(-13264431., device='cuda:0')
episode: 382 training return: tensor(-10371191., device='cuda:0')
episode: 383 training return: tensor(-13109084., device='cuda:0')
epoch: 96 test_true_pfm: 14.191890235014313
episode: 384 training return: tensor(-13308049., device='cuda:0')
episode: 385 training return: tensor(-12998849., device='cuda:0')
episode: 386 training return: tensor(-9604240., device='cuda:0')
episode: 387 training return: tensor(-13231785., device='cuda:0')
epoch: 97 test_true_pfm: 17.93096236785725
episode: 388 training return: tensor(-13033626., device='cuda:0')
episode: 389 training return: tensor(-13082054., device='cuda:0')
episode: 390 training return: tensor(-13239921., device='cuda:0')
episode: 391 training return: tensor(-13739554., device='cuda:0')
epoch: 98 test_true_pfm: 6.742232737085381
episode: 392 training return: tensor(-13515962., device='cuda:0')
episode: 393 training return: tensor(-13419537., device='cuda:0')
episode: 394 training return: tensor(-9863924., device='cuda:0')
episode: 395 training return: tensor(-13130776., device='cuda:0')
epoch: 99 test_true_pfm: 11.605804262952462
episode: 396 training return: tensor(-13568952., device='cuda:0')
episode: 397 training return: tensor(-13441294., device='cuda:0')
episode: 398 training return: tensor(-9013587., device='cuda:0')
episode: 399 training return: tensor(-13206457., device='cuda:0')
epoch: 100 test_true_pfm: 11.27463592460568
episode: 400 training return: tensor(-9370348., device='cuda:0')
episode: 401 training return: tensor(-12530375., device='cuda:0')
episode: 402 training return: tensor(-13059160., device='cuda:0')
episode: 403 training return: tensor(-13346494., device='cuda:0')
epoch: 101 test_true_pfm: 19.35903286884648
episode: 404 training return: tensor(-13361743., device='cuda:0')
episode: 405 training return: tensor(-13613070., device='cuda:0')
episode: 406 training return: tensor(-11616014., device='cuda:0')
episode: 407 training return: tensor(-13265518., device='cuda:0')
epoch: 102 test_true_pfm: 13.602820918084642
episode: 408 training return: tensor(-13162388., device='cuda:0')
episode: 409 training return: tensor(-11122612., device='cuda:0')
episode: 410 training return: tensor(-13262363., device='cuda:0')
episode: 411 training return: tensor(-10723504., device='cuda:0')
epoch: 103 test_true_pfm: 14.40862397895161
episode: 412 training return: tensor(-11453550., device='cuda:0')
episode: 413 training return: tensor(-13468256., device='cuda:0')
episode: 414 training return: tensor(-13301064., device='cuda:0')
episode: 415 training return: tensor(-9476137., device='cuda:0')
epoch: 104 test_true_pfm: 11.01663902506569
episode: 416 training return: tensor(-12800676., device='cuda:0')
episode: 417 training return: tensor(-12609934., device='cuda:0')
episode: 418 training return: tensor(-13212000., device='cuda:0')
episode: 419 training return: tensor(-12339935., device='cuda:0')
epoch: 105 test_true_pfm: 13.254906091348943
episode: 420 training return: tensor(-13073087., device='cuda:0')
episode: 421 training return: tensor(-13476012., device='cuda:0')
episode: 422 training return: tensor(-9053752., device='cuda:0')
episode: 423 training return: tensor(-9726039., device='cuda:0')
epoch: 106 test_true_pfm: 6.124973990967598
episode: 424 training return: tensor(-13034666., device='cuda:0')
episode: 425 training return: tensor(-13508965., device='cuda:0')
episode: 426 training return: tensor(-13210700., device='cuda:0')
episode: 427 training return: tensor(-12680495., device='cuda:0')
epoch: 107 test_true_pfm: 14.752995613457042
episode: 428 training return: tensor(-9911312., device='cuda:0')
episode: 429 training return: tensor(-13583222., device='cuda:0')
episode: 430 training return: tensor(-11500213., device='cuda:0')
episode: 431 training return: tensor(-13057808., device='cuda:0')
epoch: 108 test_true_pfm: 11.661613689587986
episode: 432 training return: tensor(-11123276., device='cuda:0')
episode: 433 training return: tensor(-13448743., device='cuda:0')
episode: 434 training return: tensor(-11021027., device='cuda:0')
episode: 435 training return: tensor(-13275540., device='cuda:0')
epoch: 109 test_true_pfm: 12.172412682457496
episode: 436 training return: tensor(-13378050., device='cuda:0')
episode: 437 training return: tensor(-13045212., device='cuda:0')
episode: 438 training return: tensor(-13175858., device='cuda:0')
episode: 439 training return: tensor(-12887898., device='cuda:0')
epoch: 110 test_true_pfm: 9.069168743877276
episode: 440 training return: tensor(-13015771., device='cuda:0')
episode: 441 training return: tensor(-10305990., device='cuda:0')
episode: 442 training return: tensor(-13054239., device='cuda:0')
episode: 443 training return: tensor(-13075803., device='cuda:0')
epoch: 111 test_true_pfm: 14.056358524777314
episode: 444 training return: tensor(-12315707., device='cuda:0')
episode: 445 training return: tensor(-13289766., device='cuda:0')
episode: 446 training return: tensor(-13304107., device='cuda:0')
episode: 447 training return: tensor(-12941375., device='cuda:0')
epoch: 112 test_true_pfm: 21.09751810999078
episode: 448 training return: tensor(-12745771., device='cuda:0')
episode: 449 training return: tensor(-13075570., device='cuda:0')
episode: 450 training return: tensor(-12928640., device='cuda:0')
episode: 451 training return: tensor(-13198399., device='cuda:0')
epoch: 113 test_true_pfm: 11.502448962100178
episode: 452 training return: tensor(-13725279., device='cuda:0')
episode: 453 training return: tensor(-13098093., device='cuda:0')
episode: 454 training return: tensor(-13019785., device='cuda:0')
episode: 455 training return: tensor(-13360808., device='cuda:0')
epoch: 114 test_true_pfm: 14.892189488207489
episode: 456 training return: tensor(-13286380., device='cuda:0')
episode: 457 training return: tensor(-11036998., device='cuda:0')
episode: 458 training return: tensor(-10587201., device='cuda:0')
episode: 459 training return: tensor(-13345024., device='cuda:0')
epoch: 115 test_true_pfm: 14.112825412450462
episode: 460 training return: tensor(-13259438., device='cuda:0')
episode: 461 training return: tensor(-12156572., device='cuda:0')
episode: 462 training return: tensor(-13268394., device='cuda:0')
episode: 463 training return: tensor(-9201566., device='cuda:0')
epoch: 116 test_true_pfm: 13.504656238293288
episode: 464 training return: tensor(-13396734., device='cuda:0')
episode: 465 training return: tensor(-13525344., device='cuda:0')
episode: 466 training return: tensor(-11129632., device='cuda:0')
episode: 467 training return: tensor(-12996924., device='cuda:0')
epoch: 117 test_true_pfm: 17.074614580764766
episode: 468 training return: tensor(-13099179., device='cuda:0')
episode: 469 training return: tensor(-13214758., device='cuda:0')
episode: 470 training return: tensor(-12994265., device='cuda:0')
episode: 471 training return: tensor(-13322386., device='cuda:0')
epoch: 118 test_true_pfm: 16.935271681214143
episode: 472 training return: tensor(-12136538., device='cuda:0')
episode: 473 training return: tensor(-12153817., device='cuda:0')
episode: 474 training return: tensor(-13181743., device='cuda:0')
episode: 475 training return: tensor(-13220020., device='cuda:0')
epoch: 119 test_true_pfm: 13.352080235541413
episode: 476 training return: tensor(-13312469., device='cuda:0')
episode: 477 training return: tensor(-13398549., device='cuda:0')
episode: 478 training return: tensor(-11557062., device='cuda:0')
episode: 479 training return: tensor(-13406280., device='cuda:0')
epoch: 120 test_true_pfm: 16.393075107681646
episode: 480 training return: tensor(-13052794., device='cuda:0')
episode: 481 training return: tensor(-10373789., device='cuda:0')
episode: 482 training return: tensor(-11585121., device='cuda:0')
episode: 483 training return: tensor(-13358282., device='cuda:0')
epoch: 121 test_true_pfm: 15.350738903225192
episode: 484 training return: tensor(-11626429., device='cuda:0')
episode: 485 training return: tensor(-12785654., device='cuda:0')
episode: 486 training return: tensor(-9145925., device='cuda:0')
episode: 487 training return: tensor(-13382178., device='cuda:0')
epoch: 122 test_true_pfm: 14.685568407023572
episode: 488 training return: tensor(-12945669., device='cuda:0')
episode: 489 training return: tensor(-12886739., device='cuda:0')
episode: 490 training return: tensor(-11754970., device='cuda:0')
episode: 491 training return: tensor(-12035719., device='cuda:0')
epoch: 123 test_true_pfm: 15.092372228807964
episode: 492 training return: tensor(-13177375., device='cuda:0')
episode: 493 training return: tensor(-9549645., device='cuda:0')
episode: 494 training return: tensor(-13328261., device='cuda:0')
episode: 495 training return: tensor(-13202904., device='cuda:0')
epoch: 124 test_true_pfm: 9.58256844147983
episode: 496 training return: tensor(-9324727., device='cuda:0')
episode: 497 training return: tensor(-13307450., device='cuda:0')
episode: 498 training return: tensor(-10609248., device='cuda:0')
episode: 499 training return: tensor(-9736200., device='cuda:0')
epoch: 125 test_true_pfm: 11.030308861234664
episode: 500 training return: tensor(-13130582., device='cuda:0')
episode: 501 training return: tensor(-12435243., device='cuda:0')
episode: 502 training return: tensor(-13162879., device='cuda:0')
episode: 503 training return: tensor(-13073635., device='cuda:0')
epoch: 126 test_true_pfm: 19.161755633527687
episode: 504 training return: tensor(-11993944., device='cuda:0')
episode: 505 training return: tensor(-12181369., device='cuda:0')
episode: 506 training return: tensor(-13406249., device='cuda:0')
episode: 507 training return: tensor(-13327344., device='cuda:0')
epoch: 127 test_true_pfm: 11.382433106125395
episode: 508 training return: tensor(-9908058., device='cuda:0')
episode: 509 training return: tensor(-13363650., device='cuda:0')
episode: 510 training return: tensor(-12690643., device='cuda:0')
episode: 511 training return: tensor(-13559290., device='cuda:0')
epoch: 128 test_true_pfm: 16.663511731579796
episode: 512 training return: tensor(-13234636., device='cuda:0')
episode: 513 training return: tensor(-12203648., device='cuda:0')
episode: 514 training return: tensor(-11865543., device='cuda:0')
episode: 515 training return: tensor(-12758588., device='cuda:0')
epoch: 129 test_true_pfm: 9.121993742776585
episode: 516 training return: tensor(-9218585., device='cuda:0')
episode: 517 training return: tensor(-13585550., device='cuda:0')
episode: 518 training return: tensor(-12325509., device='cuda:0')
episode: 519 training return: tensor(-12714446., device='cuda:0')
epoch: 130 test_true_pfm: 15.432804588058625
episode: 520 training return: tensor(-13236672., device='cuda:0')
episode: 521 training return: tensor(-13024054., device='cuda:0')
episode: 522 training return: tensor(-13093206., device='cuda:0')
episode: 523 training return: tensor(-13102025., device='cuda:0')
epoch: 131 test_true_pfm: 7.6196799419400065
episode: 524 training return: tensor(-13224820., device='cuda:0')
episode: 525 training return: tensor(-13281297., device='cuda:0')
episode: 526 training return: tensor(-11623809., device='cuda:0')
episode: 527 training return: tensor(-12951488., device='cuda:0')
epoch: 132 test_true_pfm: 12.71723084544029
episode: 528 training return: tensor(-13146154., device='cuda:0')
episode: 529 training return: tensor(-12972471., device='cuda:0')
episode: 530 training return: tensor(-11305970., device='cuda:0')
episode: 531 training return: tensor(-13123680., device='cuda:0')
epoch: 133 test_true_pfm: 16.733921411577334
episode: 532 training return: tensor(-13329114., device='cuda:0')
episode: 533 training return: tensor(-13363036., device='cuda:0')
episode: 534 training return: tensor(-13383097., device='cuda:0')
episode: 535 training return: tensor(-13058406., device='cuda:0')
epoch: 134 test_true_pfm: 9.957810327246108
episode: 536 training return: tensor(-10226983., device='cuda:0')
episode: 537 training return: tensor(-13564723., device='cuda:0')
episode: 538 training return: tensor(-12461268., device='cuda:0')
episode: 539 training return: tensor(-11401976., device='cuda:0')
epoch: 135 test_true_pfm: 14.869901460473269
episode: 540 training return: tensor(-13125693., device='cuda:0')
episode: 541 training return: tensor(-13288044., device='cuda:0')
episode: 542 training return: tensor(-13072621., device='cuda:0')
episode: 543 training return: tensor(-12498641., device='cuda:0')
epoch: 136 test_true_pfm: 4.080412151261496
episode: 544 training return: tensor(-13394791., device='cuda:0')
episode: 545 training return: tensor(-11759938., device='cuda:0')
episode: 546 training return: tensor(-10850315., device='cuda:0')
episode: 547 training return: tensor(-13340744., device='cuda:0')
epoch: 137 test_true_pfm: 20.709863205235415
episode: 548 training return: tensor(-13195797., device='cuda:0')
episode: 549 training return: tensor(-11246663., device='cuda:0')
episode: 550 training return: tensor(-12534304., device='cuda:0')
episode: 551 training return: tensor(-13409011., device='cuda:0')
epoch: 138 test_true_pfm: 17.472408148815283
episode: 552 training return: tensor(-12947398., device='cuda:0')
episode: 553 training return: tensor(-13142987., device='cuda:0')
episode: 554 training return: tensor(-13063498., device='cuda:0')
episode: 555 training return: tensor(-13144348., device='cuda:0')
epoch: 139 test_true_pfm: 18.395650560970804
episode: 556 training return: tensor(-13635016., device='cuda:0')
episode: 557 training return: tensor(-13483018., device='cuda:0')
episode: 558 training return: tensor(-11001969., device='cuda:0')
episode: 559 training return: tensor(-13362761., device='cuda:0')
epoch: 140 test_true_pfm: 12.110721725721131
episode: 560 training return: tensor(-13370339., device='cuda:0')
episode: 561 training return: tensor(-9933562., device='cuda:0')
episode: 562 training return: tensor(-13398014., device='cuda:0')
episode: 563 training return: tensor(-13387389., device='cuda:0')
epoch: 141 test_true_pfm: 13.580985095632126
episode: 564 training return: tensor(-11278038., device='cuda:0')
episode: 565 training return: tensor(-13602523., device='cuda:0')
episode: 566 training return: tensor(-12835960., device='cuda:0')
episode: 567 training return: tensor(-13012711., device='cuda:0')
epoch: 142 test_true_pfm: 12.730439473893899
episode: 568 training return: tensor(-13401950., device='cuda:0')
episode: 569 training return: tensor(-13333523., device='cuda:0')
episode: 570 training return: tensor(-8821183., device='cuda:0')
episode: 571 training return: tensor(-13299556., device='cuda:0')
epoch: 143 test_true_pfm: 13.943584118865791
episode: 572 training return: tensor(-12349466., device='cuda:0')
episode: 573 training return: tensor(-12935890., device='cuda:0')
episode: 574 training return: tensor(-13434978., device='cuda:0')
episode: 575 training return: tensor(-12881624., device='cuda:0')
epoch: 144 test_true_pfm: 8.77909482237445
episode: 576 training return: tensor(-12363827., device='cuda:0')
episode: 577 training return: tensor(-13570530., device='cuda:0')
episode: 578 training return: tensor(-11415919., device='cuda:0')
episode: 579 training return: tensor(-13080473., device='cuda:0')
epoch: 145 test_true_pfm: 14.860234939176596
episode: 580 training return: tensor(-13261118., device='cuda:0')
episode: 581 training return: tensor(-12710437., device='cuda:0')
episode: 582 training return: tensor(-12990282., device='cuda:0')
episode: 583 training return: tensor(-13265885., device='cuda:0')
epoch: 146 test_true_pfm: 11.326751359299028
episode: 584 training return: tensor(-13101174., device='cuda:0')
episode: 585 training return: tensor(-10700765., device='cuda:0')
episode: 586 training return: tensor(-13217910., device='cuda:0')
episode: 587 training return: tensor(-13412731., device='cuda:0')
epoch: 147 test_true_pfm: 14.637249929999978
episode: 588 training return: tensor(-13015754., device='cuda:0')
episode: 589 training return: tensor(-13132037., device='cuda:0')
episode: 590 training return: tensor(-13415130., device='cuda:0')
episode: 591 training return: tensor(-12995073., device='cuda:0')
epoch: 148 test_true_pfm: 12.958969576262117
episode: 592 training return: tensor(-13259406., device='cuda:0')
episode: 593 training return: tensor(-13293800., device='cuda:0')
episode: 594 training return: tensor(-13321298., device='cuda:0')
episode: 595 training return: tensor(-13298081., device='cuda:0')
epoch: 149 test_true_pfm: 9.587032060574078
episode: 596 training return: tensor(-13039117., device='cuda:0')
episode: 597 training return: tensor(-13697116., device='cuda:0')
episode: 598 training return: tensor(-13153764., device='cuda:0')
episode: 599 training return: tensor(-13160409., device='cuda:0')
epoch: 150 test_true_pfm: 12.491711663142393
