['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 2.169349294193089 test_loss: 0.2475972890853882
epoch: 1 training_loss -0.2647658038744703 test_loss: -0.7638980865478515
epoch: 2 training_loss -1.0558850598335265 test_loss: -1.2959222793579102
epoch: 3 training_loss -1.5541574084758758 test_loss: -1.7736265182495117
epoch: 4 training_loss -1.8939189064502715 test_loss: -2.025528907775879
epoch: 5 training_loss -2.137637912034988 test_loss: -2.302539825439453
epoch: 6 training_loss -2.3632716703414918 test_loss: -2.495968055725098
epoch: 7 training_loss -2.5576802814006805 test_loss: -2.6900503158569338
epoch: 8 training_loss -2.7131494760513304 test_loss: -2.8783853530883787
epoch: 9 training_loss -2.8585067081451414 test_loss: -2.853338050842285
epoch: 10 training_loss -2.9962421536445616 test_loss: -3.1659082412719726
epoch: 11 training_loss -3.14684130191803 test_loss: -3.1414600372314454
epoch: 12 training_loss -3.1801410269737245 test_loss: -3.388742446899414
epoch: 13 training_loss -3.2986317610740663 test_loss: -3.297286605834961
epoch: 14 training_loss -3.37332524061203 test_loss: -3.4528274536132812
epoch: 15 training_loss -3.4345476531982424 test_loss: -3.3875095367431642
epoch: 16 training_loss -3.4476695394515993 test_loss: -3.532312774658203
epoch: 17 training_loss -3.569746479988098 test_loss: -3.7296756744384765
epoch: 18 training_loss -3.657428481578827 test_loss: -3.675881195068359
epoch: 19 training_loss -3.6096836733818054 test_loss: -3.6687595367431642
epoch: 20 training_loss -3.7186796259880066 test_loss: -3.8125022888183593
epoch: 21 training_loss -3.806386699676514 test_loss: -3.8520477294921873
epoch: 22 training_loss -3.7978559231758116 test_loss: -3.873805618286133
epoch: 23 training_loss -3.9314248943328858 test_loss: -3.8818809509277346
epoch: 24 training_loss -3.888583655357361 test_loss: -3.999165725708008
epoch: 25 training_loss -3.9806468653678895 test_loss: -3.9921249389648437
epoch: 26 training_loss -3.955318169593811 test_loss: -4.013640213012695
epoch: 27 training_loss -4.022321362495422 test_loss: -3.975443649291992
epoch: 28 training_loss -4.105052697658539 test_loss: -4.132368469238282
epoch: 29 training_loss -4.116111023426056 test_loss: -3.9395706176757814
epoch: 30 training_loss -4.102783129215241 test_loss: -4.0990135192871096
epoch: 31 training_loss -4.119022812843323 test_loss: -4.190703201293945
epoch: 32 training_loss -4.145446417331695 test_loss: -4.17608757019043
epoch: 33 training_loss -4.185501306056977 test_loss: -4.163923263549805
epoch: 34 training_loss -4.2424232339859005 test_loss: -4.330736923217773
epoch: 35 training_loss -4.262192511558533 test_loss: -4.356272888183594
epoch: 36 training_loss -4.218990256786347 test_loss: -4.3555908203125
epoch: 37 training_loss -4.270213627815247 test_loss: -4.364303207397461
epoch: 38 training_loss -4.302778434753418 test_loss: -4.400045776367188
epoch: 39 training_loss -4.312488710880279 test_loss: -4.288440704345703
epoch: 40 training_loss -4.277374877929687 test_loss: -4.384950637817383
epoch: 41 training_loss -4.373354187011719 test_loss: -4.294546508789063
epoch: 42 training_loss -4.3788995885849 test_loss: -4.32313232421875
epoch: 43 training_loss -4.39639946937561 test_loss: -4.505448532104492
epoch: 44 training_loss -4.439214701652527 test_loss: -4.49932632446289
epoch: 45 training_loss -4.430630183219909 test_loss: -4.5094257354736325
epoch: 46 training_loss -4.510699195861816 test_loss: -4.523671340942383
epoch: 47 training_loss -4.512565910816193 test_loss: -4.581634521484375
epoch: 48 training_loss -4.483597283363342 test_loss: -4.410540771484375
epoch: 49 training_loss -4.537577414512635 test_loss: -4.629738998413086
epoch: 50 training_loss -4.506391549110413 test_loss: -4.408248519897461
epoch: 51 training_loss -4.534639084339142 test_loss: -4.55073356628418
epoch: 52 training_loss -4.591014876365661 test_loss: -4.637286758422851
epoch: 53 training_loss -4.611891355514526 test_loss: -4.395392227172851
epoch: 54 training_loss -4.541611957550049 test_loss: -4.444190979003906
epoch: 55 training_loss -4.5379441499710085 test_loss: -4.372469711303711
epoch: 56 training_loss -4.598794679641724 test_loss: -4.716432571411133
epoch: 57 training_loss -4.652336316108704 test_loss: -4.6527141571044925
epoch: 58 training_loss -4.652434201240539 test_loss: -4.71440315246582
epoch: 59 training_loss -4.638187160491944 test_loss: -4.468294906616211
epoch: 60 training_loss -4.706232211589813 test_loss: -4.750558090209961
epoch: 61 training_loss -4.69268187046051 test_loss: -4.682826232910156
epoch: 62 training_loss -4.693224248886108 test_loss: -4.6613410949707035
epoch: 63 training_loss -4.581274721622467 test_loss: -4.754468154907227
epoch: 64 training_loss -4.694412145614624 test_loss: -4.7260181427001955
epoch: 65 training_loss -4.7181008696556095 test_loss: -4.855611038208008
epoch: 66 training_loss -4.762176041603088 test_loss: -4.821762466430664
epoch: 67 training_loss -4.745443224906921 test_loss: -4.428571319580078
epoch: 68 training_loss -4.779272437095642 test_loss: -4.856233596801758
epoch: 69 training_loss -4.784513254165649 test_loss: -4.9004974365234375
epoch: 70 training_loss -4.800190796852112 test_loss: -4.69230842590332
epoch: 71 training_loss -4.784747972488403 test_loss: -4.742892074584961
epoch: 72 training_loss -4.798999404907226 test_loss: -4.828401947021485
epoch: 73 training_loss -4.816435394287109 test_loss: -4.8317211151123045
epoch: 74 training_loss -4.861961116790772 test_loss: -4.855884552001953
epoch: 75 training_loss -4.804815149307251 test_loss: -4.898730087280273
epoch: 76 training_loss -4.902841000556946 test_loss: -4.846141815185547
epoch: 77 training_loss -4.884986162185669 test_loss: -4.959506607055664
epoch: 78 training_loss -4.867068786621093 test_loss: -4.721490097045899
epoch: 79 training_loss -4.8788483476638795 test_loss: -4.934076309204102
epoch: 80 training_loss -4.892778606414795 test_loss: -4.948863983154297
epoch: 81 training_loss -4.887414531707764 test_loss: -4.905590057373047
epoch: 82 training_loss -4.8913933801651 test_loss: -4.900649642944336
epoch: 83 training_loss -4.885228939056397 test_loss: -4.924414825439453
epoch: 84 training_loss -4.905657811164856 test_loss: -4.892608642578125
epoch: 85 training_loss -4.952318735122681 test_loss: -5.049539947509766
epoch: 86 training_loss -4.943439755439758 test_loss: -4.846345520019531
epoch: 87 training_loss -4.936413164138794 test_loss: -4.976263427734375
epoch: 88 training_loss -4.977414650917053 test_loss: -4.852912521362304
epoch: 89 training_loss -5.010130739212036 test_loss: -5.059976196289062
epoch: 90 training_loss -5.014743747711182 test_loss: -4.974369049072266
epoch: 91 training_loss -5.0143868398666385 test_loss: -5.138848876953125
epoch: 92 training_loss -4.977070322036743 test_loss: -4.939094543457031
epoch: 93 training_loss -5.019722051620484 test_loss: -4.983493041992188
epoch: 94 training_loss -5.0206377315521244 test_loss: -5.1762035369873045
epoch: 95 training_loss -5.007073822021485 test_loss: -5.023595809936523
epoch: 96 training_loss -5.055180916786194 test_loss: -5.078026199340821
epoch: 97 training_loss -5.011818161010742 test_loss: -4.962649154663086
epoch: 98 training_loss -5.051111855506897 test_loss: -5.035959625244141
epoch: 99 training_loss -5.076938762664795 test_loss: -5.0934303283691404
epoch: 100 training_loss -5.073634252548218 test_loss: -5.083684921264648
epoch: 101 training_loss -5.100805125236511 test_loss: -5.147064208984375
epoch: 102 training_loss -5.090518374443054 test_loss: -5.073443222045898
epoch: 103 training_loss -5.0844501113891605 test_loss: -5.10283088684082
epoch: 104 training_loss -5.114571237564087 test_loss: -5.028638076782227
epoch: 105 training_loss -5.101590056419372 test_loss: -5.018603897094726
epoch: 106 training_loss -5.114587860107422 test_loss: -5.068609619140625
epoch: 107 training_loss -5.109041585922241 test_loss: -5.21967887878418
epoch: 108 training_loss -5.150096611976624 test_loss: -5.203202819824218
epoch: 109 training_loss -5.128585782051086 test_loss: -5.110928344726562
epoch: 110 training_loss -5.1092017555236815 test_loss: -5.149081420898438
epoch: 111 training_loss -5.1616146278381345 test_loss: -5.151748275756836
epoch: 112 training_loss -5.1762180519104 test_loss: -5.171921157836914
epoch: 113 training_loss -5.169933247566223 test_loss: -5.258012771606445
epoch: 114 training_loss -5.168296332359314 test_loss: -5.199017333984375
epoch: 115 training_loss -5.194417915344238 test_loss: -5.138034439086914
epoch: 116 training_loss -5.139845633506775 test_loss: -5.048004531860352
epoch: 117 training_loss -5.2105557727813725 test_loss: -5.217887878417969
epoch: 118 training_loss -5.2323017406463626 test_loss: -5.155506896972656
epoch: 119 training_loss -5.2079249238967895 test_loss: -5.258882141113281
epoch: 120 training_loss -5.242113122940063 test_loss: -5.256293869018554
epoch: 121 training_loss -5.253835935592651 test_loss: -5.296598434448242
epoch: 122 training_loss -5.187623391151428 test_loss: -5.269930267333985
epoch: 123 training_loss -5.256107697486877 test_loss: -5.236717987060547
epoch: 124 training_loss -5.2302211475372316 test_loss: -5.310217666625976
epoch: 125 training_loss -5.214632229804993 test_loss: -5.2599021911621096
epoch: 126 training_loss -5.313354406356812 test_loss: -5.243461608886719
epoch: 127 training_loss -5.266941704750061 test_loss: -5.151025009155274
epoch: 128 training_loss -5.212454500198365 test_loss: -5.212277603149414
epoch: 129 training_loss -5.272753820419312 test_loss: -5.242591094970703
epoch: 130 training_loss -5.245253539085388 test_loss: -5.236614227294922
epoch: 131 training_loss -5.25668342590332 test_loss: -5.313061904907227
epoch: 132 training_loss -5.333502607345581 test_loss: -5.250447082519531
epoch: 133 training_loss -5.261693887710571 test_loss: -5.138200759887695
epoch: 134 training_loss -5.337637424468994 test_loss: -5.2208305358886715
epoch: 135 training_loss -5.271487469673157 test_loss: -5.300321578979492
epoch: 136 training_loss -5.251491861343384 test_loss: -5.291996002197266
epoch: 137 training_loss -5.239459538459778 test_loss: -5.32698974609375
epoch: 138 training_loss -5.341283550262451 test_loss: -5.253988265991211
epoch: 139 training_loss -5.346428418159485 test_loss: -5.327091979980469
epoch: 140 training_loss -5.330042481422424 test_loss: -5.365804290771484
epoch: 141 training_loss -5.3276419878005985 test_loss: -5.381833267211914
epoch: 142 training_loss -5.324138851165771 test_loss: -5.196890640258789
epoch: 143 training_loss -5.358076124191284 test_loss: -5.360137557983398
epoch: 144 training_loss -5.352746477127075 test_loss: -5.367464447021485
epoch: 145 training_loss -5.391558866500855 test_loss: -5.373521041870117
epoch: 146 training_loss -5.371079745292664 test_loss: -5.288466262817383
epoch: 147 training_loss -5.377198257446289 test_loss: -5.224236679077149
epoch: 148 training_loss -5.359763894081116 test_loss: -5.2767486572265625
epoch: 149 training_loss -5.317160863876342 test_loss: -5.323965835571289
124.35096800615733
episode: 0 training return: tensor(-18995624., device='cuda:0')
episode: 1 training return: tensor(-30900218., device='cuda:0')
episode: 2 training return: tensor(-26304400., device='cuda:0')
episode: 3 training return: tensor(-1.3652e+08, device='cuda:0')
epoch: 1 test_true_pfm: -18.17471029784437
episode: 4 training return: tensor(-44237292., device='cuda:0')
episode: 5 training return: tensor(-1.4433e+08, device='cuda:0')
episode: 6 training return: tensor(-41137680., device='cuda:0')
episode: 7 training return: tensor(-26225810., device='cuda:0')
epoch: 2 test_true_pfm: -19.94567782818632
episode: 8 training return: tensor(-68892480., device='cuda:0')
episode: 9 training return: tensor(-90722928., device='cuda:0')
episode: 10 training return: tensor(-720972.6250, device='cuda:0')
episode: 11 training return: tensor(-9375.8516, device='cuda:0')
epoch: 3 test_true_pfm: -17.886368130459175
episode: 12 training return: tensor(-17953.1582, device='cuda:0')
episode: 13 training return: tensor(-10552.1221, device='cuda:0')
episode: 14 training return: tensor(-6585043., device='cuda:0')
episode: 15 training return: tensor(-8724.8994, device='cuda:0')
epoch: 4 test_true_pfm: -26.422695628404135
episode: 16 training return: tensor(-25387.1699, device='cuda:0')
episode: 17 training return: tensor(-3473025., device='cuda:0')
episode: 18 training return: tensor(-11299.4541, device='cuda:0')
episode: 19 training return: tensor(-4708.5464, device='cuda:0')
epoch: 5 test_true_pfm: -21.565484991331854
episode: 20 training return: tensor(-12442.6309, device='cuda:0')
episode: 21 training return: tensor(-8991.8926, device='cuda:0')
episode: 22 training return: tensor(-8139.1909, device='cuda:0')
episode: 23 training return: tensor(-5992.2119, device='cuda:0')
epoch: 6 test_true_pfm: -19.176446043367918
episode: 24 training return: tensor(-12142.0088, device='cuda:0')
episode: 25 training return: tensor(-8995.8145, device='cuda:0')
episode: 26 training return: tensor(-16929.1875, device='cuda:0')
episode: 27 training return: tensor(-5132.4761, device='cuda:0')
epoch: 7 test_true_pfm: -17.277488077948853
episode: 28 training return: tensor(-10060.4268, device='cuda:0')
episode: 29 training return: tensor(-11641.7207, device='cuda:0')
episode: 30 training return: tensor(-20861.7363, device='cuda:0')
episode: 31 training return: tensor(-8390.2295, device='cuda:0')
epoch: 8 test_true_pfm: -21.388497526807896
episode: 32 training return: tensor(-18796.9902, device='cuda:0')
episode: 33 training return: tensor(-5430.0522, device='cuda:0')
episode: 34 training return: tensor(-7908.0049, device='cuda:0')
episode: 35 training return: tensor(-11256.2539, device='cuda:0')
epoch: 9 test_true_pfm: -19.401797409067996
episode: 36 training return: tensor(-8615.5117, device='cuda:0')
episode: 37 training return: tensor(-7884.2954, device='cuda:0')
episode: 38 training return: tensor(-16338.4531, device='cuda:0')
episode: 39 training return: tensor(-8636.9346, device='cuda:0')
epoch: 10 test_true_pfm: -15.44797330154562
episode: 40 training return: tensor(-4867.6816, device='cuda:0')
episode: 41 training return: tensor(-10101.9746, device='cuda:0')
episode: 42 training return: tensor(-13325.3799, device='cuda:0')
episode: 43 training return: tensor(-12610.0605, device='cuda:0')
epoch: 11 test_true_pfm: -19.440346441339056
episode: 44 training return: tensor(-4447.2393, device='cuda:0')
episode: 45 training return: tensor(-6250.3159, device='cuda:0')
episode: 46 training return: tensor(-13337.5293, device='cuda:0')
episode: 47 training return: tensor(-10843.3037, device='cuda:0')
epoch: 12 test_true_pfm: -21.826959584938315
episode: 48 training return: tensor(-12100.8652, device='cuda:0')
episode: 49 training return: tensor(-12964.6299, device='cuda:0')
episode: 50 training return: tensor(-20001.9121, device='cuda:0')
episode: 51 training return: tensor(-5324.8916, device='cuda:0')
epoch: 13 test_true_pfm: -19.08460800739148
episode: 52 training return: tensor(-5662.0640, device='cuda:0')
episode: 53 training return: tensor(-12163.2305, device='cuda:0')
episode: 54 training return: tensor(-5172.5723, device='cuda:0')
episode: 55 training return: tensor(-7133.5811, device='cuda:0')
epoch: 14 test_true_pfm: -17.183841860976028
episode: 56 training return: tensor(-11942.5234, device='cuda:0')
episode: 57 training return: tensor(-8171.5557, device='cuda:0')
episode: 58 training return: tensor(-10245.4746, device='cuda:0')
episode: 59 training return: tensor(-4093.6677, device='cuda:0')
epoch: 15 test_true_pfm: -22.376210559415647
episode: 60 training return: tensor(-7976.5249, device='cuda:0')
episode: 61 training return: tensor(-11445.9092, device='cuda:0')
episode: 62 training return: tensor(-12068.9111, device='cuda:0')
episode: 63 training return: tensor(-8266.6855, device='cuda:0')
epoch: 16 test_true_pfm: -21.74097485510604
episode: 64 training return: tensor(-11944.9717, device='cuda:0')
episode: 65 training return: tensor(-7947.0161, device='cuda:0')
episode: 66 training return: tensor(-14096.5439, device='cuda:0')
episode: 67 training return: tensor(-10726.4863, device='cuda:0')
epoch: 17 test_true_pfm: -19.15263270111374
episode: 68 training return: tensor(-5952.0386, device='cuda:0')
episode: 69 training return: tensor(-16064.9346, device='cuda:0')
episode: 70 training return: tensor(-12404.0117, device='cuda:0')
episode: 71 training return: tensor(-9217.3838, device='cuda:0')
epoch: 18 test_true_pfm: -19.31745596599365
episode: 72 training return: tensor(-20240.0645, device='cuda:0')
episode: 73 training return: tensor(-10361.4346, device='cuda:0')
episode: 74 training return: tensor(-23638.6855, device='cuda:0')
episode: 75 training return: tensor(-7370.5298, device='cuda:0')
epoch: 19 test_true_pfm: -18.625201034019277
episode: 76 training return: tensor(-3424.7571, device='cuda:0')
episode: 77 training return: tensor(-17685.8594, device='cuda:0')
episode: 78 training return: tensor(-12206.1377, device='cuda:0')
episode: 79 training return: tensor(-4493.1797, device='cuda:0')
epoch: 20 test_true_pfm: -19.183333546287024
episode: 80 training return: tensor(-27315.0684, device='cuda:0')
episode: 81 training return: tensor(-7251.0229, device='cuda:0')
episode: 82 training return: tensor(-11946.2354, device='cuda:0')
episode: 83 training return: tensor(-4478.3965, device='cuda:0')
epoch: 21 test_true_pfm: -16.482102567032783
episode: 84 training return: tensor(-8032.9360, device='cuda:0')
episode: 85 training return: tensor(-6724.2559, device='cuda:0')
episode: 86 training return: tensor(-4695.9771, device='cuda:0')
episode: 87 training return: tensor(-13526.7100, device='cuda:0')
epoch: 22 test_true_pfm: -17.959593761260987
episode: 88 training return: tensor(-11557.9307, device='cuda:0')
episode: 89 training return: tensor(-6557.5464, device='cuda:0')
episode: 90 training return: tensor(-8802.5996, device='cuda:0')
episode: 91 training return: tensor(-5639.0493, device='cuda:0')
epoch: 23 test_true_pfm: -21.47916994290482
episode: 92 training return: tensor(-5034.3960, device='cuda:0')
episode: 93 training return: tensor(-10728.2070, device='cuda:0')
episode: 94 training return: tensor(-13552.5215, device='cuda:0')
episode: 95 training return: tensor(-11458.2998, device='cuda:0')
epoch: 24 test_true_pfm: -22.91237606191634
episode: 96 training return: tensor(-22648.3457, device='cuda:0')
episode: 97 training return: tensor(-5753.5825, device='cuda:0')
episode: 98 training return: tensor(-7804.6406, device='cuda:0')
episode: 99 training return: tensor(-11678.6738, device='cuda:0')
epoch: 25 test_true_pfm: -16.381567290190162
episode: 100 training return: tensor(-13812.1895, device='cuda:0')
episode: 101 training return: tensor(-18323.6230, device='cuda:0')
episode: 102 training return: tensor(-4558.6362, device='cuda:0')
episode: 103 training return: tensor(-8374.6396, device='cuda:0')
epoch: 26 test_true_pfm: -16.563964306432993
episode: 104 training return: tensor(-10863.6328, device='cuda:0')
episode: 105 training return: tensor(-6459.1987, device='cuda:0')
episode: 106 training return: tensor(-10702.3838, device='cuda:0')
episode: 107 training return: tensor(-15454.4502, device='cuda:0')
epoch: 27 test_true_pfm: -19.876895055977307
episode: 108 training return: tensor(-7921.3013, device='cuda:0')
episode: 109 training return: tensor(-5435.5703, device='cuda:0')
episode: 110 training return: tensor(-20249.3340, device='cuda:0')
episode: 111 training return: tensor(-19484.1543, device='cuda:0')
epoch: 28 test_true_pfm: -19.526548237328978
episode: 112 training return: tensor(-13186.9971, device='cuda:0')
episode: 113 training return: tensor(-9783.2959, device='cuda:0')
episode: 114 training return: tensor(-5190.8906, device='cuda:0')
episode: 115 training return: tensor(-5603.8979, device='cuda:0')
epoch: 29 test_true_pfm: -19.387316915240593
episode: 116 training return: tensor(-18588.6094, device='cuda:0')
episode: 117 training return: tensor(-15091.2227, device='cuda:0')
episode: 118 training return: tensor(-13144.8096, device='cuda:0')
episode: 119 training return: tensor(-6174.7666, device='cuda:0')
epoch: 30 test_true_pfm: -18.30055212754196
episode: 120 training return: tensor(-9400.8848, device='cuda:0')
episode: 121 training return: tensor(-6202.5532, device='cuda:0')
episode: 122 training return: tensor(-11557.5840, device='cuda:0')
episode: 123 training return: tensor(-15419.1465, device='cuda:0')
epoch: 31 test_true_pfm: -18.36326795301612
episode: 124 training return: tensor(-15949.9014, device='cuda:0')
episode: 125 training return: tensor(-10983.4111, device='cuda:0')
episode: 126 training return: tensor(-13138.2412, device='cuda:0')
episode: 127 training return: tensor(-20746.8281, device='cuda:0')
epoch: 32 test_true_pfm: -23.40614724766171
episode: 128 training return: tensor(-16993.5781, device='cuda:0')
episode: 129 training return: tensor(-9754.2461, device='cuda:0')
episode: 130 training return: tensor(-7076.7388, device='cuda:0')
episode: 131 training return: tensor(-22787.5352, device='cuda:0')
epoch: 33 test_true_pfm: -17.353237545024523
episode: 132 training return: tensor(-5996.8452, device='cuda:0')
episode: 133 training return: tensor(-18082.3457, device='cuda:0')
episode: 134 training return: tensor(-6449.8442, device='cuda:0')
episode: 135 training return: tensor(-10539.3691, device='cuda:0')
epoch: 34 test_true_pfm: -19.86401791629543
episode: 136 training return: tensor(-13371.4365, device='cuda:0')
episode: 137 training return: tensor(-4648.9473, device='cuda:0')
episode: 138 training return: tensor(-3326.0559, device='cuda:0')
episode: 139 training return: tensor(-17401.7637, device='cuda:0')
epoch: 35 test_true_pfm: -18.902303364647455
episode: 140 training return: tensor(-10020.1016, device='cuda:0')
episode: 141 training return: tensor(-5344.1172, device='cuda:0')
episode: 142 training return: tensor(-12496.8340, device='cuda:0')
episode: 143 training return: tensor(-13877.1709, device='cuda:0')
epoch: 36 test_true_pfm: -16.713249731576443
episode: 144 training return: tensor(-8110.7241, device='cuda:0')
episode: 145 training return: tensor(-6198.4888, device='cuda:0')
episode: 146 training return: tensor(-9682.8828, device='cuda:0')
episode: 147 training return: tensor(-3526.8977, device='cuda:0')
epoch: 37 test_true_pfm: -22.985013422502966
episode: 148 training return: tensor(-5242.6987, device='cuda:0')
episode: 149 training return: tensor(-17949.6699, device='cuda:0')
episode: 150 training return: tensor(-6592.0142, device='cuda:0')
episode: 151 training return: tensor(-9755.3525, device='cuda:0')
epoch: 38 test_true_pfm: -20.993725489515146
episode: 152 training return: tensor(-10437.7676, device='cuda:0')
episode: 153 training return: tensor(-7351.7920, device='cuda:0')
episode: 154 training return: tensor(-13676.4365, device='cuda:0')
episode: 155 training return: tensor(-14193.8408, device='cuda:0')
epoch: 39 test_true_pfm: -17.941047670506627
episode: 156 training return: tensor(-4192.3711, device='cuda:0')
episode: 157 training return: tensor(-12143.9678, device='cuda:0')
episode: 158 training return: tensor(-6174.7671, device='cuda:0')
episode: 159 training return: tensor(-7131.6201, device='cuda:0')
epoch: 40 test_true_pfm: -18.8913723257365
episode: 160 training return: tensor(-5378.9214, device='cuda:0')
episode: 161 training return: tensor(-8616.2217, device='cuda:0')
episode: 162 training return: tensor(-10585.3213, device='cuda:0')
episode: 163 training return: tensor(-8998.5967, device='cuda:0')
epoch: 41 test_true_pfm: -20.276795618661435
episode: 164 training return: tensor(-23264.0117, device='cuda:0')
episode: 165 training return: tensor(-17776.0020, device='cuda:0')
episode: 166 training return: tensor(-14202.3291, device='cuda:0')
episode: 167 training return: tensor(-11349.8125, device='cuda:0')
epoch: 42 test_true_pfm: -22.592469592898333
episode: 168 training return: tensor(-8251.7061, device='cuda:0')
episode: 169 training return: tensor(-8021.0835, device='cuda:0')
episode: 170 training return: tensor(-17349.6660, device='cuda:0')
episode: 171 training return: tensor(-15856.1973, device='cuda:0')
epoch: 43 test_true_pfm: -18.02098120444326
episode: 172 training return: tensor(-24439.8770, device='cuda:0')
episode: 173 training return: tensor(-11918.8838, device='cuda:0')
episode: 174 training return: tensor(-11041.3887, device='cuda:0')
episode: 175 training return: tensor(-12101.6797, device='cuda:0')
epoch: 44 test_true_pfm: -21.3888006521188
episode: 176 training return: tensor(-9837.5762, device='cuda:0')
episode: 177 training return: tensor(-6385.4985, device='cuda:0')
episode: 178 training return: tensor(-3482.4856, device='cuda:0')
episode: 179 training return: tensor(-14647.8691, device='cuda:0')
epoch: 45 test_true_pfm: -21.214347924827585
episode: 180 training return: tensor(-4935.1567, device='cuda:0')
episode: 181 training return: tensor(-16817.1035, device='cuda:0')
episode: 182 training return: tensor(-6578.6387, device='cuda:0')
episode: 183 training return: tensor(-7786.9409, device='cuda:0')
epoch: 46 test_true_pfm: -21.364000062529783
episode: 184 training return: tensor(-21889.6230, device='cuda:0')
episode: 185 training return: tensor(-10758.6963, device='cuda:0')
episode: 186 training return: tensor(-11278.3496, device='cuda:0')
episode: 187 training return: tensor(-10470.5732, device='cuda:0')
epoch: 47 test_true_pfm: -14.5004049002815
episode: 188 training return: tensor(-5888.3726, device='cuda:0')
episode: 189 training return: tensor(-13678.0898, device='cuda:0')
episode: 190 training return: tensor(-10403.6465, device='cuda:0')
episode: 191 training return: tensor(-4461.9253, device='cuda:0')
epoch: 48 test_true_pfm: -18.918579505423928
episode: 192 training return: tensor(-18883.1523, device='cuda:0')
episode: 193 training return: tensor(-14334.6260, device='cuda:0')
episode: 194 training return: tensor(-4562.5405, device='cuda:0')
episode: 195 training return: tensor(-7134.0200, device='cuda:0')
epoch: 49 test_true_pfm: -21.738721983542813
episode: 196 training return: tensor(-16294.8633, device='cuda:0')
episode: 197 training return: tensor(-7926.8945, device='cuda:0')
episode: 198 training return: tensor(-12685.5957, device='cuda:0')
episode: 199 training return: tensor(-5064.6924, device='cuda:0')
epoch: 50 test_true_pfm: -16.802138159290013
episode: 200 training return: tensor(-10637.7910, device='cuda:0')
episode: 201 training return: tensor(-13692.4160, device='cuda:0')
episode: 202 training return: tensor(-8816.5479, device='cuda:0')
episode: 203 training return: tensor(-7498.2739, device='cuda:0')
epoch: 51 test_true_pfm: -19.92976619671126
episode: 204 training return: tensor(-4836.8745, device='cuda:0')
episode: 205 training return: tensor(-6883.8457, device='cuda:0')
episode: 206 training return: tensor(-8771.6943, device='cuda:0')
episode: 207 training return: tensor(-3262.3328, device='cuda:0')
epoch: 52 test_true_pfm: -20.48704653356105
episode: 208 training return: tensor(-4641.5610, device='cuda:0')
episode: 209 training return: tensor(-5001.6372, device='cuda:0')
episode: 210 training return: tensor(-10065.8984, device='cuda:0')
episode: 211 training return: tensor(-4955.1851, device='cuda:0')
epoch: 53 test_true_pfm: -21.776001730863516
episode: 212 training return: tensor(-11858.7285, device='cuda:0')
episode: 213 training return: tensor(-7241.1294, device='cuda:0')
episode: 214 training return: tensor(-8124.6538, device='cuda:0')
episode: 215 training return: tensor(-4964.5088, device='cuda:0')
epoch: 54 test_true_pfm: -19.75787515940735
episode: 216 training return: tensor(-18502.9355, device='cuda:0')
episode: 217 training return: tensor(-8712.4307, device='cuda:0')
episode: 218 training return: tensor(-8194.3535, device='cuda:0')
episode: 219 training return: tensor(-5356.5498, device='cuda:0')
epoch: 55 test_true_pfm: -21.872960640183223
episode: 220 training return: tensor(-15085.8770, device='cuda:0')
episode: 221 training return: tensor(-7539.3242, device='cuda:0')
episode: 222 training return: tensor(-5979.2354, device='cuda:0')
episode: 223 training return: tensor(-5215.3594, device='cuda:0')
epoch: 56 test_true_pfm: -18.597036745803486
episode: 224 training return: tensor(-10850.9043, device='cuda:0')
episode: 225 training return: tensor(-24064.9746, device='cuda:0')
episode: 226 training return: tensor(-10473.5049, device='cuda:0')
episode: 227 training return: tensor(-3682.2275, device='cuda:0')
epoch: 57 test_true_pfm: -21.52256074330366
episode: 228 training return: tensor(-14702.4639, device='cuda:0')
episode: 229 training return: tensor(-4503.6841, device='cuda:0')
episode: 230 training return: tensor(-6154.9443, device='cuda:0')
episode: 231 training return: tensor(-3028.5500, device='cuda:0')
epoch: 58 test_true_pfm: -21.94456054411968
episode: 232 training return: tensor(-5655.8311, device='cuda:0')
episode: 233 training return: tensor(-10098.4990, device='cuda:0')
episode: 234 training return: tensor(-12571.6533, device='cuda:0')
episode: 235 training return: tensor(-6629.4619, device='cuda:0')
epoch: 59 test_true_pfm: -19.004867585043858
episode: 236 training return: tensor(-15634.7715, device='cuda:0')
episode: 237 training return: tensor(-10821.4404, device='cuda:0')
episode: 238 training return: tensor(-5867.4492, device='cuda:0')
episode: 239 training return: tensor(-11054.3096, device='cuda:0')
epoch: 60 test_true_pfm: -18.475278519503203
episode: 240 training return: tensor(-8120.6123, device='cuda:0')
episode: 241 training return: tensor(-15111.8975, device='cuda:0')
episode: 242 training return: tensor(-14312.2275, device='cuda:0')
episode: 243 training return: tensor(-19475.7852, device='cuda:0')
epoch: 61 test_true_pfm: -20.116617275356816
episode: 244 training return: tensor(-17510.9121, device='cuda:0')
episode: 245 training return: tensor(-7847.1504, device='cuda:0')
episode: 246 training return: tensor(-5028.1138, device='cuda:0')
episode: 247 training return: tensor(-6802.8716, device='cuda:0')
epoch: 62 test_true_pfm: -20.022523294758066
episode: 248 training return: tensor(-3697.5735, device='cuda:0')
episode: 249 training return: tensor(-11780.0205, device='cuda:0')
episode: 250 training return: tensor(-10294.1250, device='cuda:0')
episode: 251 training return: tensor(-8392.7627, device='cuda:0')
epoch: 63 test_true_pfm: -18.181981195521406
episode: 252 training return: tensor(-19216.0488, device='cuda:0')
episode: 253 training return: tensor(-12120.0254, device='cuda:0')
episode: 254 training return: tensor(-13842.6367, device='cuda:0')
episode: 255 training return: tensor(-16884.4102, device='cuda:0')
epoch: 64 test_true_pfm: -18.45249636598252
episode: 256 training return: tensor(-6661.1152, device='cuda:0')
episode: 257 training return: tensor(-11096.0898, device='cuda:0')
episode: 258 training return: tensor(-4460.4609, device='cuda:0')
episode: 259 training return: tensor(-9106.6582, device='cuda:0')
epoch: 65 test_true_pfm: -19.414765045910336
episode: 260 training return: tensor(-15587.9287, device='cuda:0')
episode: 261 training return: tensor(-4245.0679, device='cuda:0')
episode: 262 training return: tensor(-24246.8008, device='cuda:0')
episode: 263 training return: tensor(-5131.7144, device='cuda:0')
epoch: 66 test_true_pfm: -14.98796136352525
episode: 264 training return: tensor(-6554.2798, device='cuda:0')
episode: 265 training return: tensor(-7888.3398, device='cuda:0')
episode: 266 training return: tensor(-8237.2598, device='cuda:0')
episode: 267 training return: tensor(-18786.5820, device='cuda:0')
epoch: 67 test_true_pfm: -16.24751543201392
episode: 268 training return: tensor(-20020.7520, device='cuda:0')
episode: 269 training return: tensor(-10192.9951, device='cuda:0')
episode: 270 training return: tensor(-3569.7161, device='cuda:0')
episode: 271 training return: tensor(-6080.2490, device='cuda:0')
epoch: 68 test_true_pfm: -20.06179187546304
episode: 272 training return: tensor(-8596.1299, device='cuda:0')
episode: 273 training return: tensor(-16708.4551, device='cuda:0')
episode: 274 training return: tensor(-7865.9829, device='cuda:0')
episode: 275 training return: tensor(-23052.6816, device='cuda:0')
epoch: 69 test_true_pfm: -22.910696418506326
episode: 276 training return: tensor(-5710.5234, device='cuda:0')
episode: 277 training return: tensor(-17407.2617, device='cuda:0')
episode: 278 training return: tensor(-7777.6309, device='cuda:0')
episode: 279 training return: tensor(-19032.8457, device='cuda:0')
epoch: 70 test_true_pfm: -18.514008553336815
episode: 280 training return: tensor(-15553.4111, device='cuda:0')
episode: 281 training return: tensor(-9269.8584, device='cuda:0')
episode: 282 training return: tensor(-13245.5674, device='cuda:0')
episode: 283 training return: tensor(-9075.5645, device='cuda:0')
epoch: 71 test_true_pfm: -19.3268098167103
episode: 284 training return: tensor(-11146.4424, device='cuda:0')
episode: 285 training return: tensor(-6675.5693, device='cuda:0')
episode: 286 training return: tensor(-10670.7295, device='cuda:0')
episode: 287 training return: tensor(-4200.1753, device='cuda:0')
epoch: 72 test_true_pfm: -20.417494886663
episode: 288 training return: tensor(-7722.2886, device='cuda:0')
episode: 289 training return: tensor(-6472.2085, device='cuda:0')
episode: 290 training return: tensor(-4294.3105, device='cuda:0')
episode: 291 training return: tensor(-4984.1650, device='cuda:0')
epoch: 73 test_true_pfm: -17.627213155291223
episode: 292 training return: tensor(-6936.8105, device='cuda:0')
episode: 293 training return: tensor(-7566.3452, device='cuda:0')
episode: 294 training return: tensor(-13965.8760, device='cuda:0')
episode: 295 training return: tensor(-6194.3667, device='cuda:0')
epoch: 74 test_true_pfm: -20.99084642959934
episode: 296 training return: tensor(-10318.3418, device='cuda:0')
episode: 297 training return: tensor(-3498.6519, device='cuda:0')
episode: 298 training return: tensor(-6305.7778, device='cuda:0')
episode: 299 training return: tensor(-11259.9297, device='cuda:0')
epoch: 75 test_true_pfm: -18.73548530426295
episode: 300 training return: tensor(-15903.3525, device='cuda:0')
episode: 301 training return: tensor(-12960.0547, device='cuda:0')
episode: 302 training return: tensor(-11012.0312, device='cuda:0')
episode: 303 training return: tensor(-15399.0361, device='cuda:0')
epoch: 76 test_true_pfm: -20.10248774562525
episode: 304 training return: tensor(-7235.7979, device='cuda:0')
episode: 305 training return: tensor(-6688.4712, device='cuda:0')
episode: 306 training return: tensor(-6686.9351, device='cuda:0')
episode: 307 training return: tensor(-5214.4106, device='cuda:0')
epoch: 77 test_true_pfm: -18.248759985286277
episode: 308 training return: tensor(-17269.0859, device='cuda:0')
episode: 309 training return: tensor(-8329.0967, device='cuda:0')
episode: 310 training return: tensor(-8646.6572, device='cuda:0')
episode: 311 training return: tensor(-13406.9033, device='cuda:0')
epoch: 78 test_true_pfm: -19.085658590943506
episode: 312 training return: tensor(-12668.8623, device='cuda:0')
episode: 313 training return: tensor(-19883.5605, device='cuda:0')
episode: 314 training return: tensor(-17722.2227, device='cuda:0')
episode: 315 training return: tensor(-4379.5200, device='cuda:0')
epoch: 79 test_true_pfm: -19.433423500245674
episode: 316 training return: tensor(-7927.4819, device='cuda:0')
episode: 317 training return: tensor(-4062.3774, device='cuda:0')
episode: 318 training return: tensor(-20802.4414, device='cuda:0')
episode: 319 training return: tensor(-15127.2637, device='cuda:0')
epoch: 80 test_true_pfm: -18.48083275315099
episode: 320 training return: tensor(-8782.1553, device='cuda:0')
episode: 321 training return: tensor(-9348.6738, device='cuda:0')
episode: 322 training return: tensor(-11924.4355, device='cuda:0')
episode: 323 training return: tensor(-8012.1377, device='cuda:0')
epoch: 81 test_true_pfm: -22.172025478183578
episode: 324 training return: tensor(-18505.5781, device='cuda:0')
episode: 325 training return: tensor(-6620.9805, device='cuda:0')
episode: 326 training return: tensor(-8389.0039, device='cuda:0')
episode: 327 training return: tensor(-20688.9316, device='cuda:0')
epoch: 82 test_true_pfm: -18.966991545432943
episode: 328 training return: tensor(-7092.9487, device='cuda:0')
episode: 329 training return: tensor(-13264.1406, device='cuda:0')
episode: 330 training return: tensor(-6623.0156, device='cuda:0')
episode: 331 training return: tensor(-6710.4937, device='cuda:0')
epoch: 83 test_true_pfm: -17.521621536704394
episode: 332 training return: tensor(-8075.1885, device='cuda:0')
episode: 333 training return: tensor(-8803.8340, device='cuda:0')
episode: 334 training return: tensor(-3834.7517, device='cuda:0')
episode: 335 training return: tensor(-17635.2500, device='cuda:0')
epoch: 84 test_true_pfm: -23.30318708645693
episode: 336 training return: tensor(-15878.6670, device='cuda:0')
episode: 337 training return: tensor(-11895.8955, device='cuda:0')
episode: 338 training return: tensor(-5989.3745, device='cuda:0')
episode: 339 training return: tensor(-4314.9136, device='cuda:0')
epoch: 85 test_true_pfm: -17.98766998447723
episode: 340 training return: tensor(-11340.8965, device='cuda:0')
episode: 341 training return: tensor(-26168.6660, device='cuda:0')
episode: 342 training return: tensor(-5967.6963, device='cuda:0')
episode: 343 training return: tensor(-8457.8057, device='cuda:0')
epoch: 86 test_true_pfm: -21.782960595088834
episode: 344 training return: tensor(-13511.8975, device='cuda:0')
episode: 345 training return: tensor(-5180.7241, device='cuda:0')
episode: 346 training return: tensor(-14256.4043, device='cuda:0')
episode: 347 training return: tensor(-12231.5137, device='cuda:0')
epoch: 87 test_true_pfm: -19.610714516807967
episode: 348 training return: tensor(-4054.4048, device='cuda:0')
episode: 349 training return: tensor(-19765.8926, device='cuda:0')
episode: 350 training return: tensor(-11407.3389, device='cuda:0')
episode: 351 training return: tensor(-17307.7559, device='cuda:0')
epoch: 88 test_true_pfm: -20.560912415087557
episode: 352 training return: tensor(-7633.7505, device='cuda:0')
episode: 353 training return: tensor(-4878.1284, device='cuda:0')
episode: 354 training return: tensor(-8700.5029, device='cuda:0')
episode: 355 training return: tensor(-18101.4902, device='cuda:0')
epoch: 89 test_true_pfm: -17.7146281225201
episode: 356 training return: tensor(-9734.6533, device='cuda:0')
episode: 357 training return: tensor(-8551.6611, device='cuda:0')
episode: 358 training return: tensor(-7298.9756, device='cuda:0')
episode: 359 training return: tensor(-15174.3008, device='cuda:0')
epoch: 90 test_true_pfm: -21.941064414569297
episode: 360 training return: tensor(-7099.2915, device='cuda:0')
episode: 361 training return: tensor(-21573.7637, device='cuda:0')
episode: 362 training return: tensor(-8174.0947, device='cuda:0')
episode: 363 training return: tensor(-3887.8235, device='cuda:0')
epoch: 91 test_true_pfm: -19.4652024819366
episode: 364 training return: tensor(-8228.2588, device='cuda:0')
episode: 365 training return: tensor(-9795.2178, device='cuda:0')
episode: 366 training return: tensor(-20516.9902, device='cuda:0')
episode: 367 training return: tensor(-11926.3906, device='cuda:0')
epoch: 92 test_true_pfm: -22.320788223909425
episode: 368 training return: tensor(-22052.6113, device='cuda:0')
episode: 369 training return: tensor(-10569.3604, device='cuda:0')
episode: 370 training return: tensor(-3262.5903, device='cuda:0')
episode: 371 training return: tensor(-4885.8081, device='cuda:0')
epoch: 93 test_true_pfm: -21.343595513376634
episode: 372 training return: tensor(-10384.2490, device='cuda:0')
episode: 373 training return: tensor(-11091.2666, device='cuda:0')
episode: 374 training return: tensor(-11002.9072, device='cuda:0')
episode: 375 training return: tensor(-4603.0288, device='cuda:0')
epoch: 94 test_true_pfm: -21.489732692809554
episode: 376 training return: tensor(-5435.8569, device='cuda:0')
episode: 377 training return: tensor(-15495.1748, device='cuda:0')
episode: 378 training return: tensor(-10244.9365, device='cuda:0')
episode: 379 training return: tensor(-4493.4653, device='cuda:0')
epoch: 95 test_true_pfm: -17.330658891914
episode: 380 training return: tensor(-6366.5776, device='cuda:0')
episode: 381 training return: tensor(-6585.8315, device='cuda:0')
episode: 382 training return: tensor(-20272.9395, device='cuda:0')
episode: 383 training return: tensor(-14601.1709, device='cuda:0')
epoch: 96 test_true_pfm: -18.783516690805623
episode: 384 training return: tensor(-18894.2871, device='cuda:0')
episode: 385 training return: tensor(-7683.4038, device='cuda:0')
episode: 386 training return: tensor(-5250.9751, device='cuda:0')
episode: 387 training return: tensor(-15426.3301, device='cuda:0')
epoch: 97 test_true_pfm: -18.46549513087636
episode: 388 training return: tensor(-3425.0520, device='cuda:0')
episode: 389 training return: tensor(-8080.7598, device='cuda:0')
episode: 390 training return: tensor(-9287.9893, device='cuda:0')
episode: 391 training return: tensor(-5874.8940, device='cuda:0')
epoch: 98 test_true_pfm: -16.23275597960883
episode: 392 training return: tensor(-20880.6562, device='cuda:0')
episode: 393 training return: tensor(-13969.9092, device='cuda:0')
episode: 394 training return: tensor(-7602.1289, device='cuda:0')
episode: 395 training return: tensor(-8171.3945, device='cuda:0')
epoch: 99 test_true_pfm: -20.45622006545933
episode: 396 training return: tensor(-11405.8867, device='cuda:0')
episode: 397 training return: tensor(-5033.9912, device='cuda:0')
episode: 398 training return: tensor(-20546.1641, device='cuda:0')
episode: 399 training return: tensor(-20657.8555, device='cuda:0')
epoch: 100 test_true_pfm: -21.46467602446966
episode: 400 training return: tensor(-6781.5288, device='cuda:0')
episode: 401 training return: tensor(-15003.1094, device='cuda:0')
episode: 402 training return: tensor(-5189.0059, device='cuda:0')
episode: 403 training return: tensor(-9248.2061, device='cuda:0')
epoch: 101 test_true_pfm: -16.752101668970763
episode: 404 training return: tensor(-14398.1934, device='cuda:0')
episode: 405 training return: tensor(-11334.5156, device='cuda:0')
episode: 406 training return: tensor(-19237.6680, device='cuda:0')
episode: 407 training return: tensor(-6239.3843, device='cuda:0')
epoch: 102 test_true_pfm: -19.420730181326615
episode: 408 training return: tensor(-14031.9092, device='cuda:0')
episode: 409 training return: tensor(-7489.2583, device='cuda:0')
episode: 410 training return: tensor(-10828.0928, device='cuda:0')
episode: 411 training return: tensor(-16491.6172, device='cuda:0')
epoch: 103 test_true_pfm: -21.560542256401416
episode: 412 training return: tensor(-5342.1704, device='cuda:0')
episode: 413 training return: tensor(-13214.5078, device='cuda:0')
episode: 414 training return: tensor(-10492.8877, device='cuda:0')
episode: 415 training return: tensor(-13875.6982, device='cuda:0')
epoch: 104 test_true_pfm: -17.845870503260553
episode: 416 training return: tensor(-21689.4844, device='cuda:0')
episode: 417 training return: tensor(-8162.5767, device='cuda:0')
episode: 418 training return: tensor(-11505.0127, device='cuda:0')
episode: 419 training return: tensor(-7045.2104, device='cuda:0')
epoch: 105 test_true_pfm: -20.272988026797762
episode: 420 training return: tensor(-5254.3086, device='cuda:0')
episode: 421 training return: tensor(-6445.4980, device='cuda:0')
episode: 422 training return: tensor(-14464.7158, device='cuda:0')
episode: 423 training return: tensor(-14020.2764, device='cuda:0')
epoch: 106 test_true_pfm: -19.233356370465764
episode: 424 training return: tensor(-13501.4648, device='cuda:0')
episode: 425 training return: tensor(-29570.1055, device='cuda:0')
episode: 426 training return: tensor(-14354.4385, device='cuda:0')
episode: 427 training return: tensor(-3712.4177, device='cuda:0')
epoch: 107 test_true_pfm: -19.523333352183137
episode: 428 training return: tensor(-6625.9546, device='cuda:0')
episode: 429 training return: tensor(-8310.4697, device='cuda:0')
episode: 430 training return: tensor(-3249.7402, device='cuda:0')
episode: 431 training return: tensor(-18016.1953, device='cuda:0')
epoch: 108 test_true_pfm: -21.89839125239947
episode: 432 training return: tensor(-6357.0698, device='cuda:0')
episode: 433 training return: tensor(-9703.8340, device='cuda:0')
episode: 434 training return: tensor(-8625.5186, device='cuda:0')
episode: 435 training return: tensor(-4602.4639, device='cuda:0')
epoch: 109 test_true_pfm: -21.12944306481234
episode: 436 training return: tensor(-21268.4453, device='cuda:0')
episode: 437 training return: tensor(-4224.2627, device='cuda:0')
episode: 438 training return: tensor(-15844.4678, device='cuda:0')
episode: 439 training return: tensor(-5602.1450, device='cuda:0')
epoch: 110 test_true_pfm: -19.36017587562186
episode: 440 training return: tensor(-12989.4795, device='cuda:0')
episode: 441 training return: tensor(-7942.1064, device='cuda:0')
episode: 442 training return: tensor(-12644.4229, device='cuda:0')
episode: 443 training return: tensor(-12227.6064, device='cuda:0')
epoch: 111 test_true_pfm: -23.67134934049172
episode: 444 training return: tensor(-11094.2920, device='cuda:0')
episode: 445 training return: tensor(-11142.0928, device='cuda:0')
episode: 446 training return: tensor(-6536.9600, device='cuda:0')
episode: 447 training return: tensor(-12331.6514, device='cuda:0')
epoch: 112 test_true_pfm: -19.447179716911343
episode: 448 training return: tensor(-11992.5469, device='cuda:0')
episode: 449 training return: tensor(-7271.3262, device='cuda:0')
episode: 450 training return: tensor(-9923.7637, device='cuda:0')
episode: 451 training return: tensor(-24090.9883, device='cuda:0')
epoch: 113 test_true_pfm: -19.587614186088764
episode: 452 training return: tensor(-7001.0229, device='cuda:0')
episode: 453 training return: tensor(-10663.1396, device='cuda:0')
episode: 454 training return: tensor(-20994.0527, device='cuda:0')
episode: 455 training return: tensor(-6311.4404, device='cuda:0')
epoch: 114 test_true_pfm: -21.197309747746846
episode: 456 training return: tensor(-12421.8799, device='cuda:0')
episode: 457 training return: tensor(-6312.5352, device='cuda:0')
episode: 458 training return: tensor(-22121.8047, device='cuda:0')
episode: 459 training return: tensor(-4757.8560, device='cuda:0')
epoch: 115 test_true_pfm: -20.33667804611778
episode: 460 training return: tensor(-8985.2725, device='cuda:0')
episode: 461 training return: tensor(-14922.7324, device='cuda:0')
episode: 462 training return: tensor(-11502.0693, device='cuda:0')
episode: 463 training return: tensor(-11458.2168, device='cuda:0')
epoch: 116 test_true_pfm: -19.129655230963444
episode: 464 training return: tensor(-11431.7666, device='cuda:0')
episode: 465 training return: tensor(-5704., device='cuda:0')
episode: 466 training return: tensor(-8844.5078, device='cuda:0')
episode: 467 training return: tensor(-6509.4067, device='cuda:0')
epoch: 117 test_true_pfm: -21.679614148057535
episode: 468 training return: tensor(-5882.3496, device='cuda:0')
episode: 469 training return: tensor(-8892.5264, device='cuda:0')
episode: 470 training return: tensor(-8929.4668, device='cuda:0')
episode: 471 training return: tensor(-10697.3887, device='cuda:0')
epoch: 118 test_true_pfm: -19.607436140387172
episode: 472 training return: tensor(-6448.4902, device='cuda:0')
episode: 473 training return: tensor(-10212.9697, device='cuda:0')
episode: 474 training return: tensor(-5720.4297, device='cuda:0')
episode: 475 training return: tensor(-13232.2295, device='cuda:0')
epoch: 119 test_true_pfm: -18.648382742674354
episode: 476 training return: tensor(-8878.8359, device='cuda:0')
episode: 477 training return: tensor(-11059.0654, device='cuda:0')
episode: 478 training return: tensor(-6424.5479, device='cuda:0')
episode: 479 training return: tensor(-8801.5117, device='cuda:0')
epoch: 120 test_true_pfm: -20.28544013687168
episode: 480 training return: tensor(-11181.6182, device='cuda:0')
episode: 481 training return: tensor(-4844.0659, device='cuda:0')
episode: 482 training return: tensor(-7115.2271, device='cuda:0')
episode: 483 training return: tensor(-20073.7773, device='cuda:0')
epoch: 121 test_true_pfm: -21.35216694376363
episode: 484 training return: tensor(-8544.7197, device='cuda:0')
episode: 485 training return: tensor(-5243.7671, device='cuda:0')
episode: 486 training return: tensor(-3656.9148, device='cuda:0')
episode: 487 training return: tensor(-9320.0605, device='cuda:0')
epoch: 122 test_true_pfm: -20.28314780963127
episode: 488 training return: tensor(-23031.4375, device='cuda:0')
episode: 489 training return: tensor(-8750.3945, device='cuda:0')
episode: 490 training return: tensor(-7434.7949, device='cuda:0')
episode: 491 training return: tensor(-5390.8765, device='cuda:0')
epoch: 123 test_true_pfm: -20.21291695816057
episode: 492 training return: tensor(-10463.2646, device='cuda:0')
episode: 493 training return: tensor(-17109.0137, device='cuda:0')
episode: 494 training return: tensor(-8506.2129, device='cuda:0')
episode: 495 training return: tensor(-9989.2432, device='cuda:0')
epoch: 124 test_true_pfm: -17.2712947028949
episode: 496 training return: tensor(-7611.8726, device='cuda:0')
episode: 497 training return: tensor(-11577.9189, device='cuda:0')
episode: 498 training return: tensor(-8894.0088, device='cuda:0')
episode: 499 training return: tensor(-10479.3857, device='cuda:0')
epoch: 125 test_true_pfm: -16.730457809457768
episode: 500 training return: tensor(-10524.0859, device='cuda:0')
episode: 501 training return: tensor(-9854.0586, device='cuda:0')
episode: 502 training return: tensor(-12597.8193, device='cuda:0')
episode: 503 training return: tensor(-5605.6196, device='cuda:0')
epoch: 126 test_true_pfm: -19.25224905277718
episode: 504 training return: tensor(-12585.2588, device='cuda:0')
episode: 505 training return: tensor(-13636.7510, device='cuda:0')
episode: 506 training return: tensor(-4017.9761, device='cuda:0')
episode: 507 training return: tensor(-11254.8262, device='cuda:0')
epoch: 127 test_true_pfm: -20.715777455401543
episode: 508 training return: tensor(-5811.2979, device='cuda:0')
episode: 509 training return: tensor(-4665.1860, device='cuda:0')
episode: 510 training return: tensor(-5085.4907, device='cuda:0')
episode: 511 training return: tensor(-8463.1396, device='cuda:0')
epoch: 128 test_true_pfm: -19.18436350684833
episode: 512 training return: tensor(-22832.4492, device='cuda:0')
episode: 513 training return: tensor(-5107.4985, device='cuda:0')
episode: 514 training return: tensor(-10819.2969, device='cuda:0')
episode: 515 training return: tensor(-18165.2188, device='cuda:0')
epoch: 129 test_true_pfm: -20.93622520789523
episode: 516 training return: tensor(-17568.0996, device='cuda:0')
episode: 517 training return: tensor(-10006.0596, device='cuda:0')
episode: 518 training return: tensor(-3876.4109, device='cuda:0')
episode: 519 training return: tensor(-13278.0186, device='cuda:0')
epoch: 130 test_true_pfm: -22.846344604033295
episode: 520 training return: tensor(-8111.3208, device='cuda:0')
episode: 521 training return: tensor(-13633.8730, device='cuda:0')
episode: 522 training return: tensor(-5265.8159, device='cuda:0')
episode: 523 training return: tensor(-13713.6338, device='cuda:0')
epoch: 131 test_true_pfm: -19.89451660349461
episode: 524 training return: tensor(-8416.2852, device='cuda:0')
episode: 525 training return: tensor(-7261.6738, device='cuda:0')
episode: 526 training return: tensor(-8289.5127, device='cuda:0')
episode: 527 training return: tensor(-12702.6816, device='cuda:0')
epoch: 132 test_true_pfm: -19.11126132442663
episode: 528 training return: tensor(-10217.0781, device='cuda:0')
episode: 529 training return: tensor(-16291.4541, device='cuda:0')
episode: 530 training return: tensor(-9618.2051, device='cuda:0')
episode: 531 training return: tensor(-7238.3877, device='cuda:0')
epoch: 133 test_true_pfm: -16.959084276386456
episode: 532 training return: tensor(-10677.4346, device='cuda:0')
episode: 533 training return: tensor(-7845.5244, device='cuda:0')
episode: 534 training return: tensor(-11138.3408, device='cuda:0')
episode: 535 training return: tensor(-5020.4746, device='cuda:0')
epoch: 134 test_true_pfm: -22.02199668887588
episode: 536 training return: tensor(-10675.4463, device='cuda:0')
episode: 537 training return: tensor(-13370.3184, device='cuda:0')
episode: 538 training return: tensor(-18621.7090, device='cuda:0')
episode: 539 training return: tensor(-7707.6016, device='cuda:0')
epoch: 135 test_true_pfm: -18.953583410156313
episode: 540 training return: tensor(-8556.6094, device='cuda:0')
episode: 541 training return: tensor(-5936.0366, device='cuda:0')
episode: 542 training return: tensor(-16598.6660, device='cuda:0')
episode: 543 training return: tensor(-4986.2974, device='cuda:0')
epoch: 136 test_true_pfm: -20.10745596721103
episode: 544 training return: tensor(-16196.9834, device='cuda:0')
episode: 545 training return: tensor(-7278.7593, device='cuda:0')
episode: 546 training return: tensor(-17651.9648, device='cuda:0')
episode: 547 training return: tensor(-11336.5361, device='cuda:0')
epoch: 137 test_true_pfm: -20.283555468072507
episode: 548 training return: tensor(-9124.4736, device='cuda:0')
episode: 549 training return: tensor(-11089.9775, device='cuda:0')
episode: 550 training return: tensor(-12843.7793, device='cuda:0')
episode: 551 training return: tensor(-28230.4219, device='cuda:0')
epoch: 138 test_true_pfm: -15.521684376745778
episode: 552 training return: tensor(-7575.1353, device='cuda:0')
episode: 553 training return: tensor(-5699.6978, device='cuda:0')
episode: 554 training return: tensor(-9398.0938, device='cuda:0')
episode: 555 training return: tensor(-8327.7861, device='cuda:0')
epoch: 139 test_true_pfm: -19.697938532978057
episode: 556 training return: tensor(-21017.0664, device='cuda:0')
episode: 557 training return: tensor(-11263.3066, device='cuda:0')
episode: 558 training return: tensor(-11734.8721, device='cuda:0')
episode: 559 training return: tensor(-4762.6348, device='cuda:0')
epoch: 140 test_true_pfm: -20.622238177789562
episode: 560 training return: tensor(-14102.1826, device='cuda:0')
episode: 561 training return: tensor(-5663.5259, device='cuda:0')
episode: 562 training return: tensor(-5468.3696, device='cuda:0')
episode: 563 training return: tensor(-10978.8027, device='cuda:0')
epoch: 141 test_true_pfm: -20.965801346003296
episode: 564 training return: tensor(-12247.4443, device='cuda:0')
episode: 565 training return: tensor(-7560.9795, device='cuda:0')
episode: 566 training return: tensor(-14721.6973, device='cuda:0')
episode: 567 training return: tensor(-4594.2300, device='cuda:0')
epoch: 142 test_true_pfm: -19.188439747399485
episode: 568 training return: tensor(-23183.9883, device='cuda:0')
episode: 569 training return: tensor(-17896.2480, device='cuda:0')
episode: 570 training return: tensor(-15082.6162, device='cuda:0')
episode: 571 training return: tensor(-9699.8633, device='cuda:0')
epoch: 143 test_true_pfm: -20.12610084521235
episode: 572 training return: tensor(-6564.9482, device='cuda:0')
episode: 573 training return: tensor(-17553.7891, device='cuda:0')
episode: 574 training return: tensor(-23332.0938, device='cuda:0')
episode: 575 training return: tensor(-14065.0127, device='cuda:0')
epoch: 144 test_true_pfm: -18.171142702068416
episode: 576 training return: tensor(-16165.5234, device='cuda:0')
episode: 577 training return: tensor(-8949.2178, device='cuda:0')
episode: 578 training return: tensor(-7961.5137, device='cuda:0')
episode: 579 training return: tensor(-8064.4292, device='cuda:0')
epoch: 145 test_true_pfm: -22.315842717892078
episode: 580 training return: tensor(-4213.3931, device='cuda:0')
episode: 581 training return: tensor(-4224.9492, device='cuda:0')
episode: 582 training return: tensor(-19912.9180, device='cuda:0')
episode: 583 training return: tensor(-6606.4219, device='cuda:0')
epoch: 146 test_true_pfm: -19.440920687658327
episode: 584 training return: tensor(-26661.6895, device='cuda:0')
episode: 585 training return: tensor(-9596.0967, device='cuda:0')
episode: 586 training return: tensor(-8082.9692, device='cuda:0')
episode: 587 training return: tensor(-8800.7900, device='cuda:0')
epoch: 147 test_true_pfm: -19.495816317377013
episode: 588 training return: tensor(-3046.6936, device='cuda:0')
episode: 589 training return: tensor(-8718.0781, device='cuda:0')
episode: 590 training return: tensor(-23878.6152, device='cuda:0')
episode: 591 training return: tensor(-3856.7014, device='cuda:0')
epoch: 148 test_true_pfm: -15.836028069737072
episode: 592 training return: tensor(-6170.2129, device='cuda:0')
episode: 593 training return: tensor(-20699.2109, device='cuda:0')
episode: 594 training return: tensor(-11914.1660, device='cuda:0')
episode: 595 training return: tensor(-6673.2148, device='cuda:0')
epoch: 149 test_true_pfm: -17.631535533009508
episode: 596 training return: tensor(-15045.8711, device='cuda:0')
episode: 597 training return: tensor(-12233.5205, device='cuda:0')
episode: 598 training return: tensor(-9182.9043, device='cuda:0')
episode: 599 training return: tensor(-16002.0146, device='cuda:0')
epoch: 150 test_true_pfm: -18.394653558127157
