['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 2.2461611086130144 test_loss: 0.5427683830261231
epoch: 1 training_loss -0.013106411546468735 test_loss: -0.3915583610534668
epoch: 2 training_loss -0.6796548170596362 test_loss: -0.8775360107421875
epoch: 3 training_loss -1.2159515079855918 test_loss: -1.3778712272644043
epoch: 4 training_loss -1.6291158926486968 test_loss: -1.7574586868286133
epoch: 5 training_loss -1.9221161335706711 test_loss: -2.0637182235717773
epoch: 6 training_loss -2.113694659471512 test_loss: -2.181848907470703
epoch: 7 training_loss -2.277745784521103 test_loss: -2.4092729568481444
epoch: 8 training_loss -2.473654844760895 test_loss: -2.568006134033203
epoch: 9 training_loss -2.6938563776016236 test_loss: -2.8250492095947264
epoch: 10 training_loss -2.8336085724830626 test_loss: -2.79543399810791
epoch: 11 training_loss -2.9308295392990114 test_loss: -3.063915824890137
epoch: 12 training_loss -3.011124622821808 test_loss: -3.1281412124633787
epoch: 13 training_loss -3.0568613529205324 test_loss: -3.2121925354003906
epoch: 14 training_loss -3.2394346809387207 test_loss: -3.2865928649902343
epoch: 15 training_loss -3.2524094724655153 test_loss: -3.3107975006103514
epoch: 16 training_loss -3.3421224570274353 test_loss: -3.3501697540283204
epoch: 17 training_loss -3.424748167991638 test_loss: -3.4919132232666015
epoch: 18 training_loss -3.4659859108924866 test_loss: -3.489208221435547
epoch: 19 training_loss -3.5079799818992616 test_loss: -3.511430358886719
epoch: 20 training_loss -3.577472550868988 test_loss: -3.6311920166015623
epoch: 21 training_loss -3.6225450301170348 test_loss: -3.7557464599609376
epoch: 22 training_loss -3.70465696811676 test_loss: -3.782889556884766
epoch: 23 training_loss -3.743033308982849 test_loss: -3.7149932861328123
epoch: 24 training_loss -3.7651003789901734 test_loss: -3.757635498046875
epoch: 25 training_loss -3.8218201756477357 test_loss: -3.722139358520508
epoch: 26 training_loss -3.9089123964309693 test_loss: -3.9805694580078126
epoch: 27 training_loss -3.9017842316627505 test_loss: -3.977352523803711
epoch: 28 training_loss -3.936460509300232 test_loss: -4.036831283569336
epoch: 29 training_loss -3.9901339960098268 test_loss: -3.9994468688964844
epoch: 30 training_loss -3.983172354698181 test_loss: -3.9949039459228515
epoch: 31 training_loss -4.063511712551117 test_loss: -4.048913955688477
epoch: 32 training_loss -4.053546860218048 test_loss: -4.166527557373047
epoch: 33 training_loss -4.145700263977051 test_loss: -4.25474739074707
epoch: 34 training_loss -4.166554629802704 test_loss: -4.176362228393555
epoch: 35 training_loss -4.1848271417617795 test_loss: -4.276451110839844
epoch: 36 training_loss -4.23964281797409 test_loss: -4.237641143798828
epoch: 37 training_loss -4.20867529630661 test_loss: -4.161009216308594
epoch: 38 training_loss -4.224918363094329 test_loss: -4.234701538085938
epoch: 39 training_loss -4.233901245594025 test_loss: -4.311190032958985
epoch: 40 training_loss -4.375372939109802 test_loss: -4.161237335205078
epoch: 41 training_loss -4.381228957176209 test_loss: -4.450507354736328
epoch: 42 training_loss -4.349395108222962 test_loss: -4.3609466552734375
epoch: 43 training_loss -4.409453353881836 test_loss: -4.402885818481446
epoch: 44 training_loss -4.372470984458923 test_loss: -4.501270294189453
epoch: 45 training_loss -4.421865127086639 test_loss: -4.345450210571289
epoch: 46 training_loss -4.469969024658203 test_loss: -4.574115371704101
epoch: 47 training_loss -4.50399263381958 test_loss: -4.527851104736328
epoch: 48 training_loss -4.523734874725342 test_loss: -4.4379524230957035
epoch: 49 training_loss -4.512197589874267 test_loss: -4.587914276123047
epoch: 50 training_loss -4.60574743270874 test_loss: -4.636577987670899
epoch: 51 training_loss -4.576017081737518 test_loss: -4.629973602294922
epoch: 52 training_loss -4.609887700080872 test_loss: -4.628601837158203
epoch: 53 training_loss -4.6084801697731015 test_loss: -4.660597229003907
epoch: 54 training_loss -4.674576358795166 test_loss: -4.808791732788086
epoch: 55 training_loss -4.6673766088485715 test_loss: -4.779960250854492
epoch: 56 training_loss -4.662271280288696 test_loss: -4.626428604125977
epoch: 57 training_loss -4.684595856666565 test_loss: -4.796273803710937
epoch: 58 training_loss -4.705156464576721 test_loss: -4.641152191162109
epoch: 59 training_loss -4.665902976989746 test_loss: -4.775637435913086
epoch: 60 training_loss -4.696196284294128 test_loss: -4.779032897949219
epoch: 61 training_loss -4.7167102289199825 test_loss: -4.6661865234375
epoch: 62 training_loss -4.776174049377442 test_loss: -4.876786804199218
epoch: 63 training_loss -4.782292098999023 test_loss: -4.5636962890625
epoch: 64 training_loss -4.825841474533081 test_loss: -4.775739288330078
epoch: 65 training_loss -4.812613682746887 test_loss: -4.936510467529297
epoch: 66 training_loss -4.822913780212402 test_loss: -4.702216339111328
epoch: 67 training_loss -4.84907172203064 test_loss: -4.954892349243164
epoch: 68 training_loss -4.843198494911194 test_loss: -4.907008361816406
epoch: 69 training_loss -4.886421751976013 test_loss: -4.676902770996094
epoch: 70 training_loss -4.88251606464386 test_loss: -4.953116989135742
epoch: 71 training_loss -4.903700776100159 test_loss: -4.921807861328125
epoch: 72 training_loss -4.868914365768433 test_loss: -5.041068649291992
epoch: 73 training_loss -4.941846489906311 test_loss: -5.0162841796875
epoch: 74 training_loss -4.882395844459534 test_loss: -4.998923873901367
epoch: 75 training_loss -4.945159583091736 test_loss: -4.9461822509765625
epoch: 76 training_loss -4.952408294677735 test_loss: -5.002673721313476
epoch: 77 training_loss -4.934328966140747 test_loss: -4.98560676574707
epoch: 78 training_loss -5.00523458480835 test_loss: -4.950500106811523
epoch: 79 training_loss -4.9971217346191406 test_loss: -4.990724563598633
epoch: 80 training_loss -4.978796157836914 test_loss: -5.064846801757812
epoch: 81 training_loss -4.9926828765869145 test_loss: -5.079990005493164
epoch: 82 training_loss -5.058815870285034 test_loss: -5.073628616333008
epoch: 83 training_loss -5.0431970643997195 test_loss: -5.099081039428711
epoch: 84 training_loss -5.052759757041931 test_loss: -5.166387939453125
epoch: 85 training_loss -5.027859864234924 test_loss: -5.076751708984375
epoch: 86 training_loss -5.084780201911927 test_loss: -5.137010955810547
epoch: 87 training_loss -5.064349522590637 test_loss: -4.994514465332031
epoch: 88 training_loss -5.077669258117676 test_loss: -5.187812805175781
epoch: 89 training_loss -5.0848726606369015 test_loss: -4.7528236389160154
epoch: 90 training_loss -5.050057063102722 test_loss: -5.087310028076172
epoch: 91 training_loss -5.134792418479919 test_loss: -5.23438720703125
epoch: 92 training_loss -5.114273934364319 test_loss: -5.160698699951172
epoch: 93 training_loss -5.157936081886292 test_loss: -5.221519851684571
epoch: 94 training_loss -5.131369862556458 test_loss: -5.062068176269531
epoch: 95 training_loss -5.169825406074524 test_loss: -5.257018280029297
epoch: 96 training_loss -5.1301364946365355 test_loss: -5.120135498046875
epoch: 97 training_loss -5.201732778549195 test_loss: -5.22364501953125
epoch: 98 training_loss -5.161383333206177 test_loss: -4.99506950378418
epoch: 99 training_loss -5.195618529319763 test_loss: -5.167526626586914
epoch: 100 training_loss -5.186028852462768 test_loss: -5.246152877807617
epoch: 101 training_loss -5.0916115045547485 test_loss: -5.081385803222656
epoch: 102 training_loss -5.211967039108276 test_loss: -5.299090957641601
epoch: 103 training_loss -5.121700253486633 test_loss: -5.148125457763672
epoch: 104 training_loss -5.247301092147827 test_loss: -5.2954254150390625
epoch: 105 training_loss -5.215291595458984 test_loss: -5.340273666381836
epoch: 106 training_loss -5.2231361150741575 test_loss: -5.295295333862304
epoch: 107 training_loss -5.273843421936035 test_loss: -5.262316131591797
epoch: 108 training_loss -5.257673544883728 test_loss: -5.17857780456543
epoch: 109 training_loss -5.228947229385376 test_loss: -5.294298553466797
epoch: 110 training_loss -5.275389471054077 test_loss: -5.380746078491211
epoch: 111 training_loss -5.304065699577332 test_loss: -5.241130065917969
epoch: 112 training_loss -5.190568933486938 test_loss: -5.136253356933594
epoch: 113 training_loss -5.281139559745789 test_loss: -5.259836196899414
epoch: 114 training_loss -5.232286729812622 test_loss: -5.386832046508789
epoch: 115 training_loss -5.331544694900512 test_loss: -5.396854019165039
epoch: 116 training_loss -5.3575276756286625 test_loss: -5.38067512512207
epoch: 117 training_loss -5.349179015159607 test_loss: -5.383486557006836
epoch: 118 training_loss -5.280131468772888 test_loss: -5.365346908569336
epoch: 119 training_loss -5.3058729028701785 test_loss: -5.384408950805664
epoch: 120 training_loss -5.305877652168274 test_loss: -5.2602485656738285
epoch: 121 training_loss -5.287973132133484 test_loss: -5.399863052368164
epoch: 122 training_loss -5.34525426864624 test_loss: -5.322484970092773
epoch: 123 training_loss -5.26466495513916 test_loss: -5.335107421875
epoch: 124 training_loss -5.36365074634552 test_loss: -5.40379524230957
epoch: 125 training_loss -5.373914313316345 test_loss: -5.397355270385742
epoch: 126 training_loss -5.352492661476135 test_loss: -5.354683303833008
epoch: 127 training_loss -5.382784962654114 test_loss: -5.461924743652344
epoch: 128 training_loss -5.332179131507874 test_loss: -5.446004104614258
epoch: 129 training_loss -5.400351572036743 test_loss: -5.360137557983398
epoch: 130 training_loss -5.36681788444519 test_loss: -5.378896713256836
epoch: 131 training_loss -5.379183225631714 test_loss: -5.406386947631836
epoch: 132 training_loss -5.383177299499511 test_loss: -5.4248920440673825
epoch: 133 training_loss -5.34634452342987 test_loss: -5.386030578613282
epoch: 134 training_loss -5.369535012245178 test_loss: -5.326033401489258
epoch: 135 training_loss -5.38346538066864 test_loss: -5.432235336303711
epoch: 136 training_loss -5.413617129325867 test_loss: -5.491107177734375
epoch: 137 training_loss -5.439369878768921 test_loss: -5.495588684082032
epoch: 138 training_loss -5.438582563400269 test_loss: -5.435054016113281
epoch: 139 training_loss -5.436268625259399 test_loss: -5.539706039428711
epoch: 140 training_loss -5.4223442554473875 test_loss: -5.354471206665039
epoch: 141 training_loss -5.45573468208313 test_loss: -5.450057601928711
epoch: 142 training_loss -5.4404590034484865 test_loss: -5.310792541503906
epoch: 143 training_loss -5.469284691810608 test_loss: -5.594245529174804
epoch: 144 training_loss -5.486285314559937 test_loss: -5.501257705688476
epoch: 145 training_loss -5.378222465515137 test_loss: -5.4206195831298825
epoch: 146 training_loss -5.461789312362671 test_loss: -5.515018463134766
epoch: 147 training_loss -5.414037837982177 test_loss: -5.413190460205078
epoch: 148 training_loss -5.45952410697937 test_loss: -5.473731994628906
epoch: 149 training_loss -5.480013103485107 test_loss: -5.560890579223633
111.03994401928017
episode: 0 training return: tensor(-1.9702e+17, device='cuda:0')
episode: 1 training return: tensor(-3.3824e+13, device='cuda:0')
episode: 2 training return: tensor(-3.9914e+11, device='cuda:0')
episode: 3 training return: tensor(-2.1649e+17, device='cuda:0')
epoch: 1 test_true_pfm: 13.61555458862895
episode: 4 training return: tensor(-1.2433e+12, device='cuda:0')
episode: 5 training return: tensor(-3.8020e+13, device='cuda:0')
episode: 6 training return: tensor(-9.1816e+12, device='cuda:0')
episode: 7 training return: tensor(-1.2561e+13, device='cuda:0')
epoch: 2 test_true_pfm: 18.157287012852837
episode: 8 training return: tensor(-3.1884e+14, device='cuda:0')
episode: 9 training return: tensor(-9327823., device='cuda:0')
episode: 10 training return: tensor(-3575.7126, device='cuda:0')
episode: 11 training return: tensor(80.1353, device='cuda:0')
epoch: 3 test_true_pfm: 13.403871089603928
episode: 12 training return: tensor(-1557.7052, device='cuda:0')
episode: 13 training return: tensor(-309.0710, device='cuda:0')
episode: 14 training return: tensor(-464.3492, device='cuda:0')
episode: 15 training return: tensor(-44.0548, device='cuda:0')
epoch: 4 test_true_pfm: 6.92806051686465
episode: 16 training return: tensor(101.1469, device='cuda:0')
episode: 17 training return: tensor(-187.1104, device='cuda:0')
episode: 18 training return: tensor(103.6626, device='cuda:0')
episode: 19 training return: tensor(-87.8832, device='cuda:0')
epoch: 5 test_true_pfm: 17.146092087534917
episode: 20 training return: tensor(-83.2591, device='cuda:0')
episode: 21 training return: tensor(-298.2351, device='cuda:0')
episode: 22 training return: tensor(-59.7338, device='cuda:0')
episode: 23 training return: tensor(-35.9200, device='cuda:0')
epoch: 6 test_true_pfm: 12.562856849339514
episode: 24 training return: tensor(-1356.7823, device='cuda:0')
episode: 25 training return: tensor(-1.0161, device='cuda:0')
episode: 26 training return: tensor(-234.2933, device='cuda:0')
episode: 27 training return: tensor(91.0324, device='cuda:0')
epoch: 7 test_true_pfm: 10.86340339776648
episode: 28 training return: tensor(-246.9384, device='cuda:0')
episode: 29 training return: tensor(-232.0904, device='cuda:0')
episode: 30 training return: tensor(-153.8544, device='cuda:0')
episode: 31 training return: tensor(-475.9407, device='cuda:0')
epoch: 8 test_true_pfm: 13.038678807351863
episode: 32 training return: tensor(-340.0760, device='cuda:0')
episode: 33 training return: tensor(-316.7285, device='cuda:0')
episode: 34 training return: tensor(-177.8955, device='cuda:0')
episode: 35 training return: tensor(13.3147, device='cuda:0')
epoch: 9 test_true_pfm: 12.497600267327254
episode: 36 training return: tensor(-87.5316, device='cuda:0')
episode: 37 training return: tensor(-24.4849, device='cuda:0')
episode: 38 training return: tensor(-84.3908, device='cuda:0')
episode: 39 training return: tensor(-348.9493, device='cuda:0')
epoch: 10 test_true_pfm: 9.646401741991841
episode: 40 training return: tensor(24.5504, device='cuda:0')
episode: 41 training return: tensor(-186.6881, device='cuda:0')
episode: 42 training return: tensor(67.0318, device='cuda:0')
episode: 43 training return: tensor(-186.9806, device='cuda:0')
epoch: 11 test_true_pfm: 11.96069499169
episode: 44 training return: tensor(-224.0520, device='cuda:0')
episode: 45 training return: tensor(-63.2817, device='cuda:0')
episode: 46 training return: tensor(-391.1436, device='cuda:0')
episode: 47 training return: tensor(-45.7879, device='cuda:0')
epoch: 12 test_true_pfm: 13.128097993361314
episode: 48 training return: tensor(-86.2280, device='cuda:0')
episode: 49 training return: tensor(-918.6962, device='cuda:0')
episode: 50 training return: tensor(-248.5251, device='cuda:0')
episode: 51 training return: tensor(78.3473, device='cuda:0')
epoch: 13 test_true_pfm: 14.67210713432317
episode: 52 training return: tensor(-15.6726, device='cuda:0')
episode: 53 training return: tensor(-516.2833, device='cuda:0')
episode: 54 training return: tensor(-97.4403, device='cuda:0')
episode: 55 training return: tensor(-274.5751, device='cuda:0')
epoch: 14 test_true_pfm: 10.252473829103474
episode: 56 training return: tensor(-167.3475, device='cuda:0')
episode: 57 training return: tensor(-191.4123, device='cuda:0')
episode: 58 training return: tensor(-489.8008, device='cuda:0')
episode: 59 training return: tensor(-279.3645, device='cuda:0')
epoch: 15 test_true_pfm: 9.982946608070636
episode: 60 training return: tensor(-162.4054, device='cuda:0')
episode: 61 training return: tensor(-492.6040, device='cuda:0')
episode: 62 training return: tensor(-53.0499, device='cuda:0')
episode: 63 training return: tensor(-43.7094, device='cuda:0')
epoch: 16 test_true_pfm: 9.178627514488454
episode: 64 training return: tensor(-61.5126, device='cuda:0')
episode: 65 training return: tensor(118.4427, device='cuda:0')
episode: 66 training return: tensor(119.6676, device='cuda:0')
episode: 67 training return: tensor(52.6759, device='cuda:0')
epoch: 17 test_true_pfm: 15.81594476898261
episode: 68 training return: tensor(1.0174, device='cuda:0')
episode: 69 training return: tensor(78.2388, device='cuda:0')
episode: 70 training return: tensor(-149.9452, device='cuda:0')
episode: 71 training return: tensor(-420.4372, device='cuda:0')
epoch: 18 test_true_pfm: 19.313767868263078
episode: 72 training return: tensor(-1229.9241, device='cuda:0')
episode: 73 training return: tensor(-196.5106, device='cuda:0')
episode: 74 training return: tensor(-1334.8352, device='cuda:0')
episode: 75 training return: tensor(-1194.0388, device='cuda:0')
epoch: 19 test_true_pfm: 16.465420137996375
episode: 76 training return: tensor(-168.9901, device='cuda:0')
episode: 77 training return: tensor(-89.2218, device='cuda:0')
episode: 78 training return: tensor(-157.1795, device='cuda:0')
episode: 79 training return: tensor(-170.1401, device='cuda:0')
epoch: 20 test_true_pfm: 13.749349727376067
episode: 80 training return: tensor(-386.7267, device='cuda:0')
episode: 81 training return: tensor(-805.4816, device='cuda:0')
episode: 82 training return: tensor(-174.9254, device='cuda:0')
episode: 83 training return: tensor(-868.4872, device='cuda:0')
epoch: 21 test_true_pfm: 15.875921484555324
episode: 84 training return: tensor(52.3313, device='cuda:0')
episode: 85 training return: tensor(-102.7601, device='cuda:0')
episode: 86 training return: tensor(37.4684, device='cuda:0')
episode: 87 training return: tensor(-589.5018, device='cuda:0')
epoch: 22 test_true_pfm: 12.044067406027974
episode: 88 training return: tensor(72.0933, device='cuda:0')
episode: 89 training return: tensor(114.2042, device='cuda:0')
episode: 90 training return: tensor(104.5450, device='cuda:0')
episode: 91 training return: tensor(-24.9631, device='cuda:0')
epoch: 23 test_true_pfm: 13.238279278963518
episode: 92 training return: tensor(-382.2990, device='cuda:0')
episode: 93 training return: tensor(24.3402, device='cuda:0')
episode: 94 training return: tensor(55.8618, device='cuda:0')
episode: 95 training return: tensor(-231.5833, device='cuda:0')
epoch: 24 test_true_pfm: 10.516750541079759
episode: 96 training return: tensor(-151.7365, device='cuda:0')
episode: 97 training return: tensor(-16.7266, device='cuda:0')
episode: 98 training return: tensor(-621.4761, device='cuda:0')
episode: 99 training return: tensor(-73.9224, device='cuda:0')
epoch: 25 test_true_pfm: 8.494384591042644
episode: 100 training return: tensor(-204.5149, device='cuda:0')
episode: 101 training return: tensor(62.4569, device='cuda:0')
episode: 102 training return: tensor(67.6813, device='cuda:0')
episode: 103 training return: tensor(61.4171, device='cuda:0')
epoch: 26 test_true_pfm: 17.084278226137933
episode: 104 training return: tensor(-75.6515, device='cuda:0')
episode: 105 training return: tensor(85.4049, device='cuda:0')
episode: 106 training return: tensor(80.6711, device='cuda:0')
episode: 107 training return: tensor(71.2201, device='cuda:0')
epoch: 27 test_true_pfm: 21.69160803212709
episode: 108 training return: tensor(90.9184, device='cuda:0')
episode: 109 training return: tensor(33.2576, device='cuda:0')
episode: 110 training return: tensor(69.6741, device='cuda:0')
episode: 111 training return: tensor(-108.8542, device='cuda:0')
epoch: 28 test_true_pfm: 10.036443633418738
episode: 112 training return: tensor(10.9719, device='cuda:0')
episode: 113 training return: tensor(-157.9510, device='cuda:0')
episode: 114 training return: tensor(19.2330, device='cuda:0')
episode: 115 training return: tensor(-365.1019, device='cuda:0')
epoch: 29 test_true_pfm: 19.737593340172317
episode: 116 training return: tensor(-16.5783, device='cuda:0')
episode: 117 training return: tensor(-315.7731, device='cuda:0')
episode: 118 training return: tensor(-258.4312, device='cuda:0')
episode: 119 training return: tensor(-980.0825, device='cuda:0')
epoch: 30 test_true_pfm: 11.33004387964068
episode: 120 training return: tensor(-430.6357, device='cuda:0')
episode: 121 training return: tensor(-98.0978, device='cuda:0')
episode: 122 training return: tensor(-892.2848, device='cuda:0')
episode: 123 training return: tensor(29.6559, device='cuda:0')
epoch: 31 test_true_pfm: 14.378405763333808
episode: 124 training return: tensor(-173.6644, device='cuda:0')
episode: 125 training return: tensor(-1043.2550, device='cuda:0')
episode: 126 training return: tensor(-0.2628, device='cuda:0')
episode: 127 training return: tensor(-161.4449, device='cuda:0')
epoch: 32 test_true_pfm: 13.592765451784496
episode: 128 training return: tensor(-1354.7339, device='cuda:0')
episode: 129 training return: tensor(-116.9742, device='cuda:0')
episode: 130 training return: tensor(140.2747, device='cuda:0')
episode: 131 training return: tensor(-494.6736, device='cuda:0')
epoch: 33 test_true_pfm: 12.91370615864945
episode: 132 training return: tensor(-45.2802, device='cuda:0')
episode: 133 training return: tensor(-662.7496, device='cuda:0')
episode: 134 training return: tensor(-688.7963, device='cuda:0')
episode: 135 training return: tensor(-533.3951, device='cuda:0')
epoch: 34 test_true_pfm: 15.270807450938761
episode: 136 training return: tensor(-1273.8497, device='cuda:0')
episode: 137 training return: tensor(-115.2592, device='cuda:0')
episode: 138 training return: tensor(-99.0222, device='cuda:0')
episode: 139 training return: tensor(-506.6705, device='cuda:0')
epoch: 35 test_true_pfm: 8.990290499690264
episode: 140 training return: tensor(94.8787, device='cuda:0')
episode: 141 training return: tensor(-404.3698, device='cuda:0')
episode: 142 training return: tensor(-146.9767, device='cuda:0')
episode: 143 training return: tensor(42.3461, device='cuda:0')
epoch: 36 test_true_pfm: 8.162049844845301
episode: 144 training return: tensor(-105.5726, device='cuda:0')
episode: 145 training return: tensor(72.7671, device='cuda:0')
episode: 146 training return: tensor(-132.8807, device='cuda:0')
episode: 147 training return: tensor(-133.9645, device='cuda:0')
epoch: 37 test_true_pfm: 12.649980238954578
episode: 148 training return: tensor(-113.8624, device='cuda:0')
episode: 149 training return: tensor(-30.7681, device='cuda:0')
episode: 150 training return: tensor(-393.2574, device='cuda:0')
episode: 151 training return: tensor(93.1948, device='cuda:0')
epoch: 38 test_true_pfm: 18.52302310891313
episode: 152 training return: tensor(-89.5324, device='cuda:0')
episode: 153 training return: tensor(-116.4299, device='cuda:0')
episode: 154 training return: tensor(-67.5051, device='cuda:0')
episode: 155 training return: tensor(-314.3966, device='cuda:0')
epoch: 39 test_true_pfm: 16.647496311709812
episode: 156 training return: tensor(22.4913, device='cuda:0')
episode: 157 training return: tensor(-456.1245, device='cuda:0')
episode: 158 training return: tensor(61.6378, device='cuda:0')
episode: 159 training return: tensor(-786.0781, device='cuda:0')
epoch: 40 test_true_pfm: 8.075263680371613
episode: 160 training return: tensor(-233.3083, device='cuda:0')
episode: 161 training return: tensor(-521.5480, device='cuda:0')
episode: 162 training return: tensor(-509.6610, device='cuda:0')
episode: 163 training return: tensor(-335.1402, device='cuda:0')
epoch: 41 test_true_pfm: 9.410638887325396
episode: 164 training return: tensor(-119.1731, device='cuda:0')
episode: 165 training return: tensor(-948.7751, device='cuda:0')
episode: 166 training return: tensor(43.3537, device='cuda:0')
episode: 167 training return: tensor(-148.0303, device='cuda:0')
epoch: 42 test_true_pfm: 7.481562987770632
episode: 168 training return: tensor(37.3263, device='cuda:0')
episode: 169 training return: tensor(-37.5498, device='cuda:0')
episode: 170 training return: tensor(-429.1960, device='cuda:0')
episode: 171 training return: tensor(-352.4331, device='cuda:0')
epoch: 43 test_true_pfm: 14.73504573661458
episode: 172 training return: tensor(-39.8560, device='cuda:0')
episode: 173 training return: tensor(-2215.4170, device='cuda:0')
episode: 174 training return: tensor(-782.1872, device='cuda:0')
episode: 175 training return: tensor(-197.6776, device='cuda:0')
epoch: 44 test_true_pfm: 13.90211124729843
episode: 176 training return: tensor(-13.4128, device='cuda:0')
episode: 177 training return: tensor(-148.0072, device='cuda:0')
episode: 178 training return: tensor(63.2610, device='cuda:0')
episode: 179 training return: tensor(-438.9252, device='cuda:0')
epoch: 45 test_true_pfm: 16.276005668001353
episode: 180 training return: tensor(-70.5016, device='cuda:0')
episode: 181 training return: tensor(71.3226, device='cuda:0')
episode: 182 training return: tensor(103.2034, device='cuda:0')
episode: 183 training return: tensor(130.4684, device='cuda:0')
epoch: 46 test_true_pfm: 10.246538388632583
episode: 184 training return: tensor(-98.4686, device='cuda:0')
episode: 185 training return: tensor(-310.9296, device='cuda:0')
episode: 186 training return: tensor(31.9027, device='cuda:0')
episode: 187 training return: tensor(53.3449, device='cuda:0')
epoch: 47 test_true_pfm: 9.481688500604173
episode: 188 training return: tensor(-200.2368, device='cuda:0')
episode: 189 training return: tensor(98.4686, device='cuda:0')
episode: 190 training return: tensor(-398.2872, device='cuda:0')
episode: 191 training return: tensor(61.8876, device='cuda:0')
epoch: 48 test_true_pfm: 10.691634665380494
episode: 192 training return: tensor(-145.5962, device='cuda:0')
episode: 193 training return: tensor(-139.3300, device='cuda:0')
episode: 194 training return: tensor(-59.1880, device='cuda:0')
episode: 195 training return: tensor(-154.9361, device='cuda:0')
epoch: 49 test_true_pfm: 14.848499658966858
episode: 196 training return: tensor(-10.5007, device='cuda:0')
episode: 197 training return: tensor(-287.5461, device='cuda:0')
episode: 198 training return: tensor(73.6302, device='cuda:0')
episode: 199 training return: tensor(-1052.0229, device='cuda:0')
epoch: 50 test_true_pfm: 15.115076427630589
episode: 200 training return: tensor(-323.1707, device='cuda:0')
episode: 201 training return: tensor(-127.7944, device='cuda:0')
episode: 202 training return: tensor(64.7753, device='cuda:0')
episode: 203 training return: tensor(-246.4032, device='cuda:0')
epoch: 51 test_true_pfm: 14.373790846073984
episode: 204 training return: tensor(21.3078, device='cuda:0')
episode: 205 training return: tensor(69.8556, device='cuda:0')
episode: 206 training return: tensor(-79.5845, device='cuda:0')
episode: 207 training return: tensor(30.4723, device='cuda:0')
epoch: 52 test_true_pfm: 7.959353369094963
episode: 208 training return: tensor(86.3341, device='cuda:0')
episode: 209 training return: tensor(-5.0035, device='cuda:0')
episode: 210 training return: tensor(-74.7826, device='cuda:0')
episode: 211 training return: tensor(-519.6168, device='cuda:0')
epoch: 53 test_true_pfm: 15.98009034704664
episode: 212 training return: tensor(-183.2132, device='cuda:0')
episode: 213 training return: tensor(-44.9945, device='cuda:0')
episode: 214 training return: tensor(-52.2860, device='cuda:0')
episode: 215 training return: tensor(-85.3103, device='cuda:0')
epoch: 54 test_true_pfm: 16.667449250782816
episode: 216 training return: tensor(124.3713, device='cuda:0')
episode: 217 training return: tensor(97.9880, device='cuda:0')
episode: 218 training return: tensor(-234.6574, device='cuda:0')
episode: 219 training return: tensor(-20.7619, device='cuda:0')
epoch: 55 test_true_pfm: 19.44622715418226
episode: 220 training return: tensor(-61.0166, device='cuda:0')
episode: 221 training return: tensor(-9.8776, device='cuda:0')
episode: 222 training return: tensor(1.8854, device='cuda:0')
episode: 223 training return: tensor(48.5866, device='cuda:0')
epoch: 56 test_true_pfm: 8.111204138054095
episode: 224 training return: tensor(48.8752, device='cuda:0')
episode: 225 training return: tensor(-14.2529, device='cuda:0')
episode: 226 training return: tensor(-107.8732, device='cuda:0')
episode: 227 training return: tensor(-574.1536, device='cuda:0')
epoch: 57 test_true_pfm: 3.597293344602799
episode: 228 training return: tensor(-174.9190, device='cuda:0')
episode: 229 training return: tensor(-284.9506, device='cuda:0')
episode: 230 training return: tensor(-83.8215, device='cuda:0')
episode: 231 training return: tensor(56.8578, device='cuda:0')
epoch: 58 test_true_pfm: 14.042818693449885
episode: 232 training return: tensor(-113.3804, device='cuda:0')
episode: 233 training return: tensor(-30.4594, device='cuda:0')
episode: 234 training return: tensor(-352.1527, device='cuda:0')
episode: 235 training return: tensor(-243.1317, device='cuda:0')
epoch: 59 test_true_pfm: 14.30618270153866
episode: 236 training return: tensor(48.7798, device='cuda:0')
episode: 237 training return: tensor(96.3202, device='cuda:0')
episode: 238 training return: tensor(63.6699, device='cuda:0')
episode: 239 training return: tensor(61.2813, device='cuda:0')
epoch: 60 test_true_pfm: 5.026845081895323
episode: 240 training return: tensor(-495.6466, device='cuda:0')
episode: 241 training return: tensor(-464.8368, device='cuda:0')
episode: 242 training return: tensor(77.4727, device='cuda:0')
episode: 243 training return: tensor(55.9965, device='cuda:0')
epoch: 61 test_true_pfm: 16.643548446091216
episode: 244 training return: tensor(-1352.3934, device='cuda:0')
episode: 245 training return: tensor(-128.6389, device='cuda:0')
episode: 246 training return: tensor(119.9001, device='cuda:0')
episode: 247 training return: tensor(50.5085, device='cuda:0')
epoch: 62 test_true_pfm: 10.385004040973836
episode: 248 training return: tensor(-592.8804, device='cuda:0')
episode: 249 training return: tensor(-1023.7669, device='cuda:0')
episode: 250 training return: tensor(-34.4111, device='cuda:0')
episode: 251 training return: tensor(110.8978, device='cuda:0')
epoch: 63 test_true_pfm: 14.45086262286425
episode: 252 training return: tensor(126.4808, device='cuda:0')
episode: 253 training return: tensor(-158.2460, device='cuda:0')
episode: 254 training return: tensor(-204.1860, device='cuda:0')
episode: 255 training return: tensor(-219.3243, device='cuda:0')
epoch: 64 test_true_pfm: 20.220144414992994
episode: 256 training return: tensor(100.4715, device='cuda:0')
episode: 257 training return: tensor(90.4080, device='cuda:0')
episode: 258 training return: tensor(121.0915, device='cuda:0')
episode: 259 training return: tensor(-56.3282, device='cuda:0')
epoch: 65 test_true_pfm: 20.704595172986863
episode: 260 training return: tensor(-297.8155, device='cuda:0')
episode: 261 training return: tensor(-346.9316, device='cuda:0')
episode: 262 training return: tensor(21.3883, device='cuda:0')
episode: 263 training return: tensor(-166.4243, device='cuda:0')
epoch: 66 test_true_pfm: 15.507706298907504
episode: 264 training return: tensor(-8.9729, device='cuda:0')
episode: 265 training return: tensor(-350.4660, device='cuda:0')
episode: 266 training return: tensor(77.4716, device='cuda:0')
episode: 267 training return: tensor(-148.5578, device='cuda:0')
epoch: 67 test_true_pfm: 13.624390114197166
episode: 268 training return: tensor(-175.4397, device='cuda:0')
episode: 269 training return: tensor(-448.5008, device='cuda:0')
episode: 270 training return: tensor(-765.6605, device='cuda:0')
episode: 271 training return: tensor(33.6612, device='cuda:0')
epoch: 68 test_true_pfm: 7.276164245491918
episode: 272 training return: tensor(-4.8762, device='cuda:0')
episode: 273 training return: tensor(-295.7487, device='cuda:0')
episode: 274 training return: tensor(67.4006, device='cuda:0')
episode: 275 training return: tensor(62.5535, device='cuda:0')
epoch: 69 test_true_pfm: 13.077419991464563
episode: 276 training return: tensor(-502.4099, device='cuda:0')
episode: 277 training return: tensor(35.3156, device='cuda:0')
episode: 278 training return: tensor(46.0589, device='cuda:0')
episode: 279 training return: tensor(-1472.5663, device='cuda:0')
epoch: 70 test_true_pfm: 15.988761751842564
episode: 280 training return: tensor(-64.7011, device='cuda:0')
episode: 281 training return: tensor(-956.8686, device='cuda:0')
episode: 282 training return: tensor(26.5037, device='cuda:0')
episode: 283 training return: tensor(75.7339, device='cuda:0')
epoch: 71 test_true_pfm: 16.866733365937634
episode: 284 training return: tensor(-111.0412, device='cuda:0')
episode: 285 training return: tensor(111.7596, device='cuda:0')
episode: 286 training return: tensor(-139.2604, device='cuda:0')
episode: 287 training return: tensor(105.2815, device='cuda:0')
epoch: 72 test_true_pfm: 9.231560363252651
episode: 288 training return: tensor(-463.3470, device='cuda:0')
episode: 289 training return: tensor(-30.5804, device='cuda:0')
episode: 290 training return: tensor(-164.1590, device='cuda:0')
episode: 291 training return: tensor(-89.6241, device='cuda:0')
epoch: 73 test_true_pfm: 13.36487353393365
episode: 292 training return: tensor(-57.2535, device='cuda:0')
episode: 293 training return: tensor(34.6266, device='cuda:0')
episode: 294 training return: tensor(-58.3965, device='cuda:0')
episode: 295 training return: tensor(-43.6704, device='cuda:0')
epoch: 74 test_true_pfm: 16.31671435790063
episode: 296 training return: tensor(-548.4353, device='cuda:0')
episode: 297 training return: tensor(-123.4777, device='cuda:0')
episode: 298 training return: tensor(-269.6351, device='cuda:0')
episode: 299 training return: tensor(-397.6185, device='cuda:0')
epoch: 75 test_true_pfm: 14.587367241318569
episode: 300 training return: tensor(-561.6445, device='cuda:0')
episode: 301 training return: tensor(-103.0583, device='cuda:0')
episode: 302 training return: tensor(-61.5651, device='cuda:0')
episode: 303 training return: tensor(-26.6186, device='cuda:0')
epoch: 76 test_true_pfm: 13.801812271479823
episode: 304 training return: tensor(-115.9132, device='cuda:0')
episode: 305 training return: tensor(-845.7767, device='cuda:0')
episode: 306 training return: tensor(-283.6152, device='cuda:0')
episode: 307 training return: tensor(-248.4000, device='cuda:0')
epoch: 77 test_true_pfm: 17.54888453085895
episode: 308 training return: tensor(-1066.9116, device='cuda:0')
episode: 309 training return: tensor(48.6796, device='cuda:0')
episode: 310 training return: tensor(37.2371, device='cuda:0')
episode: 311 training return: tensor(-1181.0270, device='cuda:0')
epoch: 78 test_true_pfm: 16.450597815922457
episode: 312 training return: tensor(-433.5446, device='cuda:0')
episode: 313 training return: tensor(66.1948, device='cuda:0')
episode: 314 training return: tensor(109.5149, device='cuda:0')
episode: 315 training return: tensor(-91.8686, device='cuda:0')
epoch: 79 test_true_pfm: 11.972073988466676
episode: 316 training return: tensor(-391.3478, device='cuda:0')
episode: 317 training return: tensor(-1517.9542, device='cuda:0')
episode: 318 training return: tensor(67.5172, device='cuda:0')
episode: 319 training return: tensor(-258.8695, device='cuda:0')
epoch: 80 test_true_pfm: 9.433534706160527
episode: 320 training return: tensor(-414.4427, device='cuda:0')
episode: 321 training return: tensor(-673.2789, device='cuda:0')
episode: 322 training return: tensor(-114.9996, device='cuda:0')
episode: 323 training return: tensor(23.3347, device='cuda:0')
epoch: 81 test_true_pfm: 19.053535480780507
episode: 324 training return: tensor(96.8168, device='cuda:0')
episode: 325 training return: tensor(65.2057, device='cuda:0')
episode: 326 training return: tensor(67.1281, device='cuda:0')
episode: 327 training return: tensor(-216.6916, device='cuda:0')
epoch: 82 test_true_pfm: 14.79102399789205
episode: 328 training return: tensor(-40.5586, device='cuda:0')
episode: 329 training return: tensor(-24.1331, device='cuda:0')
episode: 330 training return: tensor(83.9766, device='cuda:0')
episode: 331 training return: tensor(-1347.0624, device='cuda:0')
epoch: 83 test_true_pfm: 7.6777077358368775
episode: 332 training return: tensor(-50.6723, device='cuda:0')
episode: 333 training return: tensor(-132.8087, device='cuda:0')
episode: 334 training return: tensor(-185.2583, device='cuda:0')
episode: 335 training return: tensor(-16.2438, device='cuda:0')
epoch: 84 test_true_pfm: 10.975684657194137
episode: 336 training return: tensor(45.4160, device='cuda:0')
episode: 337 training return: tensor(-27.6477, device='cuda:0')
episode: 338 training return: tensor(-238.9684, device='cuda:0')
episode: 339 training return: tensor(-323.1539, device='cuda:0')
epoch: 85 test_true_pfm: 16.492309418605362
episode: 340 training return: tensor(-123.0190, device='cuda:0')
episode: 341 training return: tensor(-377.2777, device='cuda:0')
episode: 342 training return: tensor(-153.9824, device='cuda:0')
episode: 343 training return: tensor(-126.7962, device='cuda:0')
epoch: 86 test_true_pfm: 12.551229548261645
episode: 344 training return: tensor(-6.1155, device='cuda:0')
episode: 345 training return: tensor(-542.9652, device='cuda:0')
episode: 346 training return: tensor(-707.5943, device='cuda:0')
episode: 347 training return: tensor(-167.3155, device='cuda:0')
epoch: 87 test_true_pfm: 9.076146149390967
episode: 348 training return: tensor(-249.5801, device='cuda:0')
episode: 349 training return: tensor(-961.0218, device='cuda:0')
episode: 350 training return: tensor(-123.3073, device='cuda:0')
episode: 351 training return: tensor(90.7068, device='cuda:0')
epoch: 88 test_true_pfm: 19.132985206757695
episode: 352 training return: tensor(83.6174, device='cuda:0')
episode: 353 training return: tensor(114.1865, device='cuda:0')
episode: 354 training return: tensor(-18.3571, device='cuda:0')
episode: 355 training return: tensor(-1971.8345, device='cuda:0')
epoch: 89 test_true_pfm: 10.696884157656813
episode: 356 training return: tensor(-95.8904, device='cuda:0')
episode: 357 training return: tensor(-234.3802, device='cuda:0')
episode: 358 training return: tensor(-519.0292, device='cuda:0')
episode: 359 training return: tensor(-248.3142, device='cuda:0')
epoch: 90 test_true_pfm: 17.560339083988854
episode: 360 training return: tensor(-40.4137, device='cuda:0')
episode: 361 training return: tensor(-320.5742, device='cuda:0')
episode: 362 training return: tensor(-1453.2194, device='cuda:0')
episode: 363 training return: tensor(-620.5519, device='cuda:0')
epoch: 91 test_true_pfm: 13.971255267843105
episode: 364 training return: tensor(47.6728, device='cuda:0')
episode: 365 training return: tensor(62.9679, device='cuda:0')
episode: 366 training return: tensor(24.5735, device='cuda:0')
episode: 367 training return: tensor(-326.1144, device='cuda:0')
epoch: 92 test_true_pfm: 19.5280424350984
episode: 368 training return: tensor(-182.0524, device='cuda:0')
episode: 369 training return: tensor(87.7221, device='cuda:0')
episode: 370 training return: tensor(-347.5234, device='cuda:0')
episode: 371 training return: tensor(64.5360, device='cuda:0')
epoch: 93 test_true_pfm: 13.410547317846952
episode: 372 training return: tensor(-56.9364, device='cuda:0')
episode: 373 training return: tensor(15.6479, device='cuda:0')
episode: 374 training return: tensor(-227.6575, device='cuda:0')
episode: 375 training return: tensor(-100.0093, device='cuda:0')
epoch: 94 test_true_pfm: 17.030720619643095
episode: 376 training return: tensor(48.9786, device='cuda:0')
episode: 377 training return: tensor(-48.9884, device='cuda:0')
episode: 378 training return: tensor(-124.2818, device='cuda:0')
episode: 379 training return: tensor(-399.3825, device='cuda:0')
epoch: 95 test_true_pfm: 13.904204152934389
episode: 380 training return: tensor(38.8519, device='cuda:0')
episode: 381 training return: tensor(-22.6811, device='cuda:0')
episode: 382 training return: tensor(30.4085, device='cuda:0')
episode: 383 training return: tensor(-434.0820, device='cuda:0')
epoch: 96 test_true_pfm: 20.04016086929717
episode: 384 training return: tensor(-38.4067, device='cuda:0')
episode: 385 training return: tensor(-59.6332, device='cuda:0')
episode: 386 training return: tensor(-31.4163, device='cuda:0')
episode: 387 training return: tensor(26.8827, device='cuda:0')
epoch: 97 test_true_pfm: 13.782622000335119
episode: 388 training return: tensor(-33.6607, device='cuda:0')
episode: 389 training return: tensor(-49.1021, device='cuda:0')
episode: 390 training return: tensor(-74.8224, device='cuda:0')
episode: 391 training return: tensor(75.6693, device='cuda:0')
epoch: 98 test_true_pfm: 13.365680061971773
episode: 392 training return: tensor(-232.5629, device='cuda:0')
episode: 393 training return: tensor(-997.1782, device='cuda:0')
episode: 394 training return: tensor(-75.7916, device='cuda:0')
episode: 395 training return: tensor(-510.5000, device='cuda:0')
epoch: 99 test_true_pfm: 15.495989960064824
episode: 396 training return: tensor(-158.7224, device='cuda:0')
episode: 397 training return: tensor(47.0004, device='cuda:0')
episode: 398 training return: tensor(-160.2784, device='cuda:0')
episode: 399 training return: tensor(39.4348, device='cuda:0')
epoch: 100 test_true_pfm: 10.425652499443844
episode: 400 training return: tensor(75.5489, device='cuda:0')
episode: 401 training return: tensor(36.9089, device='cuda:0')
episode: 402 training return: tensor(-387.0088, device='cuda:0')
episode: 403 training return: tensor(-265.6353, device='cuda:0')
epoch: 101 test_true_pfm: 11.689456667163372
episode: 404 training return: tensor(-394.2746, device='cuda:0')
episode: 405 training return: tensor(-405.1299, device='cuda:0')
episode: 406 training return: tensor(26.6901, device='cuda:0')
episode: 407 training return: tensor(-59.9720, device='cuda:0')
epoch: 102 test_true_pfm: 14.88389332361405
episode: 408 training return: tensor(-39.7546, device='cuda:0')
episode: 409 training return: tensor(-1287.9503, device='cuda:0')
episode: 410 training return: tensor(16.7845, device='cuda:0')
episode: 411 training return: tensor(-113.8833, device='cuda:0')
epoch: 103 test_true_pfm: 15.113437269873762
episode: 412 training return: tensor(-233.9878, device='cuda:0')
episode: 413 training return: tensor(113.8489, device='cuda:0')
episode: 414 training return: tensor(-33.1597, device='cuda:0')
episode: 415 training return: tensor(88.7602, device='cuda:0')
epoch: 104 test_true_pfm: 15.167787022991075
episode: 416 training return: tensor(44.2439, device='cuda:0')
episode: 417 training return: tensor(-168.3929, device='cuda:0')
episode: 418 training return: tensor(-1179.4207, device='cuda:0')
episode: 419 training return: tensor(-305.0801, device='cuda:0')
epoch: 105 test_true_pfm: 14.016488179216818
episode: 420 training return: tensor(-113.2547, device='cuda:0')
episode: 421 training return: tensor(-324.2130, device='cuda:0')
episode: 422 training return: tensor(56.6340, device='cuda:0')
episode: 423 training return: tensor(-1247.9453, device='cuda:0')
epoch: 106 test_true_pfm: 19.396813457172797
episode: 424 training return: tensor(-69.7641, device='cuda:0')
episode: 425 training return: tensor(46.1484, device='cuda:0')
episode: 426 training return: tensor(94.7237, device='cuda:0')
episode: 427 training return: tensor(-216.0347, device='cuda:0')
epoch: 107 test_true_pfm: 20.409498610035666
episode: 428 training return: tensor(-53.0541, device='cuda:0')
episode: 429 training return: tensor(-80.3580, device='cuda:0')
episode: 430 training return: tensor(-170.3166, device='cuda:0')
episode: 431 training return: tensor(-68.3429, device='cuda:0')
epoch: 108 test_true_pfm: 8.609430180751273
episode: 432 training return: tensor(-569.9514, device='cuda:0')
episode: 433 training return: tensor(-1061.0833, device='cuda:0')
episode: 434 training return: tensor(41.2955, device='cuda:0')
episode: 435 training return: tensor(-364.6551, device='cuda:0')
epoch: 109 test_true_pfm: 20.578263420756016
episode: 436 training return: tensor(90.4040, device='cuda:0')
episode: 437 training return: tensor(-910.7772, device='cuda:0')
episode: 438 training return: tensor(-266.0067, device='cuda:0')
episode: 439 training return: tensor(-190.2902, device='cuda:0')
epoch: 110 test_true_pfm: 9.107408482940505
episode: 440 training return: tensor(-16.1919, device='cuda:0')
episode: 441 training return: tensor(91.5170, device='cuda:0')
episode: 442 training return: tensor(49.0739, device='cuda:0')
episode: 443 training return: tensor(-242.4722, device='cuda:0')
epoch: 111 test_true_pfm: 18.748928925033685
episode: 444 training return: tensor(-25.1578, device='cuda:0')
episode: 445 training return: tensor(-134.6355, device='cuda:0')
episode: 446 training return: tensor(-1552.1603, device='cuda:0')
episode: 447 training return: tensor(-345.4099, device='cuda:0')
epoch: 112 test_true_pfm: 15.79463345134405
episode: 448 training return: tensor(-276.1143, device='cuda:0')
episode: 449 training return: tensor(-341.8409, device='cuda:0')
episode: 450 training return: tensor(0.3298, device='cuda:0')
episode: 451 training return: tensor(81.5702, device='cuda:0')
epoch: 113 test_true_pfm: 23.886782525318417
episode: 452 training return: tensor(55.7562, device='cuda:0')
episode: 453 training return: tensor(78.2740, device='cuda:0')
episode: 454 training return: tensor(-44.6209, device='cuda:0')
episode: 455 training return: tensor(-39.0178, device='cuda:0')
epoch: 114 test_true_pfm: 11.748339509998265
episode: 456 training return: tensor(-195.5746, device='cuda:0')
episode: 457 training return: tensor(-139.6551, device='cuda:0')
episode: 458 training return: tensor(-953.0283, device='cuda:0')
episode: 459 training return: tensor(50.4844, device='cuda:0')
epoch: 115 test_true_pfm: 16.36131268947788
episode: 460 training return: tensor(-192.3862, device='cuda:0')
episode: 461 training return: tensor(104.6588, device='cuda:0')
episode: 462 training return: tensor(-353.1623, device='cuda:0')
episode: 463 training return: tensor(-56.2235, device='cuda:0')
epoch: 116 test_true_pfm: 15.191295988134877
episode: 464 training return: tensor(-96.3515, device='cuda:0')
episode: 465 training return: tensor(-1034.9902, device='cuda:0')
episode: 466 training return: tensor(-528.4730, device='cuda:0')
episode: 467 training return: tensor(0.8764, device='cuda:0')
epoch: 117 test_true_pfm: 11.774744317469143
episode: 468 training return: tensor(-103.6180, device='cuda:0')
episode: 469 training return: tensor(-151.5126, device='cuda:0')
episode: 470 training return: tensor(-139.8501, device='cuda:0')
episode: 471 training return: tensor(-236.2577, device='cuda:0')
epoch: 118 test_true_pfm: 10.600849214754493
episode: 472 training return: tensor(-201.3072, device='cuda:0')
episode: 473 training return: tensor(-703.5342, device='cuda:0')
episode: 474 training return: tensor(-470.5231, device='cuda:0')
episode: 475 training return: tensor(-339.9595, device='cuda:0')
epoch: 119 test_true_pfm: 14.275760340271564
episode: 476 training return: tensor(-31.3237, device='cuda:0')
episode: 477 training return: tensor(-45.1973, device='cuda:0')
episode: 478 training return: tensor(-349.2722, device='cuda:0')
episode: 479 training return: tensor(62.9546, device='cuda:0')
epoch: 120 test_true_pfm: 15.969978740085446
episode: 480 training return: tensor(-97.9242, device='cuda:0')
episode: 481 training return: tensor(-1020.6833, device='cuda:0')
episode: 482 training return: tensor(-22.4812, device='cuda:0')
episode: 483 training return: tensor(-133.9797, device='cuda:0')
epoch: 121 test_true_pfm: 21.476307456929227
episode: 484 training return: tensor(-806.7663, device='cuda:0')
episode: 485 training return: tensor(-267.1090, device='cuda:0')
episode: 486 training return: tensor(0.3986, device='cuda:0')
episode: 487 training return: tensor(-359.5882, device='cuda:0')
epoch: 122 test_true_pfm: 9.293144474685073
episode: 488 training return: tensor(-43.7706, device='cuda:0')
episode: 489 training return: tensor(73.3615, device='cuda:0')
episode: 490 training return: tensor(-598.0121, device='cuda:0')
episode: 491 training return: tensor(-1232.0363, device='cuda:0')
epoch: 123 test_true_pfm: 10.470764016222851
episode: 492 training return: tensor(-68.5984, device='cuda:0')
episode: 493 training return: tensor(-77.3381, device='cuda:0')
episode: 494 training return: tensor(-281.5800, device='cuda:0')
episode: 495 training return: tensor(2.5858, device='cuda:0')
epoch: 124 test_true_pfm: 13.55339907692957
episode: 496 training return: tensor(13.1921, device='cuda:0')
episode: 497 training return: tensor(73.7667, device='cuda:0')
episode: 498 training return: tensor(-270.5023, device='cuda:0')
episode: 499 training return: tensor(-87.9984, device='cuda:0')
epoch: 125 test_true_pfm: 14.083904693034764
episode: 500 training return: tensor(22.6429, device='cuda:0')
episode: 501 training return: tensor(-512.3339, device='cuda:0')
episode: 502 training return: tensor(73.2145, device='cuda:0')
episode: 503 training return: tensor(-149.9064, device='cuda:0')
epoch: 126 test_true_pfm: 13.291494334135157
episode: 504 training return: tensor(26.1987, device='cuda:0')
episode: 505 training return: tensor(-237.9635, device='cuda:0')
episode: 506 training return: tensor(-386.5640, device='cuda:0')
episode: 507 training return: tensor(-131.9194, device='cuda:0')
epoch: 127 test_true_pfm: 13.284734647307378
episode: 508 training return: tensor(-175.2290, device='cuda:0')
episode: 509 training return: tensor(90.4178, device='cuda:0')
episode: 510 training return: tensor(-56.4725, device='cuda:0')
episode: 511 training return: tensor(67.9579, device='cuda:0')
epoch: 128 test_true_pfm: 8.159698381970378
episode: 512 training return: tensor(-743.2183, device='cuda:0')
episode: 513 training return: tensor(32.0301, device='cuda:0')
episode: 514 training return: tensor(-312.0423, device='cuda:0')
episode: 515 training return: tensor(-519.3276, device='cuda:0')
epoch: 129 test_true_pfm: 17.120048303767824
episode: 516 training return: tensor(-65.9076, device='cuda:0')
episode: 517 training return: tensor(-174.2393, device='cuda:0')
episode: 518 training return: tensor(-207.1447, device='cuda:0')
episode: 519 training return: tensor(-127.4864, device='cuda:0')
epoch: 130 test_true_pfm: 13.076258623394901
episode: 520 training return: tensor(56.9298, device='cuda:0')
episode: 521 training return: tensor(-0.3620, device='cuda:0')
episode: 522 training return: tensor(-343.4882, device='cuda:0')
episode: 523 training return: tensor(-252.7547, device='cuda:0')
epoch: 131 test_true_pfm: 16.78811785345373
episode: 524 training return: tensor(49.1123, device='cuda:0')
episode: 525 training return: tensor(-1034.4974, device='cuda:0')
episode: 526 training return: tensor(71.2898, device='cuda:0')
episode: 527 training return: tensor(-247.2982, device='cuda:0')
epoch: 132 test_true_pfm: 9.211813455887995
episode: 528 training return: tensor(-292.3196, device='cuda:0')
episode: 529 training return: tensor(-319.1767, device='cuda:0')
episode: 530 training return: tensor(-508.8447, device='cuda:0')
episode: 531 training return: tensor(-211.3079, device='cuda:0')
epoch: 133 test_true_pfm: 16.46116797151271
episode: 532 training return: tensor(-184.7850, device='cuda:0')
episode: 533 training return: tensor(42.1709, device='cuda:0')
episode: 534 training return: tensor(-3.4867, device='cuda:0')
episode: 535 training return: tensor(4.5776, device='cuda:0')
epoch: 134 test_true_pfm: 14.388987398082168
episode: 536 training return: tensor(-42.4712, device='cuda:0')
episode: 537 training return: tensor(-365.4775, device='cuda:0')
episode: 538 training return: tensor(10.1211, device='cuda:0')
episode: 539 training return: tensor(-1.0220, device='cuda:0')
epoch: 135 test_true_pfm: 16.959080662126592
episode: 540 training return: tensor(104.1810, device='cuda:0')
episode: 541 training return: tensor(-268.4908, device='cuda:0')
episode: 542 training return: tensor(51.3885, device='cuda:0')
episode: 543 training return: tensor(-283.1381, device='cuda:0')
epoch: 136 test_true_pfm: 10.242939712668768
episode: 544 training return: tensor(74.9914, device='cuda:0')
episode: 545 training return: tensor(92.3496, device='cuda:0')
episode: 546 training return: tensor(-204.2774, device='cuda:0')
episode: 547 training return: tensor(-100.4572, device='cuda:0')
epoch: 137 test_true_pfm: 4.522408465814133
episode: 548 training return: tensor(-106.6216, device='cuda:0')
episode: 549 training return: tensor(-36.1865, device='cuda:0')
episode: 550 training return: tensor(3.2871, device='cuda:0')
episode: 551 training return: tensor(68.8602, device='cuda:0')
epoch: 138 test_true_pfm: 14.228346199558297
episode: 552 training return: tensor(-38.5850, device='cuda:0')
episode: 553 training return: tensor(-363.7326, device='cuda:0')
episode: 554 training return: tensor(-601.5202, device='cuda:0')
episode: 555 training return: tensor(-440.2625, device='cuda:0')
epoch: 139 test_true_pfm: 22.473385550738307
episode: 556 training return: tensor(-246.8123, device='cuda:0')
episode: 557 training return: tensor(60.0894, device='cuda:0')
episode: 558 training return: tensor(-142.6392, device='cuda:0')
episode: 559 training return: tensor(-627.2349, device='cuda:0')
epoch: 140 test_true_pfm: 14.433125301507175
episode: 560 training return: tensor(-24.0839, device='cuda:0')
episode: 561 training return: tensor(-57.4429, device='cuda:0')
episode: 562 training return: tensor(-290.2831, device='cuda:0')
episode: 563 training return: tensor(120.7991, device='cuda:0')
epoch: 141 test_true_pfm: 15.76839610412554
episode: 564 training return: tensor(63.2487, device='cuda:0')
episode: 565 training return: tensor(-14.7003, device='cuda:0')
episode: 566 training return: tensor(-31.2048, device='cuda:0')
episode: 567 training return: tensor(-1017.7775, device='cuda:0')
epoch: 142 test_true_pfm: 15.64177334237662
episode: 568 training return: tensor(-119.9295, device='cuda:0')
episode: 569 training return: tensor(-63.7278, device='cuda:0')
episode: 570 training return: tensor(-646.0123, device='cuda:0')
episode: 571 training return: tensor(135.7857, device='cuda:0')
epoch: 143 test_true_pfm: 10.720908466924902
episode: 572 training return: tensor(-509.5803, device='cuda:0')
episode: 573 training return: tensor(26.7634, device='cuda:0')
episode: 574 training return: tensor(-185.4453, device='cuda:0')
episode: 575 training return: tensor(-651.0303, device='cuda:0')
epoch: 144 test_true_pfm: 10.282872963518919
episode: 576 training return: tensor(-250.4453, device='cuda:0')
episode: 577 training return: tensor(51.6546, device='cuda:0')
episode: 578 training return: tensor(-780.2104, device='cuda:0')
episode: 579 training return: tensor(-511.4625, device='cuda:0')
epoch: 145 test_true_pfm: 21.479586271790353
episode: 580 training return: tensor(-609.0688, device='cuda:0')
episode: 581 training return: tensor(-390.8755, device='cuda:0')
episode: 582 training return: tensor(-57.7445, device='cuda:0')
episode: 583 training return: tensor(-333.9137, device='cuda:0')
epoch: 146 test_true_pfm: 14.373612776134053
episode: 584 training return: tensor(-347.9966, device='cuda:0')
episode: 585 training return: tensor(-6.6474, device='cuda:0')
episode: 586 training return: tensor(-238.2610, device='cuda:0')
episode: 587 training return: tensor(84.1809, device='cuda:0')
epoch: 147 test_true_pfm: 12.841117901644177
episode: 588 training return: tensor(73.2289, device='cuda:0')
episode: 589 training return: tensor(44.7463, device='cuda:0')
episode: 590 training return: tensor(-397.5120, device='cuda:0')
episode: 591 training return: tensor(-470.4562, device='cuda:0')
epoch: 148 test_true_pfm: 17.698729344810953
episode: 592 training return: tensor(-1089.2651, device='cuda:0')
episode: 593 training return: tensor(-121.5251, device='cuda:0')
episode: 594 training return: tensor(57.8623, device='cuda:0')
episode: 595 training return: tensor(-236.2748, device='cuda:0')
epoch: 149 test_true_pfm: 13.595445899903279
episode: 596 training return: tensor(-205.5287, device='cuda:0')
episode: 597 training return: tensor(-395.1667, device='cuda:0')
episode: 598 training return: tensor(-205.0859, device='cuda:0')
episode: 599 training return: tensor(-2.6240, device='cuda:0')
epoch: 150 test_true_pfm: 18.260671685526653
