['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'brac', '--traj', 'expert', '--seed', '0', '--data', '100000']
129.53934573490628
episode: 0 training return: tensor(-7.2145e+17, device='cuda:0')
episode: 1 training return: tensor(-5.7248e+16, device='cuda:0')
episode: 2 training return: tensor(-9.9137e+14, device='cuda:0')
episode: 3 training return: tensor(-3.8180e+14, device='cuda:0')
epoch: 1 test_true_pfm: -24.70106197446013
episode: 4 training return: tensor(-5.9680e+15, device='cuda:0')
episode: 5 training return: tensor(-1.7282e+15, device='cuda:0')
episode: 6 training return: tensor(-8.0848e+14, device='cuda:0')
episode: 7 training return: tensor(-1.3002e+16, device='cuda:0')
epoch: 2 test_true_pfm: -22.202377889572052
episode: 8 training return: tensor(-3.8031e+16, device='cuda:0')
episode: 9 training return: tensor(-1.9469e+16, device='cuda:0')
episode: 10 training return: tensor(-2.1201e+10, device='cuda:0')
episode: 11 training return: tensor(-2.1109e+10, device='cuda:0')
epoch: 3 test_true_pfm: -22.362495547642617
episode: 12 training return: tensor(-2.5422e+10, device='cuda:0')
episode: 13 training return: tensor(-2.5116e+10, device='cuda:0')
episode: 14 training return: tensor(-2.2486e+10, device='cuda:0')
episode: 15 training return: tensor(-1.9144e+10, device='cuda:0')
epoch: 4 test_true_pfm: -22.680867850393913
episode: 16 training return: tensor(-2.3767e+10, device='cuda:0')
episode: 17 training return: tensor(-1.7821e+10, device='cuda:0')
episode: 18 training return: tensor(-1.6871e+10, device='cuda:0')
episode: 19 training return: tensor(-1.7983e+10, device='cuda:0')
epoch: 5 test_true_pfm: -22.82971176169064
episode: 20 training return: tensor(-1.8432e+10, device='cuda:0')
episode: 21 training return: tensor(-1.7659e+10, device='cuda:0')
episode: 22 training return: tensor(-1.5099e+10, device='cuda:0')
episode: 23 training return: tensor(-1.2348e+10, device='cuda:0')
epoch: 6 test_true_pfm: -21.85949053841354
episode: 24 training return: tensor(-1.0475e+10, device='cuda:0')
episode: 25 training return: tensor(-1.3897e+11, device='cuda:0')
episode: 26 training return: tensor(-4.5815e+12, device='cuda:0')
episode: 27 training return: tensor(-8.6642e+09, device='cuda:0')
epoch: 7 test_true_pfm: -21.967551694493817
episode: 28 training return: tensor(-8.1229e+09, device='cuda:0')
episode: 29 training return: tensor(-1.0107e+10, device='cuda:0')
episode: 30 training return: tensor(-6.3728e+09, device='cuda:0')
episode: 31 training return: tensor(-8.9777e+09, device='cuda:0')
epoch: 8 test_true_pfm: -21.021445393830493
episode: 32 training return: tensor(-7.3836e+09, device='cuda:0')
episode: 33 training return: tensor(-8.0024e+09, device='cuda:0')
episode: 34 training return: tensor(-5.1881e+09, device='cuda:0')
episode: 35 training return: tensor(-5.8047e+09, device='cuda:0')
epoch: 9 test_true_pfm: -21.43018665253781
episode: 36 training return: tensor(-5.8014e+09, device='cuda:0')
episode: 37 training return: tensor(-4.3389e+09, device='cuda:0')
episode: 38 training return: tensor(-2.8666e+14, device='cuda:0')
episode: 39 training return: tensor(-4.4468e+09, device='cuda:0')
epoch: 10 test_true_pfm: -20.842209689126
episode: 40 training return: tensor(-3.4254e+09, device='cuda:0')
episode: 41 training return: tensor(-3.2716e+09, device='cuda:0')
episode: 42 training return: tensor(-3.6896e+09, device='cuda:0')
episode: 43 training return: tensor(-3.3221e+09, device='cuda:0')
epoch: 11 test_true_pfm: -21.39872121316776
episode: 44 training return: tensor(-4.2409e+09, device='cuda:0')
episode: 45 training return: tensor(-2.3357e+09, device='cuda:0')
episode: 46 training return: tensor(-3.3800e+09, device='cuda:0')
episode: 47 training return: tensor(-3.4747e+09, device='cuda:0')
epoch: 12 test_true_pfm: -21.164098355148873
episode: 48 training return: tensor(-3.6809e+09, device='cuda:0')
episode: 49 training return: tensor(-4.0753e+09, device='cuda:0')
episode: 50 training return: tensor(-4.1087e+09, device='cuda:0')
episode: 51 training return: tensor(-4.0153e+09, device='cuda:0')
epoch: 13 test_true_pfm: -21.652306906239346
episode: 52 training return: tensor(-2.9703e+09, device='cuda:0')
episode: 53 training return: tensor(-3.0196e+09, device='cuda:0')
episode: 54 training return: tensor(-2.7381e+09, device='cuda:0')
episode: 55 training return: tensor(-2.9749e+09, device='cuda:0')
epoch: 14 test_true_pfm: -21.05265599204477
episode: 56 training return: tensor(-3.7478e+09, device='cuda:0')
episode: 57 training return: tensor(-2.8532e+09, device='cuda:0')
episode: 58 training return: tensor(-3.6094e+09, device='cuda:0')
episode: 59 training return: tensor(-3.0582e+09, device='cuda:0')
epoch: 15 test_true_pfm: -21.07105209852694
episode: 60 training return: tensor(-3.8955e+09, device='cuda:0')
episode: 61 training return: tensor(-3.4805e+09, device='cuda:0')
episode: 62 training return: tensor(-4.1542e+09, device='cuda:0')
episode: 63 training return: tensor(-3.3302e+09, device='cuda:0')
epoch: 16 test_true_pfm: -21.843870378249605
episode: 64 training return: tensor(-2.7844e+09, device='cuda:0')
episode: 65 training return: tensor(-2.9274e+09, device='cuda:0')
episode: 66 training return: tensor(-3.9015e+09, device='cuda:0')
episode: 67 training return: tensor(-3.0885e+09, device='cuda:0')
epoch: 17 test_true_pfm: -21.15395240262534
episode: 68 training return: tensor(-3.3767e+09, device='cuda:0')
episode: 69 training return: tensor(-2.5451e+09, device='cuda:0')
episode: 70 training return: tensor(-2.4530e+09, device='cuda:0')
episode: 71 training return: tensor(-3.0383e+09, device='cuda:0')
epoch: 18 test_true_pfm: -21.21446116471211
episode: 72 training return: tensor(-2.6145e+09, device='cuda:0')
episode: 73 training return: tensor(-2.5214e+09, device='cuda:0')
episode: 74 training return: tensor(-2.8156e+09, device='cuda:0')
episode: 75 training return: tensor(-2.7791e+09, device='cuda:0')
epoch: 19 test_true_pfm: -21.283925618032633
episode: 76 training return: tensor(-2.2515e+09, device='cuda:0')
episode: 77 training return: tensor(-3.3431e+09, device='cuda:0')
episode: 78 training return: tensor(-2.7019e+09, device='cuda:0')
episode: 79 training return: tensor(-3.1066e+09, device='cuda:0')
epoch: 20 test_true_pfm: -21.06350524669258
episode: 80 training return: tensor(-2.1844e+09, device='cuda:0')
episode: 81 training return: tensor(-3.6435e+09, device='cuda:0')
episode: 82 training return: tensor(-2.8781e+09, device='cuda:0')
episode: 83 training return: tensor(-3.4844e+09, device='cuda:0')
epoch: 21 test_true_pfm: -21.26367157368393
episode: 84 training return: tensor(-3.3576e+09, device='cuda:0')
episode: 85 training return: tensor(-3.1755e+09, device='cuda:0')
episode: 86 training return: tensor(-3.4838e+09, device='cuda:0')
episode: 87 training return: tensor(-3.4972e+09, device='cuda:0')
epoch: 22 test_true_pfm: -21.495107476019804
episode: 88 training return: tensor(-2.4625e+09, device='cuda:0')
episode: 89 training return: tensor(-3.0260e+09, device='cuda:0')
episode: 90 training return: tensor(-2.0249e+09, device='cuda:0')
episode: 91 training return: tensor(-3.1673e+09, device='cuda:0')
epoch: 23 test_true_pfm: -21.47215735372395
episode: 92 training return: tensor(-3.3582e+09, device='cuda:0')
episode: 93 training return: tensor(-2.5409e+09, device='cuda:0')
episode: 94 training return: tensor(-2.2548e+09, device='cuda:0')
episode: 95 training return: tensor(-3.0064e+09, device='cuda:0')
epoch: 24 test_true_pfm: -21.790116241256506
episode: 96 training return: tensor(-2.8574e+09, device='cuda:0')
episode: 97 training return: tensor(-2.5478e+09, device='cuda:0')
episode: 98 training return: tensor(-2.2319e+09, device='cuda:0')
episode: 99 training return: tensor(-2.2719e+09, device='cuda:0')
epoch: 25 test_true_pfm: -21.218755795655298
episode: 100 training return: tensor(-2.4028e+09, device='cuda:0')
episode: 101 training return: tensor(-2.7751e+09, device='cuda:0')
episode: 102 training return: tensor(-2.2573e+09, device='cuda:0')
episode: 103 training return: tensor(-2.4108e+09, device='cuda:0')
epoch: 26 test_true_pfm: -21.19770663142892
episode: 104 training return: tensor(-2.0971e+09, device='cuda:0')
episode: 105 training return: tensor(-2.1957e+09, device='cuda:0')
episode: 106 training return: tensor(-2.6381e+09, device='cuda:0')
episode: 107 training return: tensor(-2.1775e+09, device='cuda:0')
epoch: 27 test_true_pfm: -21.468134779240547
episode: 108 training return: tensor(-1.8639e+09, device='cuda:0')
episode: 109 training return: tensor(-2.2664e+09, device='cuda:0')
episode: 110 training return: tensor(-1.7669e+09, device='cuda:0')
episode: 111 training return: tensor(-2.3460e+09, device='cuda:0')
epoch: 28 test_true_pfm: -20.69931054006853
episode: 112 training return: tensor(-2.0328e+09, device='cuda:0')
episode: 113 training return: tensor(-2.0175e+09, device='cuda:0')
episode: 114 training return: tensor(-2.6673e+09, device='cuda:0')
episode: 115 training return: tensor(-2.2364e+09, device='cuda:0')
epoch: 29 test_true_pfm: -21.786195384647954
episode: 116 training return: tensor(-1.7106e+09, device='cuda:0')
episode: 117 training return: tensor(-2.0774e+09, device='cuda:0')
episode: 118 training return: tensor(-1.7370e+09, device='cuda:0')
episode: 119 training return: tensor(-2.3569e+09, device='cuda:0')
epoch: 30 test_true_pfm: -22.249664419453172
episode: 120 training return: tensor(-2.0804e+09, device='cuda:0')
episode: 121 training return: tensor(-1.7911e+09, device='cuda:0')
episode: 122 training return: tensor(-2.1475e+09, device='cuda:0')
episode: 123 training return: tensor(-1.9888e+09, device='cuda:0')
epoch: 31 test_true_pfm: -21.31754082596376
episode: 124 training return: tensor(-1.8655e+09, device='cuda:0')
episode: 125 training return: tensor(-2.0020e+09, device='cuda:0')
episode: 126 training return: tensor(-2.0530e+09, device='cuda:0')
episode: 127 training return: tensor(-2.0070e+09, device='cuda:0')
epoch: 32 test_true_pfm: -21.420295107644613
episode: 128 training return: tensor(-2.8070e+09, device='cuda:0')
episode: 129 training return: tensor(-1.5565e+09, device='cuda:0')
episode: 130 training return: tensor(-2.7496e+09, device='cuda:0')
episode: 131 training return: tensor(-2.2999e+09, device='cuda:0')
epoch: 33 test_true_pfm: -21.931519822719864
episode: 132 training return: tensor(-2.0823e+09, device='cuda:0')
episode: 133 training return: tensor(-2.2460e+09, device='cuda:0')
episode: 134 training return: tensor(-2.2048e+09, device='cuda:0')
episode: 135 training return: tensor(-2.1956e+09, device='cuda:0')
epoch: 34 test_true_pfm: -20.84463167448147
episode: 136 training return: tensor(-1.7171e+09, device='cuda:0')
episode: 137 training return: tensor(-1.8315e+09, device='cuda:0')
episode: 138 training return: tensor(-1.7273e+09, device='cuda:0')
episode: 139 training return: tensor(-1.8923e+09, device='cuda:0')
epoch: 35 test_true_pfm: -22.03832471195377
episode: 140 training return: tensor(-1.8515e+09, device='cuda:0')
episode: 141 training return: tensor(-1.9364e+09, device='cuda:0')
episode: 142 training return: tensor(-1.9711e+09, device='cuda:0')
episode: 143 training return: tensor(-1.8690e+09, device='cuda:0')
epoch: 36 test_true_pfm: -21.491072372660298
episode: 144 training return: tensor(-1.6798e+09, device='cuda:0')
episode: 145 training return: tensor(-2.1359e+09, device='cuda:0')
episode: 146 training return: tensor(-2.4478e+09, device='cuda:0')
episode: 147 training return: tensor(-2.0709e+09, device='cuda:0')
epoch: 37 test_true_pfm: -21.943717977580697
episode: 148 training return: tensor(-2.0245e+09, device='cuda:0')
episode: 149 training return: tensor(-1.8598e+09, device='cuda:0')
episode: 150 training return: tensor(-1.5255e+09, device='cuda:0')
episode: 151 training return: tensor(-1.8728e+09, device='cuda:0')
epoch: 38 test_true_pfm: -22.132398563340924
episode: 152 training return: tensor(-1.4760e+09, device='cuda:0')
episode: 153 training return: tensor(-2.4150e+09, device='cuda:0')
episode: 154 training return: tensor(-2.5949e+09, device='cuda:0')
episode: 155 training return: tensor(-1.8169e+09, device='cuda:0')
epoch: 39 test_true_pfm: -21.730588393270228
episode: 156 training return: tensor(-2.2777e+09, device='cuda:0')
episode: 157 training return: tensor(-1.9504e+09, device='cuda:0')
episode: 158 training return: tensor(-1.9001e+09, device='cuda:0')
episode: 159 training return: tensor(-1.9451e+09, device='cuda:0')
epoch: 40 test_true_pfm: -21.460404703432843
episode: 160 training return: tensor(-1.6516e+09, device='cuda:0')
episode: 161 training return: tensor(-1.6602e+09, device='cuda:0')
episode: 162 training return: tensor(-2.2188e+09, device='cuda:0')
episode: 163 training return: tensor(-1.7287e+09, device='cuda:0')
epoch: 41 test_true_pfm: -22.21360681870506
episode: 164 training return: tensor(-1.6837e+09, device='cuda:0')
episode: 165 training return: tensor(-2.2556e+09, device='cuda:0')
episode: 166 training return: tensor(-1.5024e+09, device='cuda:0')
episode: 167 training return: tensor(-1.9401e+09, device='cuda:0')
epoch: 42 test_true_pfm: -21.611506243505378
episode: 168 training return: tensor(-1.8095e+09, device='cuda:0')
episode: 169 training return: tensor(-1.4924e+09, device='cuda:0')
episode: 170 training return: tensor(-1.6897e+09, device='cuda:0')
episode: 171 training return: tensor(-2.1403e+09, device='cuda:0')
epoch: 43 test_true_pfm: -22.45762308940845
episode: 172 training return: tensor(-1.7949e+09, device='cuda:0')
episode: 173 training return: tensor(-1.6465e+09, device='cuda:0')
episode: 174 training return: tensor(-1.7128e+09, device='cuda:0')
episode: 175 training return: tensor(-2.0263e+09, device='cuda:0')
epoch: 44 test_true_pfm: -22.39528554278653
episode: 176 training return: tensor(-1.9707e+09, device='cuda:0')
episode: 177 training return: tensor(-1.9340e+09, device='cuda:0')
episode: 178 training return: tensor(-1.5699e+09, device='cuda:0')
episode: 179 training return: tensor(-1.9545e+09, device='cuda:0')
epoch: 45 test_true_pfm: -21.775102927988023
episode: 180 training return: tensor(-1.5485e+09, device='cuda:0')
episode: 181 training return: tensor(-1.9504e+09, device='cuda:0')
episode: 182 training return: tensor(-2.0795e+09, device='cuda:0')
episode: 183 training return: tensor(-1.8235e+09, device='cuda:0')
epoch: 46 test_true_pfm: -21.33617461639937
episode: 184 training return: tensor(-1.9010e+09, device='cuda:0')
episode: 185 training return: tensor(-2.4212e+09, device='cuda:0')
episode: 186 training return: tensor(-2.3777e+09, device='cuda:0')
episode: 187 training return: tensor(-2.4314e+09, device='cuda:0')
epoch: 47 test_true_pfm: -21.685724114964636
episode: 188 training return: tensor(-2.4480e+09, device='cuda:0')
episode: 189 training return: tensor(-1.5650e+09, device='cuda:0')
episode: 190 training return: tensor(-1.7637e+09, device='cuda:0')
episode: 191 training return: tensor(-1.4632e+09, device='cuda:0')
epoch: 48 test_true_pfm: -22.024583346014566
episode: 192 training return: tensor(-1.9335e+09, device='cuda:0')
episode: 193 training return: tensor(-1.7298e+09, device='cuda:0')
episode: 194 training return: tensor(-1.6563e+09, device='cuda:0')
episode: 195 training return: tensor(-1.6682e+09, device='cuda:0')
epoch: 49 test_true_pfm: -21.699712452135635
episode: 196 training return: tensor(-1.7997e+09, device='cuda:0')
episode: 197 training return: tensor(-1.6695e+09, device='cuda:0')
episode: 198 training return: tensor(-1.4648e+09, device='cuda:0')
episode: 199 training return: tensor(-1.9246e+09, device='cuda:0')
epoch: 50 test_true_pfm: -22.338731963169195
episode: 200 training return: tensor(-2.0058e+09, device='cuda:0')
episode: 201 training return: tensor(-1.8265e+09, device='cuda:0')
episode: 202 training return: tensor(-1.7493e+09, device='cuda:0')
episode: 203 training return: tensor(-2.0606e+09, device='cuda:0')
epoch: 51 test_true_pfm: -21.799150826514328
episode: 204 training return: tensor(-2.3839e+09, device='cuda:0')
episode: 205 training return: tensor(-2.0033e+09, device='cuda:0')
episode: 206 training return: tensor(-1.7758e+09, device='cuda:0')
episode: 207 training return: tensor(-1.5485e+09, device='cuda:0')
epoch: 52 test_true_pfm: -22.257916561395422
episode: 208 training return: tensor(-1.5042e+09, device='cuda:0')
episode: 209 training return: tensor(-1.3967e+09, device='cuda:0')
episode: 210 training return: tensor(-1.8067e+09, device='cuda:0')
episode: 211 training return: tensor(-1.5717e+09, device='cuda:0')
epoch: 53 test_true_pfm: -22.58844078782909
episode: 212 training return: tensor(-1.6299e+09, device='cuda:0')
episode: 213 training return: tensor(-1.7711e+09, device='cuda:0')
episode: 214 training return: tensor(-1.9994e+09, device='cuda:0')
episode: 215 training return: tensor(-1.5139e+09, device='cuda:0')
epoch: 54 test_true_pfm: -22.636540648000242
episode: 216 training return: tensor(-1.4561e+09, device='cuda:0')
episode: 217 training return: tensor(-1.8887e+09, device='cuda:0')
episode: 218 training return: tensor(-2.1927e+09, device='cuda:0')
episode: 219 training return: tensor(-1.7963e+09, device='cuda:0')
epoch: 55 test_true_pfm: -22.199436720333477
episode: 220 training return: tensor(-2.3225e+09, device='cuda:0')
episode: 221 training return: tensor(-1.9283e+09, device='cuda:0')
episode: 222 training return: tensor(-1.6735e+09, device='cuda:0')
episode: 223 training return: tensor(-2.0300e+09, device='cuda:0')
epoch: 56 test_true_pfm: -21.414569710760507
episode: 224 training return: tensor(-2.2883e+09, device='cuda:0')
episode: 225 training return: tensor(-2.0078e+09, device='cuda:0')
episode: 226 training return: tensor(-2.4832e+09, device='cuda:0')
episode: 227 training return: tensor(-1.9428e+09, device='cuda:0')
epoch: 57 test_true_pfm: -22.58514002151339
episode: 228 training return: tensor(-1.9937e+09, device='cuda:0')
episode: 229 training return: tensor(-2.4188e+09, device='cuda:0')
episode: 230 training return: tensor(-1.8644e+09, device='cuda:0')
episode: 231 training return: tensor(-2.4485e+09, device='cuda:0')
epoch: 58 test_true_pfm: -21.066714486712147
episode: 232 training return: tensor(-2.1291e+09, device='cuda:0')
episode: 233 training return: tensor(-1.8183e+09, device='cuda:0')
episode: 234 training return: tensor(-2.2849e+09, device='cuda:0')
episode: 235 training return: tensor(-1.8851e+09, device='cuda:0')
epoch: 59 test_true_pfm: -22.492765448217092
episode: 236 training return: tensor(-2.5672e+09, device='cuda:0')
episode: 237 training return: tensor(-2.2109e+09, device='cuda:0')
episode: 238 training return: tensor(-2.4201e+09, device='cuda:0')
episode: 239 training return: tensor(-2.4650e+09, device='cuda:0')
epoch: 60 test_true_pfm: -22.380748401655005
episode: 240 training return: tensor(-2.3487e+09, device='cuda:0')
episode: 241 training return: tensor(-2.0213e+09, device='cuda:0')
episode: 242 training return: tensor(-2.3107e+09, device='cuda:0')
episode: 243 training return: tensor(-1.9953e+09, device='cuda:0')
epoch: 61 test_true_pfm: -22.00217853520072
episode: 244 training return: tensor(-2.4933e+09, device='cuda:0')
episode: 245 training return: tensor(-2.1229e+09, device='cuda:0')
episode: 246 training return: tensor(-2.1829e+09, device='cuda:0')
episode: 247 training return: tensor(-2.1748e+09, device='cuda:0')
epoch: 62 test_true_pfm: -20.895644231284017
episode: 248 training return: tensor(-2.5775e+09, device='cuda:0')
episode: 249 training return: tensor(-2.1623e+09, device='cuda:0')
episode: 250 training return: tensor(-2.2423e+09, device='cuda:0')
episode: 251 training return: tensor(-2.2837e+09, device='cuda:0')
epoch: 63 test_true_pfm: -21.56210488964798
episode: 252 training return: tensor(-2.9003e+09, device='cuda:0')
episode: 253 training return: tensor(-2.5027e+09, device='cuda:0')
episode: 254 training return: tensor(-2.0304e+09, device='cuda:0')
episode: 255 training return: tensor(-1.9781e+09, device='cuda:0')
epoch: 64 test_true_pfm: -22.212031931780466
episode: 256 training return: tensor(-2.9394e+09, device='cuda:0')
episode: 257 training return: tensor(-2.4149e+09, device='cuda:0')
episode: 258 training return: tensor(-2.7746e+09, device='cuda:0')
episode: 259 training return: tensor(-3.0266e+09, device='cuda:0')
epoch: 65 test_true_pfm: -21.878940312861154
episode: 260 training return: tensor(-2.5702e+09, device='cuda:0')
episode: 261 training return: tensor(-2.0189e+09, device='cuda:0')
episode: 262 training return: tensor(-2.0595e+09, device='cuda:0')
episode: 263 training return: tensor(-2.4080e+09, device='cuda:0')
epoch: 66 test_true_pfm: -22.912550408331537
episode: 264 training return: tensor(-2.4175e+09, device='cuda:0')
episode: 265 training return: tensor(-2.1282e+09, device='cuda:0')
episode: 266 training return: tensor(-2.4590e+09, device='cuda:0')
episode: 267 training return: tensor(-1.7003e+09, device='cuda:0')
epoch: 67 test_true_pfm: -21.977511441889753
episode: 268 training return: tensor(-1.7880e+09, device='cuda:0')
episode: 269 training return: tensor(-2.2315e+09, device='cuda:0')
episode: 270 training return: tensor(-2.3698e+09, device='cuda:0')
episode: 271 training return: tensor(-3.6229e+09, device='cuda:0')
epoch: 68 test_true_pfm: -22.639209961336135
episode: 272 training return: tensor(-2.4280e+09, device='cuda:0')
episode: 273 training return: tensor(-2.5606e+09, device='cuda:0')
episode: 274 training return: tensor(-2.2218e+09, device='cuda:0')
episode: 275 training return: tensor(-1.7025e+09, device='cuda:0')
epoch: 69 test_true_pfm: -21.8995552045403
episode: 276 training return: tensor(-2.1071e+09, device='cuda:0')
episode: 277 training return: tensor(-2.4516e+09, device='cuda:0')
episode: 278 training return: tensor(-2.1486e+09, device='cuda:0')
episode: 279 training return: tensor(-2.6298e+09, device='cuda:0')
epoch: 70 test_true_pfm: -21.39062011145786
episode: 280 training return: tensor(-2.4053e+09, device='cuda:0')
episode: 281 training return: tensor(-2.1829e+09, device='cuda:0')
episode: 282 training return: tensor(-2.0003e+09, device='cuda:0')
episode: 283 training return: tensor(-2.5735e+09, device='cuda:0')
epoch: 71 test_true_pfm: -22.292936306717287
episode: 284 training return: tensor(-2.2742e+09, device='cuda:0')
episode: 285 training return: tensor(-2.1200e+09, device='cuda:0')
episode: 286 training return: tensor(-2.4406e+09, device='cuda:0')
episode: 287 training return: tensor(-2.1363e+09, device='cuda:0')
epoch: 72 test_true_pfm: -22.722823398892945
episode: 288 training return: tensor(-2.8261e+09, device='cuda:0')
episode: 289 training return: tensor(-2.4004e+09, device='cuda:0')
episode: 290 training return: tensor(-2.6016e+09, device='cuda:0')
episode: 291 training return: tensor(-1.9217e+09, device='cuda:0')
epoch: 73 test_true_pfm: -22.481290753483457
episode: 292 training return: tensor(-3.1233e+09, device='cuda:0')
episode: 293 training return: tensor(-2.4257e+09, device='cuda:0')
episode: 294 training return: tensor(-2.1690e+09, device='cuda:0')
episode: 295 training return: tensor(-1.9532e+09, device='cuda:0')
epoch: 74 test_true_pfm: -22.190655496438943
episode: 296 training return: tensor(-2.2887e+09, device='cuda:0')
episode: 297 training return: tensor(-2.2272e+09, device='cuda:0')
episode: 298 training return: tensor(-2.5882e+09, device='cuda:0')
episode: 299 training return: tensor(-2.2091e+09, device='cuda:0')
epoch: 75 test_true_pfm: -21.930295425771725
episode: 300 training return: tensor(-2.5002e+09, device='cuda:0')
episode: 301 training return: tensor(-2.5674e+09, device='cuda:0')
episode: 302 training return: tensor(-2.2580e+09, device='cuda:0')
episode: 303 training return: tensor(-2.6783e+09, device='cuda:0')
epoch: 76 test_true_pfm: -21.030305663030724
episode: 304 training return: tensor(-2.6467e+09, device='cuda:0')
episode: 305 training return: tensor(-2.0387e+09, device='cuda:0')
episode: 306 training return: tensor(-2.4304e+09, device='cuda:0')
episode: 307 training return: tensor(-2.6625e+09, device='cuda:0')
epoch: 77 test_true_pfm: -21.161564348288376
episode: 308 training return: tensor(-3.2600e+09, device='cuda:0')
episode: 309 training return: tensor(-2.4856e+09, device='cuda:0')
episode: 310 training return: tensor(-2.1288e+09, device='cuda:0')
episode: 311 training return: tensor(-2.7859e+09, device='cuda:0')
epoch: 78 test_true_pfm: -21.64375731945594
episode: 312 training return: tensor(-3.0896e+09, device='cuda:0')
episode: 313 training return: tensor(-2.9878e+09, device='cuda:0')
episode: 314 training return: tensor(-2.8886e+09, device='cuda:0')
episode: 315 training return: tensor(-2.6976e+09, device='cuda:0')
epoch: 79 test_true_pfm: -21.91869581062094
episode: 316 training return: tensor(-3.3338e+09, device='cuda:0')
episode: 317 training return: tensor(-2.6876e+09, device='cuda:0')
episode: 318 training return: tensor(-2.7087e+09, device='cuda:0')
episode: 319 training return: tensor(-2.9381e+09, device='cuda:0')
epoch: 80 test_true_pfm: -21.86585672642332
episode: 320 training return: tensor(-3.0608e+09, device='cuda:0')
episode: 321 training return: tensor(-2.7395e+09, device='cuda:0')
episode: 322 training return: tensor(-2.4875e+09, device='cuda:0')
episode: 323 training return: tensor(-2.5042e+09, device='cuda:0')
epoch: 81 test_true_pfm: -21.63346324267814
episode: 324 training return: tensor(-2.5124e+09, device='cuda:0')
episode: 325 training return: tensor(-2.2502e+09, device='cuda:0')
episode: 326 training return: tensor(-2.7704e+09, device='cuda:0')
episode: 327 training return: tensor(-3.0427e+09, device='cuda:0')
epoch: 82 test_true_pfm: -21.7414079106043
episode: 328 training return: tensor(-3.1463e+09, device='cuda:0')
episode: 329 training return: tensor(-3.0878e+09, device='cuda:0')
episode: 330 training return: tensor(-3.6345e+09, device='cuda:0')
episode: 331 training return: tensor(-2.6521e+09, device='cuda:0')
epoch: 83 test_true_pfm: -21.788355969033425
episode: 332 training return: tensor(-2.7193e+09, device='cuda:0')
episode: 333 training return: tensor(-2.6475e+09, device='cuda:0')
episode: 334 training return: tensor(-2.7260e+09, device='cuda:0')
episode: 335 training return: tensor(-2.5944e+09, device='cuda:0')
epoch: 84 test_true_pfm: -21.61020639854275
episode: 336 training return: tensor(-3.7691e+09, device='cuda:0')
episode: 337 training return: tensor(-3.2836e+09, device='cuda:0')
episode: 338 training return: tensor(-3.1499e+09, device='cuda:0')
episode: 339 training return: tensor(-3.0743e+09, device='cuda:0')
epoch: 85 test_true_pfm: -21.690267560942
episode: 340 training return: tensor(-2.2670e+09, device='cuda:0')
episode: 341 training return: tensor(-2.7171e+09, device='cuda:0')
episode: 342 training return: tensor(-2.3044e+09, device='cuda:0')
episode: 343 training return: tensor(-2.2745e+09, device='cuda:0')
epoch: 86 test_true_pfm: -21.812755825688082
episode: 344 training return: tensor(-2.3911e+09, device='cuda:0')
episode: 345 training return: tensor(-3.0060e+09, device='cuda:0')
episode: 346 training return: tensor(-3.0348e+09, device='cuda:0')
episode: 347 training return: tensor(-2.7441e+09, device='cuda:0')
epoch: 87 test_true_pfm: -21.31290472034823
episode: 348 training return: tensor(-3.2002e+09, device='cuda:0')
episode: 349 training return: tensor(-3.1874e+09, device='cuda:0')
episode: 350 training return: tensor(-4.0669e+09, device='cuda:0')
episode: 351 training return: tensor(-2.2867e+09, device='cuda:0')
epoch: 88 test_true_pfm: -22.30364426785183
episode: 352 training return: tensor(-3.0992e+09, device='cuda:0')
episode: 353 training return: tensor(-3.1110e+09, device='cuda:0')
episode: 354 training return: tensor(-3.2336e+09, device='cuda:0')
episode: 355 training return: tensor(-4.6188e+09, device='cuda:0')
epoch: 89 test_true_pfm: -22.45652445906108
episode: 356 training return: tensor(-3.4503e+09, device='cuda:0')
episode: 357 training return: tensor(-3.2536e+09, device='cuda:0')
episode: 358 training return: tensor(-3.4830e+09, device='cuda:0')
episode: 359 training return: tensor(-3.0283e+09, device='cuda:0')
epoch: 90 test_true_pfm: -22.36076041507657
episode: 360 training return: tensor(-2.2537e+09, device='cuda:0')
episode: 361 training return: tensor(-2.7189e+09, device='cuda:0')
episode: 362 training return: tensor(-3.0485e+09, device='cuda:0')
episode: 363 training return: tensor(-3.1635e+09, device='cuda:0')
epoch: 91 test_true_pfm: -21.95729050714694
episode: 364 training return: tensor(-3.1842e+09, device='cuda:0')
episode: 365 training return: tensor(-3.6268e+09, device='cuda:0')
episode: 366 training return: tensor(-2.8020e+09, device='cuda:0')
episode: 367 training return: tensor(-2.0548e+09, device='cuda:0')
epoch: 92 test_true_pfm: -21.601181311735328
episode: 368 training return: tensor(-3.6283e+09, device='cuda:0')
episode: 369 training return: tensor(-2.8181e+09, device='cuda:0')
episode: 370 training return: tensor(-2.4917e+09, device='cuda:0')
episode: 371 training return: tensor(-3.7487e+09, device='cuda:0')
epoch: 93 test_true_pfm: -22.042956980632873
episode: 372 training return: tensor(-3.3724e+09, device='cuda:0')
episode: 373 training return: tensor(-3.0980e+09, device='cuda:0')
episode: 374 training return: tensor(-2.8992e+09, device='cuda:0')
episode: 375 training return: tensor(-3.5104e+09, device='cuda:0')
epoch: 94 test_true_pfm: -22.205514750548836
episode: 376 training return: tensor(-2.9072e+09, device='cuda:0')
episode: 377 training return: tensor(-3.2944e+09, device='cuda:0')
episode: 378 training return: tensor(-2.7492e+09, device='cuda:0')
episode: 379 training return: tensor(-2.8349e+09, device='cuda:0')
epoch: 95 test_true_pfm: -21.655093801497806
episode: 380 training return: tensor(-2.0766e+09, device='cuda:0')
episode: 381 training return: tensor(-2.8575e+09, device='cuda:0')
episode: 382 training return: tensor(-2.6083e+09, device='cuda:0')
episode: 383 training return: tensor(-2.5607e+09, device='cuda:0')
epoch: 96 test_true_pfm: -23.077276605517817
episode: 384 training return: tensor(-3.5028e+09, device='cuda:0')
episode: 385 training return: tensor(-2.9623e+09, device='cuda:0')
episode: 386 training return: tensor(-3.0656e+09, device='cuda:0')
episode: 387 training return: tensor(-2.4028e+09, device='cuda:0')
epoch: 97 test_true_pfm: -22.176048207408286
episode: 388 training return: tensor(-2.8198e+09, device='cuda:0')
episode: 389 training return: tensor(-3.0269e+09, device='cuda:0')
episode: 390 training return: tensor(-3.0703e+09, device='cuda:0')
episode: 391 training return: tensor(-2.2641e+09, device='cuda:0')
epoch: 98 test_true_pfm: -21.603277709920356
episode: 392 training return: tensor(-2.9661e+09, device='cuda:0')
episode: 393 training return: tensor(-3.4018e+09, device='cuda:0')
episode: 394 training return: tensor(-2.8139e+09, device='cuda:0')
episode: 395 training return: tensor(-2.9905e+09, device='cuda:0')
epoch: 99 test_true_pfm: -22.89043930687267
episode: 396 training return: tensor(-4.1586e+09, device='cuda:0')
episode: 397 training return: tensor(-2.8303e+09, device='cuda:0')
episode: 398 training return: tensor(-2.1472e+09, device='cuda:0')
episode: 399 training return: tensor(-2.9632e+09, device='cuda:0')
epoch: 100 test_true_pfm: -22.303793873730804
episode: 400 training return: tensor(-2.4005e+09, device='cuda:0')
episode: 401 training return: tensor(-2.4595e+09, device='cuda:0')
episode: 402 training return: tensor(-2.7655e+09, device='cuda:0')
episode: 403 training return: tensor(-3.6885e+09, device='cuda:0')
epoch: 101 test_true_pfm: -22.092376544246047
episode: 404 training return: tensor(-2.7281e+09, device='cuda:0')
episode: 405 training return: tensor(-2.8922e+09, device='cuda:0')
episode: 406 training return: tensor(-3.3506e+09, device='cuda:0')
episode: 407 training return: tensor(-2.3674e+09, device='cuda:0')
epoch: 102 test_true_pfm: -21.753548706372875
episode: 408 training return: tensor(-3.7459e+09, device='cuda:0')
episode: 409 training return: tensor(-3.1737e+09, device='cuda:0')
episode: 410 training return: tensor(-3.2806e+09, device='cuda:0')
episode: 411 training return: tensor(-2.5915e+09, device='cuda:0')
epoch: 103 test_true_pfm: -22.743641489446055
episode: 412 training return: tensor(-2.5675e+09, device='cuda:0')
episode: 413 training return: tensor(-3.2114e+09, device='cuda:0')
episode: 414 training return: tensor(-3.0802e+09, device='cuda:0')
episode: 415 training return: tensor(-3.3991e+09, device='cuda:0')
epoch: 104 test_true_pfm: -21.69817762294542
episode: 416 training return: tensor(-2.9710e+09, device='cuda:0')
episode: 417 training return: tensor(-2.2692e+09, device='cuda:0')
episode: 418 training return: tensor(-2.8529e+09, device='cuda:0')
episode: 419 training return: tensor(-2.4013e+09, device='cuda:0')
epoch: 105 test_true_pfm: -21.767334518569832
episode: 420 training return: tensor(-2.8134e+09, device='cuda:0')
episode: 421 training return: tensor(-3.2682e+09, device='cuda:0')
episode: 422 training return: tensor(-2.5826e+09, device='cuda:0')
episode: 423 training return: tensor(-3.4671e+09, device='cuda:0')
epoch: 106 test_true_pfm: -21.628884419368458
episode: 424 training return: tensor(-3.7840e+09, device='cuda:0')
episode: 425 training return: tensor(-2.7756e+09, device='cuda:0')
episode: 426 training return: tensor(-2.7233e+09, device='cuda:0')
episode: 427 training return: tensor(-2.6582e+09, device='cuda:0')
epoch: 107 test_true_pfm: -21.994250904445323
episode: 428 training return: tensor(-2.8523e+09, device='cuda:0')
episode: 429 training return: tensor(-3.0965e+09, device='cuda:0')
episode: 430 training return: tensor(-3.1615e+09, device='cuda:0')
episode: 431 training return: tensor(-3.1911e+09, device='cuda:0')
epoch: 108 test_true_pfm: -23.37637078958772
episode: 432 training return: tensor(-3.7119e+09, device='cuda:0')
episode: 433 training return: tensor(-2.9135e+09, device='cuda:0')
episode: 434 training return: tensor(-3.1990e+09, device='cuda:0')
episode: 435 training return: tensor(-3.2200e+09, device='cuda:0')
epoch: 109 test_true_pfm: -22.158876792276878
episode: 436 training return: tensor(-3.0088e+09, device='cuda:0')
episode: 437 training return: tensor(-3.6123e+09, device='cuda:0')
episode: 438 training return: tensor(-2.7398e+09, device='cuda:0')
episode: 439 training return: tensor(-3.3406e+09, device='cuda:0')
epoch: 110 test_true_pfm: -21.879096016315412
episode: 440 training return: tensor(-2.8676e+09, device='cuda:0')
episode: 441 training return: tensor(-2.6527e+09, device='cuda:0')
episode: 442 training return: tensor(-3.6218e+09, device='cuda:0')
episode: 443 training return: tensor(-2.8041e+09, device='cuda:0')
epoch: 111 test_true_pfm: -22.12224821923806
episode: 444 training return: tensor(-4.4379e+09, device='cuda:0')
episode: 445 training return: tensor(-3.1834e+09, device='cuda:0')
episode: 446 training return: tensor(-2.6180e+09, device='cuda:0')
episode: 447 training return: tensor(-2.8115e+09, device='cuda:0')
epoch: 112 test_true_pfm: -22.897124012845694
episode: 448 training return: tensor(-2.7541e+09, device='cuda:0')
episode: 449 training return: tensor(-2.7235e+09, device='cuda:0')
episode: 450 training return: tensor(-2.1792e+09, device='cuda:0')
episode: 451 training return: tensor(-2.7566e+09, device='cuda:0')
epoch: 113 test_true_pfm: -21.73759002770737
episode: 452 training return: tensor(-2.3395e+09, device='cuda:0')
episode: 453 training return: tensor(-2.5435e+09, device='cuda:0')
episode: 454 training return: tensor(-2.9940e+09, device='cuda:0')
episode: 455 training return: tensor(-3.2875e+09, device='cuda:0')
epoch: 114 test_true_pfm: -22.38636776360385
episode: 456 training return: tensor(-2.3202e+09, device='cuda:0')
episode: 457 training return: tensor(-2.7628e+09, device='cuda:0')
episode: 458 training return: tensor(-2.7489e+09, device='cuda:0')
episode: 459 training return: tensor(-2.7643e+09, device='cuda:0')
epoch: 115 test_true_pfm: -22.276496332195222
episode: 460 training return: tensor(-3.0995e+09, device='cuda:0')
episode: 461 training return: tensor(-3.0644e+09, device='cuda:0')
episode: 462 training return: tensor(-2.7506e+09, device='cuda:0')
episode: 463 training return: tensor(-3.4969e+09, device='cuda:0')
epoch: 116 test_true_pfm: -21.081687669599763
episode: 464 training return: tensor(-3.3585e+09, device='cuda:0')
episode: 465 training return: tensor(-2.8640e+09, device='cuda:0')
episode: 466 training return: tensor(-2.7814e+09, device='cuda:0')
episode: 467 training return: tensor(-3.5504e+09, device='cuda:0')
epoch: 117 test_true_pfm: -22.684355581461666
episode: 468 training return: tensor(-3.1002e+09, device='cuda:0')
episode: 469 training return: tensor(-2.7740e+09, device='cuda:0')
episode: 470 training return: tensor(-3.6780e+09, device='cuda:0')
episode: 471 training return: tensor(-3.3681e+09, device='cuda:0')
epoch: 118 test_true_pfm: -21.67148745628871
episode: 472 training return: tensor(-3.5011e+09, device='cuda:0')
episode: 473 training return: tensor(-2.8544e+09, device='cuda:0')
episode: 474 training return: tensor(-2.7371e+09, device='cuda:0')
episode: 475 training return: tensor(-3.5539e+09, device='cuda:0')
epoch: 119 test_true_pfm: -21.23963193419461
episode: 476 training return: tensor(-2.5113e+09, device='cuda:0')
episode: 477 training return: tensor(-3.0503e+09, device='cuda:0')
episode: 478 training return: tensor(-2.9785e+09, device='cuda:0')
episode: 479 training return: tensor(-2.5980e+09, device='cuda:0')
epoch: 120 test_true_pfm: -20.95456126710542
episode: 480 training return: tensor(-3.0020e+09, device='cuda:0')
episode: 481 training return: tensor(-3.5101e+09, device='cuda:0')
episode: 482 training return: tensor(-2.5211e+09, device='cuda:0')
episode: 483 training return: tensor(-2.4138e+09, device='cuda:0')
epoch: 121 test_true_pfm: -20.915105993409238
episode: 484 training return: tensor(-3.0415e+09, device='cuda:0')
episode: 485 training return: tensor(-2.7369e+09, device='cuda:0')
episode: 486 training return: tensor(-3.4353e+09, device='cuda:0')
episode: 487 training return: tensor(-2.7484e+09, device='cuda:0')
epoch: 122 test_true_pfm: -22.76422177190194
episode: 488 training return: tensor(-3.0124e+09, device='cuda:0')
episode: 489 training return: tensor(-2.1772e+09, device='cuda:0')
episode: 490 training return: tensor(-3.1429e+09, device='cuda:0')
episode: 491 training return: tensor(-2.6292e+09, device='cuda:0')
epoch: 123 test_true_pfm: -21.683414259631526
episode: 492 training return: tensor(-3.0999e+09, device='cuda:0')
episode: 493 training return: tensor(-2.1724e+09, device='cuda:0')
episode: 494 training return: tensor(-2.7441e+09, device='cuda:0')
episode: 495 training return: tensor(-2.8887e+09, device='cuda:0')
epoch: 124 test_true_pfm: -22.2143343547183
episode: 496 training return: tensor(-2.4796e+09, device='cuda:0')
episode: 497 training return: tensor(-3.1399e+09, device='cuda:0')
episode: 498 training return: tensor(-2.8681e+09, device='cuda:0')
episode: 499 training return: tensor(-3.4688e+09, device='cuda:0')
epoch: 125 test_true_pfm: -22.657806320634386
episode: 500 training return: tensor(-2.7803e+09, device='cuda:0')
episode: 501 training return: tensor(-3.4317e+09, device='cuda:0')
episode: 502 training return: tensor(-2.9874e+09, device='cuda:0')
episode: 503 training return: tensor(-2.9083e+09, device='cuda:0')
epoch: 126 test_true_pfm: -22.54979851738478
episode: 504 training return: tensor(-2.6059e+09, device='cuda:0')
episode: 505 training return: tensor(-2.7878e+09, device='cuda:0')
episode: 506 training return: tensor(-2.8370e+09, device='cuda:0')
episode: 507 training return: tensor(-2.6396e+09, device='cuda:0')
epoch: 127 test_true_pfm: -22.08348650540376
episode: 508 training return: tensor(-2.6859e+09, device='cuda:0')
episode: 509 training return: tensor(-3.0176e+09, device='cuda:0')
episode: 510 training return: tensor(-2.8832e+09, device='cuda:0')
episode: 511 training return: tensor(-2.3807e+09, device='cuda:0')
epoch: 128 test_true_pfm: -22.838924112528055
episode: 512 training return: tensor(-3.1375e+09, device='cuda:0')
episode: 513 training return: tensor(-3.2401e+09, device='cuda:0')
episode: 514 training return: tensor(-2.3796e+09, device='cuda:0')
episode: 515 training return: tensor(-2.8944e+09, device='cuda:0')
epoch: 129 test_true_pfm: -21.92301709421132
episode: 516 training return: tensor(-2.5852e+09, device='cuda:0')
episode: 517 training return: tensor(-3.0388e+09, device='cuda:0')
episode: 518 training return: tensor(-2.9014e+09, device='cuda:0')
episode: 519 training return: tensor(-2.5313e+09, device='cuda:0')
epoch: 130 test_true_pfm: -21.927579723112945
episode: 520 training return: tensor(-2.9686e+09, device='cuda:0')
episode: 521 training return: tensor(-3.3603e+09, device='cuda:0')
episode: 522 training return: tensor(-2.8568e+09, device='cuda:0')
episode: 523 training return: tensor(-2.8214e+09, device='cuda:0')
epoch: 131 test_true_pfm: -22.20878301011231
episode: 524 training return: tensor(-3.2830e+09, device='cuda:0')
episode: 525 training return: tensor(-3.8328e+09, device='cuda:0')
episode: 526 training return: tensor(-3.1023e+09, device='cuda:0')
episode: 527 training return: tensor(-2.7692e+09, device='cuda:0')
epoch: 132 test_true_pfm: -22.36224063702334
episode: 528 training return: tensor(-3.0616e+09, device='cuda:0')
episode: 529 training return: tensor(-3.0351e+09, device='cuda:0')
episode: 530 training return: tensor(-2.9088e+09, device='cuda:0')
episode: 531 training return: tensor(-2.4826e+09, device='cuda:0')
epoch: 133 test_true_pfm: -22.298550358132577
episode: 532 training return: tensor(-2.5831e+09, device='cuda:0')
episode: 533 training return: tensor(-2.4336e+09, device='cuda:0')
episode: 534 training return: tensor(-3.1329e+09, device='cuda:0')
episode: 535 training return: tensor(-2.7981e+09, device='cuda:0')
epoch: 134 test_true_pfm: -21.777295826409027
episode: 536 training return: tensor(-3.1065e+09, device='cuda:0')
episode: 537 training return: tensor(-2.4839e+09, device='cuda:0')
episode: 538 training return: tensor(-3.5468e+09, device='cuda:0')
episode: 539 training return: tensor(-2.5566e+09, device='cuda:0')
epoch: 135 test_true_pfm: -22.055875113065845
episode: 540 training return: tensor(-2.9357e+09, device='cuda:0')
episode: 541 training return: tensor(-2.3054e+09, device='cuda:0')
episode: 542 training return: tensor(-3.2662e+09, device='cuda:0')
episode: 543 training return: tensor(-2.9162e+09, device='cuda:0')
epoch: 136 test_true_pfm: -21.999502603653085
episode: 544 training return: tensor(-2.7902e+09, device='cuda:0')
episode: 545 training return: tensor(-3.3758e+09, device='cuda:0')
episode: 546 training return: tensor(-3.0044e+09, device='cuda:0')
episode: 547 training return: tensor(-3.1259e+09, device='cuda:0')
epoch: 137 test_true_pfm: -21.539684086621726
episode: 548 training return: tensor(-3.0396e+09, device='cuda:0')
episode: 549 training return: tensor(-3.6611e+09, device='cuda:0')
episode: 550 training return: tensor(-3.2806e+09, device='cuda:0')
episode: 551 training return: tensor(-4.1953e+09, device='cuda:0')
epoch: 138 test_true_pfm: -21.880784737395146
episode: 552 training return: tensor(-3.3538e+09, device='cuda:0')
episode: 553 training return: tensor(-2.5619e+09, device='cuda:0')
episode: 554 training return: tensor(-2.4885e+09, device='cuda:0')
episode: 555 training return: tensor(-2.9886e+09, device='cuda:0')
epoch: 139 test_true_pfm: -22.069595592092206
episode: 556 training return: tensor(-3.4408e+09, device='cuda:0')
episode: 557 training return: tensor(-2.4533e+09, device='cuda:0')
episode: 558 training return: tensor(-2.6848e+09, device='cuda:0')
episode: 559 training return: tensor(-2.7530e+09, device='cuda:0')
epoch: 140 test_true_pfm: -20.956387031048536
episode: 560 training return: tensor(-2.6422e+09, device='cuda:0')
episode: 561 training return: tensor(-2.5110e+09, device='cuda:0')
episode: 562 training return: tensor(-2.5760e+09, device='cuda:0')
episode: 563 training return: tensor(-2.8387e+09, device='cuda:0')
epoch: 141 test_true_pfm: -21.428482185500926
episode: 564 training return: tensor(-2.1612e+09, device='cuda:0')
episode: 565 training return: tensor(-3.0750e+09, device='cuda:0')
episode: 566 training return: tensor(-3.2843e+09, device='cuda:0')
episode: 567 training return: tensor(-3.7369e+09, device='cuda:0')
epoch: 142 test_true_pfm: -21.05969162744882
episode: 568 training return: tensor(-2.9174e+09, device='cuda:0')
episode: 569 training return: tensor(-3.0923e+09, device='cuda:0')
episode: 570 training return: tensor(-3.5344e+09, device='cuda:0')
episode: 571 training return: tensor(-3.2153e+09, device='cuda:0')
epoch: 143 test_true_pfm: -21.38766916903211
episode: 572 training return: tensor(-3.4008e+09, device='cuda:0')
episode: 573 training return: tensor(-2.4762e+09, device='cuda:0')
episode: 574 training return: tensor(-3.4948e+09, device='cuda:0')
episode: 575 training return: tensor(-2.7096e+09, device='cuda:0')
epoch: 144 test_true_pfm: -21.580964615637843
episode: 576 training return: tensor(-3.0167e+09, device='cuda:0')
episode: 577 training return: tensor(-3.1903e+09, device='cuda:0')
episode: 578 training return: tensor(-2.8626e+09, device='cuda:0')
episode: 579 training return: tensor(-2.6769e+09, device='cuda:0')
epoch: 145 test_true_pfm: -21.008330694542103
episode: 580 training return: tensor(-3.3420e+09, device='cuda:0')
episode: 581 training return: tensor(-2.2667e+09, device='cuda:0')
episode: 582 training return: tensor(-3.1491e+09, device='cuda:0')
episode: 583 training return: tensor(-3.0400e+09, device='cuda:0')
epoch: 146 test_true_pfm: -21.36599875938817
episode: 584 training return: tensor(-3.2745e+09, device='cuda:0')
episode: 585 training return: tensor(-3.4557e+09, device='cuda:0')
episode: 586 training return: tensor(-2.7077e+09, device='cuda:0')
episode: 587 training return: tensor(-2.8647e+09, device='cuda:0')
epoch: 147 test_true_pfm: -21.624663313796656
episode: 588 training return: tensor(-2.8797e+09, device='cuda:0')
episode: 589 training return: tensor(-2.7025e+09, device='cuda:0')
episode: 590 training return: tensor(-2.9689e+09, device='cuda:0')
episode: 591 training return: tensor(-2.2157e+09, device='cuda:0')
epoch: 148 test_true_pfm: -20.983743064080436
episode: 592 training return: tensor(-2.6125e+09, device='cuda:0')
episode: 593 training return: tensor(-3.1611e+09, device='cuda:0')
episode: 594 training return: tensor(-2.4819e+09, device='cuda:0')
episode: 595 training return: tensor(-2.6201e+09, device='cuda:0')
epoch: 149 test_true_pfm: -21.71071944717024
episode: 596 training return: tensor(-2.8629e+09, device='cuda:0')
episode: 597 training return: tensor(-2.8534e+09, device='cuda:0')
episode: 598 training return: tensor(-3.1953e+09, device='cuda:0')
episode: 599 training return: tensor(-2.6135e+09, device='cuda:0')
epoch: 150 test_true_pfm: -21.81611043184372
