['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'brac', '--traj', 'medium', '--seed', '3', '--data', '100000']
1318.0962004153255
episode: 0 training return: tensor(-6.0825e+15, device='cuda:0')
episode: 1 training return: tensor(-7.2665e+14, device='cuda:0')
episode: 2 training return: tensor(-1.2162e+15, device='cuda:0')
episode: 3 training return: tensor(-4.9680e+10, device='cuda:0')
epoch: 1 test_true_pfm: 45.60400239794733
episode: 4 training return: tensor(-2.3688e+14, device='cuda:0')
episode: 5 training return: tensor(-5.7487e+15, device='cuda:0')
episode: 6 training return: tensor(-1.6057e+16, device='cuda:0')
episode: 7 training return: tensor(-8.6282e+15, device='cuda:0')
epoch: 2 test_true_pfm: -21.886313384072153
episode: 8 training return: tensor(-4.4054e+15, device='cuda:0')
episode: 9 training return: tensor(-8.0924e+18, device='cuda:0')
episode: 10 training return: tensor(-6.5878e+12, device='cuda:0')
episode: 11 training return: tensor(-1.2503e+13, device='cuda:0')
epoch: 3 test_true_pfm: -22.646114705639054
episode: 12 training return: tensor(-1.0183e+13, device='cuda:0')
episode: 13 training return: tensor(-9.8566e+12, device='cuda:0')
episode: 14 training return: tensor(-1.2300e+13, device='cuda:0')
episode: 15 training return: tensor(-8.8347e+12, device='cuda:0')
epoch: 4 test_true_pfm: -21.921516627325996
episode: 16 training return: tensor(-8.7840e+12, device='cuda:0')
episode: 17 training return: tensor(-1.0741e+13, device='cuda:0')
episode: 18 training return: tensor(-1.1853e+13, device='cuda:0')
episode: 19 training return: tensor(-1.0543e+13, device='cuda:0')
epoch: 5 test_true_pfm: -22.139696588180723
episode: 20 training return: tensor(-8.1786e+12, device='cuda:0')
episode: 21 training return: tensor(-8.4026e+12, device='cuda:0')
episode: 22 training return: tensor(-1.0924e+13, device='cuda:0')
episode: 23 training return: tensor(-8.4478e+12, device='cuda:0')
epoch: 6 test_true_pfm: -22.385896319945772
episode: 24 training return: tensor(-8.8949e+12, device='cuda:0')
episode: 25 training return: tensor(-1.2222e+13, device='cuda:0')
episode: 26 training return: tensor(-8.4881e+12, device='cuda:0')
episode: 27 training return: tensor(-1.1901e+13, device='cuda:0')
epoch: 7 test_true_pfm: -22.070957548098004
episode: 28 training return: tensor(-1.1270e+13, device='cuda:0')
episode: 29 training return: tensor(-1.0094e+13, device='cuda:0')
episode: 30 training return: tensor(-1.1174e+13, device='cuda:0')
episode: 31 training return: tensor(-8.4190e+12, device='cuda:0')
epoch: 8 test_true_pfm: -21.82533197809855
episode: 32 training return: tensor(-1.0011e+13, device='cuda:0')
episode: 33 training return: tensor(-1.2264e+13, device='cuda:0')
episode: 34 training return: tensor(-1.1525e+13, device='cuda:0')
episode: 35 training return: tensor(-8.3852e+12, device='cuda:0')
epoch: 9 test_true_pfm: -21.661721436678874
episode: 36 training return: tensor(-8.1118e+12, device='cuda:0')
episode: 37 training return: tensor(-9.7665e+12, device='cuda:0')
episode: 38 training return: tensor(-1.1223e+13, device='cuda:0')
episode: 39 training return: tensor(-1.2779e+13, device='cuda:0')
epoch: 10 test_true_pfm: -22.31301802374482
episode: 40 training return: tensor(-1.0404e+13, device='cuda:0')
episode: 41 training return: tensor(-1.2548e+13, device='cuda:0')
episode: 42 training return: tensor(-7.8515e+12, device='cuda:0')
episode: 43 training return: tensor(-8.8287e+12, device='cuda:0')
epoch: 11 test_true_pfm: -22.12306717460389
episode: 44 training return: tensor(-1.2002e+13, device='cuda:0')
episode: 45 training return: tensor(-1.0605e+13, device='cuda:0')
episode: 46 training return: tensor(-1.0556e+13, device='cuda:0')
episode: 47 training return: tensor(-1.2008e+13, device='cuda:0')
epoch: 12 test_true_pfm: -22.143113560225363
episode: 48 training return: tensor(-1.0759e+13, device='cuda:0')
episode: 49 training return: tensor(-8.7866e+12, device='cuda:0')
episode: 50 training return: tensor(-1.2186e+13, device='cuda:0')
episode: 51 training return: tensor(-1.0705e+13, device='cuda:0')
epoch: 13 test_true_pfm: -22.182702359202313
episode: 52 training return: tensor(-9.1373e+12, device='cuda:0')
episode: 53 training return: tensor(-1.0907e+13, device='cuda:0')
episode: 54 training return: tensor(-9.9361e+12, device='cuda:0')
episode: 55 training return: tensor(-1.1934e+13, device='cuda:0')
epoch: 14 test_true_pfm: -21.905831418655392
episode: 56 training return: tensor(-1.2361e+13, device='cuda:0')
episode: 57 training return: tensor(-1.1280e+13, device='cuda:0')
episode: 58 training return: tensor(-7.8613e+12, device='cuda:0')
episode: 59 training return: tensor(-1.0016e+13, device='cuda:0')
epoch: 15 test_true_pfm: -21.87977937637884
episode: 60 training return: tensor(-9.9311e+12, device='cuda:0')
episode: 61 training return: tensor(-8.8819e+12, device='cuda:0')
episode: 62 training return: tensor(-9.2936e+12, device='cuda:0')
episode: 63 training return: tensor(-1.2495e+13, device='cuda:0')
epoch: 16 test_true_pfm: -22.260575476354784
episode: 64 training return: tensor(-8.7192e+12, device='cuda:0')
episode: 65 training return: tensor(-1.5690e+13, device='cuda:0')
episode: 66 training return: tensor(-8.3981e+12, device='cuda:0')
episode: 67 training return: tensor(-1.0988e+13, device='cuda:0')
epoch: 17 test_true_pfm: -22.417667583148084
episode: 68 training return: tensor(-8.4413e+12, device='cuda:0')
episode: 69 training return: tensor(-1.0852e+13, device='cuda:0')
episode: 70 training return: tensor(-8.8357e+12, device='cuda:0')
episode: 71 training return: tensor(-1.0842e+13, device='cuda:0')
epoch: 18 test_true_pfm: -22.40121979642484
episode: 72 training return: tensor(-9.1819e+12, device='cuda:0')
episode: 73 training return: tensor(-1.3170e+13, device='cuda:0')
episode: 74 training return: tensor(-1.0695e+13, device='cuda:0')
episode: 75 training return: tensor(-1.3953e+14, device='cuda:0')
epoch: 19 test_true_pfm: -22.34992986849105
episode: 76 training return: tensor(-8.4167e+12, device='cuda:0')
episode: 77 training return: tensor(-9.4554e+12, device='cuda:0')
episode: 78 training return: tensor(-1.1405e+13, device='cuda:0')
episode: 79 training return: tensor(-1.0249e+13, device='cuda:0')
epoch: 20 test_true_pfm: -22.85229462228683
episode: 80 training return: tensor(-6.0313e+12, device='cuda:0')
episode: 81 training return: tensor(-2.0499e+13, device='cuda:0')
episode: 82 training return: tensor(-1.0589e+13, device='cuda:0')
episode: 83 training return: tensor(-1.1352e+13, device='cuda:0')
epoch: 21 test_true_pfm: -22.665190912409514
episode: 84 training return: tensor(-1.1833e+13, device='cuda:0')
episode: 85 training return: tensor(-1.0342e+13, device='cuda:0')
episode: 86 training return: tensor(-8.6400e+12, device='cuda:0')
episode: 87 training return: tensor(-1.0209e+13, device='cuda:0')
epoch: 22 test_true_pfm: -22.157371122148543
episode: 88 training return: tensor(-1.1538e+13, device='cuda:0')
episode: 89 training return: tensor(-1.0251e+13, device='cuda:0')
episode: 90 training return: tensor(-9.7058e+12, device='cuda:0')
episode: 91 training return: tensor(-8.4936e+12, device='cuda:0')
epoch: 23 test_true_pfm: -22.72730768138689
episode: 92 training return: tensor(-1.0471e+13, device='cuda:0')
episode: 93 training return: tensor(-9.2513e+12, device='cuda:0')
episode: 94 training return: tensor(-1.1714e+13, device='cuda:0')
episode: 95 training return: tensor(-1.1809e+13, device='cuda:0')
epoch: 24 test_true_pfm: -22.287204636494593
episode: 96 training return: tensor(-1.1276e+13, device='cuda:0')
episode: 97 training return: tensor(-8.2193e+12, device='cuda:0')
episode: 98 training return: tensor(-1.0862e+13, device='cuda:0')
episode: 99 training return: tensor(-1.2307e+13, device='cuda:0')
epoch: 25 test_true_pfm: -22.484619191350333
episode: 100 training return: tensor(-8.0732e+12, device='cuda:0')
episode: 101 training return: tensor(-1.2003e+13, device='cuda:0')
episode: 102 training return: tensor(-9.8152e+12, device='cuda:0')
episode: 103 training return: tensor(-1.2655e+13, device='cuda:0')
epoch: 26 test_true_pfm: -22.34749072444609
episode: 104 training return: tensor(-9.6333e+12, device='cuda:0')
episode: 105 training return: tensor(-1.1013e+13, device='cuda:0')
episode: 106 training return: tensor(-1.1720e+13, device='cuda:0')
episode: 107 training return: tensor(-8.4770e+12, device='cuda:0')
epoch: 27 test_true_pfm: -21.65166740909109
episode: 108 training return: tensor(-7.4774e+12, device='cuda:0')
episode: 109 training return: tensor(-8.8843e+12, device='cuda:0')
episode: 110 training return: tensor(-9.0620e+12, device='cuda:0')
episode: 111 training return: tensor(-1.0034e+13, device='cuda:0')
epoch: 28 test_true_pfm: -22.073980501172016
episode: 112 training return: tensor(-8.8507e+12, device='cuda:0')
episode: 113 training return: tensor(-1.0765e+13, device='cuda:0')
episode: 114 training return: tensor(-1.2051e+13, device='cuda:0')
episode: 115 training return: tensor(-1.1679e+13, device='cuda:0')
epoch: 29 test_true_pfm: -22.340242534716158
episode: 116 training return: tensor(-1.0954e+13, device='cuda:0')
episode: 117 training return: tensor(-1.2347e+13, device='cuda:0')
episode: 118 training return: tensor(-1.2023e+13, device='cuda:0')
episode: 119 training return: tensor(-1.2189e+13, device='cuda:0')
epoch: 30 test_true_pfm: -22.3947580390394
episode: 120 training return: tensor(-1.2708e+13, device='cuda:0')
episode: 121 training return: tensor(-1.0058e+13, device='cuda:0')
episode: 122 training return: tensor(-1.0404e+13, device='cuda:0')
episode: 123 training return: tensor(-8.9084e+12, device='cuda:0')
epoch: 31 test_true_pfm: -21.672571655072474
episode: 124 training return: tensor(-8.1740e+12, device='cuda:0')
episode: 125 training return: tensor(-9.0275e+12, device='cuda:0')
episode: 126 training return: tensor(-1.3403e+13, device='cuda:0')
episode: 127 training return: tensor(-1.1622e+13, device='cuda:0')
epoch: 32 test_true_pfm: -22.354777105537966
episode: 128 training return: tensor(-1.1350e+13, device='cuda:0')
episode: 129 training return: tensor(-1.0215e+13, device='cuda:0')
episode: 130 training return: tensor(-9.4756e+12, device='cuda:0')
episode: 131 training return: tensor(-1.1888e+13, device='cuda:0')
epoch: 33 test_true_pfm: -22.407841217756527
episode: 132 training return: tensor(-1.2315e+13, device='cuda:0')
episode: 133 training return: tensor(-7.8687e+12, device='cuda:0')
episode: 134 training return: tensor(-9.5412e+12, device='cuda:0')
episode: 135 training return: tensor(-1.0592e+13, device='cuda:0')
epoch: 34 test_true_pfm: -22.00944918573516
episode: 136 training return: tensor(-1.2718e+13, device='cuda:0')
episode: 137 training return: tensor(-9.8227e+12, device='cuda:0')
episode: 138 training return: tensor(-8.8725e+12, device='cuda:0')
episode: 139 training return: tensor(-1.2265e+13, device='cuda:0')
epoch: 35 test_true_pfm: -22.454504826426202
episode: 140 training return: tensor(-1.1354e+13, device='cuda:0')
episode: 141 training return: tensor(-1.0851e+13, device='cuda:0')
episode: 142 training return: tensor(-9.7200e+12, device='cuda:0')
episode: 143 training return: tensor(-8.9928e+12, device='cuda:0')
epoch: 36 test_true_pfm: -22.184588657819578
episode: 144 training return: tensor(-1.2182e+13, device='cuda:0')
episode: 145 training return: tensor(-8.8588e+12, device='cuda:0')
episode: 146 training return: tensor(-1.1416e+13, device='cuda:0')
episode: 147 training return: tensor(-1.1871e+13, device='cuda:0')
epoch: 37 test_true_pfm: -21.937454439418797
episode: 148 training return: tensor(-1.0726e+13, device='cuda:0')
episode: 149 training return: tensor(-8.8936e+12, device='cuda:0')
episode: 150 training return: tensor(-4.8002e+12, device='cuda:0')
episode: 151 training return: tensor(-1.2704e+13, device='cuda:0')
epoch: 38 test_true_pfm: -21.97028161976989
episode: 152 training return: tensor(-8.5126e+12, device='cuda:0')
episode: 153 training return: tensor(-8.3789e+12, device='cuda:0')
episode: 154 training return: tensor(-9.8774e+12, device='cuda:0')
episode: 155 training return: tensor(-9.3028e+12, device='cuda:0')
epoch: 39 test_true_pfm: -22.00067734290216
episode: 156 training return: tensor(-7.8107e+12, device='cuda:0')
episode: 157 training return: tensor(-1.0167e+13, device='cuda:0')
episode: 158 training return: tensor(-8.4671e+12, device='cuda:0')
episode: 159 training return: tensor(-1.1129e+13, device='cuda:0')
epoch: 40 test_true_pfm: -22.282344160512107
episode: 160 training return: tensor(-1.4261e+13, device='cuda:0')
episode: 161 training return: tensor(-1.0684e+13, device='cuda:0')
episode: 162 training return: tensor(-1.0219e+13, device='cuda:0')
episode: 163 training return: tensor(-1.0018e+13, device='cuda:0')
epoch: 41 test_true_pfm: -22.386569367473857
episode: 164 training return: tensor(-9.4884e+12, device='cuda:0')
episode: 165 training return: tensor(-1.0270e+13, device='cuda:0')
episode: 166 training return: tensor(-1.0550e+13, device='cuda:0')
episode: 167 training return: tensor(-1.1935e+13, device='cuda:0')
epoch: 42 test_true_pfm: -22.125221206992734
episode: 168 training return: tensor(-1.2420e+13, device='cuda:0')
episode: 169 training return: tensor(-8.3202e+12, device='cuda:0')
episode: 170 training return: tensor(-9.1180e+12, device='cuda:0')
episode: 171 training return: tensor(-1.1283e+13, device='cuda:0')
epoch: 43 test_true_pfm: -22.281812913939167
episode: 172 training return: tensor(-1.1326e+13, device='cuda:0')
episode: 173 training return: tensor(-1.0286e+13, device='cuda:0')
episode: 174 training return: tensor(-7.9751e+12, device='cuda:0')
episode: 175 training return: tensor(-1.0623e+13, device='cuda:0')
epoch: 44 test_true_pfm: -22.326152967348914
episode: 176 training return: tensor(-1.1390e+13, device='cuda:0')
episode: 177 training return: tensor(-1.0595e+13, device='cuda:0')
episode: 178 training return: tensor(-1.0551e+13, device='cuda:0')
episode: 179 training return: tensor(-8.9947e+12, device='cuda:0')
epoch: 45 test_true_pfm: -21.985117969926772
episode: 180 training return: tensor(-9.7113e+12, device='cuda:0')
episode: 181 training return: tensor(-1.2836e+13, device='cuda:0')
episode: 182 training return: tensor(-8.8785e+12, device='cuda:0')
episode: 183 training return: tensor(-1.0939e+13, device='cuda:0')
epoch: 46 test_true_pfm: -21.990921715458754
episode: 184 training return: tensor(-1.0886e+13, device='cuda:0')
episode: 185 training return: tensor(-8.7133e+12, device='cuda:0')
episode: 186 training return: tensor(-8.8361e+12, device='cuda:0')
episode: 187 training return: tensor(-9.8366e+12, device='cuda:0')
epoch: 47 test_true_pfm: -22.11061555202127
episode: 188 training return: tensor(-1.1942e+13, device='cuda:0')
episode: 189 training return: tensor(-9.7834e+12, device='cuda:0')
episode: 190 training return: tensor(-1.0094e+13, device='cuda:0')
episode: 191 training return: tensor(-8.9709e+12, device='cuda:0')
epoch: 48 test_true_pfm: -22.19936112705204
episode: 192 training return: tensor(-1.0280e+13, device='cuda:0')
episode: 193 training return: tensor(-9.6948e+12, device='cuda:0')
episode: 194 training return: tensor(-9.5815e+12, device='cuda:0')
episode: 195 training return: tensor(-9.4527e+12, device='cuda:0')
epoch: 49 test_true_pfm: -22.40725544334217
episode: 196 training return: tensor(-1.2081e+13, device='cuda:0')
episode: 197 training return: tensor(-1.1665e+13, device='cuda:0')
episode: 198 training return: tensor(-1.1518e+13, device='cuda:0')
episode: 199 training return: tensor(-9.4360e+12, device='cuda:0')
epoch: 50 test_true_pfm: -22.502645778437586
episode: 200 training return: tensor(-1.0020e+13, device='cuda:0')
episode: 201 training return: tensor(-1.0636e+13, device='cuda:0')
episode: 202 training return: tensor(-8.0095e+12, device='cuda:0')
episode: 203 training return: tensor(-9.9234e+12, device='cuda:0')
epoch: 51 test_true_pfm: -22.29340373363881
episode: 204 training return: tensor(-1.1945e+13, device='cuda:0')
episode: 205 training return: tensor(-1.0849e+13, device='cuda:0')
episode: 206 training return: tensor(-8.7879e+12, device='cuda:0')
episode: 207 training return: tensor(-1.0400e+13, device='cuda:0')
epoch: 52 test_true_pfm: -22.793282787765886
episode: 208 training return: tensor(-1.0358e+13, device='cuda:0')
episode: 209 training return: tensor(-1.0314e+13, device='cuda:0')
episode: 210 training return: tensor(-9.8334e+12, device='cuda:0')
episode: 211 training return: tensor(-8.4629e+12, device='cuda:0')
epoch: 53 test_true_pfm: -21.930051263713924
episode: 212 training return: tensor(-1.1648e+13, device='cuda:0')
episode: 213 training return: tensor(-8.3329e+12, device='cuda:0')
episode: 214 training return: tensor(-1.2226e+13, device='cuda:0')
episode: 215 training return: tensor(-9.9950e+12, device='cuda:0')
epoch: 54 test_true_pfm: -22.218567003677943
episode: 216 training return: tensor(-1.1567e+13, device='cuda:0')
episode: 217 training return: tensor(-1.1439e+13, device='cuda:0')
episode: 218 training return: tensor(-1.0352e+13, device='cuda:0')
episode: 219 training return: tensor(-9.4403e+12, device='cuda:0')
epoch: 55 test_true_pfm: -22.586405250010326
episode: 220 training return: tensor(-1.0543e+13, device='cuda:0')
episode: 221 training return: tensor(-9.0016e+12, device='cuda:0')
episode: 222 training return: tensor(-8.1821e+12, device='cuda:0')
episode: 223 training return: tensor(-9.6170e+12, device='cuda:0')
epoch: 56 test_true_pfm: -22.261783962036848
episode: 224 training return: tensor(-1.2946e+13, device='cuda:0')
episode: 225 training return: tensor(-1.0918e+13, device='cuda:0')
episode: 226 training return: tensor(-8.7035e+12, device='cuda:0')
episode: 227 training return: tensor(-1.0181e+13, device='cuda:0')
epoch: 57 test_true_pfm: -22.434346166522385
episode: 228 training return: tensor(-7.8185e+12, device='cuda:0')
episode: 229 training return: tensor(-1.0931e+13, device='cuda:0')
episode: 230 training return: tensor(-8.8216e+12, device='cuda:0')
episode: 231 training return: tensor(-9.7000e+12, device='cuda:0')
epoch: 58 test_true_pfm: -22.6932868433267
episode: 232 training return: tensor(-1.1237e+13, device='cuda:0')
episode: 233 training return: tensor(-1.0594e+13, device='cuda:0')
episode: 234 training return: tensor(-8.6778e+12, device='cuda:0')
episode: 235 training return: tensor(-1.1682e+13, device='cuda:0')
epoch: 59 test_true_pfm: -22.205516942655226
episode: 236 training return: tensor(-1.0987e+13, device='cuda:0')
episode: 237 training return: tensor(-8.7091e+12, device='cuda:0')
episode: 238 training return: tensor(-1.0190e+13, device='cuda:0')
episode: 239 training return: tensor(-1.1498e+13, device='cuda:0')
epoch: 60 test_true_pfm: -21.605547234136022
episode: 240 training return: tensor(-1.1965e+13, device='cuda:0')
episode: 241 training return: tensor(-1.0567e+13, device='cuda:0')
episode: 242 training return: tensor(-8.1682e+12, device='cuda:0')
episode: 243 training return: tensor(-1.1019e+13, device='cuda:0')
epoch: 61 test_true_pfm: -22.14967146310526
episode: 244 training return: tensor(-1.0923e+13, device='cuda:0')
episode: 245 training return: tensor(-9.1696e+12, device='cuda:0')
episode: 246 training return: tensor(-1.1240e+13, device='cuda:0')
episode: 247 training return: tensor(-1.1265e+13, device='cuda:0')
epoch: 62 test_true_pfm: -22.5091524600649
episode: 248 training return: tensor(-8.9966e+12, device='cuda:0')
episode: 249 training return: tensor(-8.8603e+12, device='cuda:0')
episode: 250 training return: tensor(-1.2086e+13, device='cuda:0')
episode: 251 training return: tensor(-9.3449e+12, device='cuda:0')
epoch: 63 test_true_pfm: -22.272422280191204
episode: 252 training return: tensor(-9.7086e+12, device='cuda:0')
episode: 253 training return: tensor(-1.1651e+13, device='cuda:0')
episode: 254 training return: tensor(-8.2691e+12, device='cuda:0')
episode: 255 training return: tensor(-1.2355e+13, device='cuda:0')
epoch: 64 test_true_pfm: -22.059043125823788
episode: 256 training return: tensor(-1.2310e+13, device='cuda:0')
episode: 257 training return: tensor(-9.6820e+12, device='cuda:0')
episode: 258 training return: tensor(-8.3995e+12, device='cuda:0')
episode: 259 training return: tensor(-9.9836e+12, device='cuda:0')
epoch: 65 test_true_pfm: -21.576658936574546
episode: 260 training return: tensor(-1.0408e+13, device='cuda:0')
episode: 261 training return: tensor(-9.4657e+12, device='cuda:0')
episode: 262 training return: tensor(-8.1794e+12, device='cuda:0')
episode: 263 training return: tensor(-1.0871e+13, device='cuda:0')
epoch: 66 test_true_pfm: -22.02086609309005
episode: 264 training return: tensor(-9.2527e+12, device='cuda:0')
episode: 265 training return: tensor(-8.8382e+12, device='cuda:0')
episode: 266 training return: tensor(-1.0772e+13, device='cuda:0')
episode: 267 training return: tensor(-8.8965e+12, device='cuda:0')
epoch: 67 test_true_pfm: -22.377376665913943
episode: 268 training return: tensor(-8.1627e+12, device='cuda:0')
episode: 269 training return: tensor(-1.2134e+13, device='cuda:0')
episode: 270 training return: tensor(-9.9332e+12, device='cuda:0')
episode: 271 training return: tensor(-1.1224e+13, device='cuda:0')
epoch: 68 test_true_pfm: -22.294641985542977
episode: 272 training return: tensor(-9.7452e+12, device='cuda:0')
episode: 273 training return: tensor(-9.4749e+12, device='cuda:0')
episode: 274 training return: tensor(-9.7317e+12, device='cuda:0')
episode: 275 training return: tensor(-1.2070e+13, device='cuda:0')
epoch: 69 test_true_pfm: -22.142947406086808
episode: 276 training return: tensor(-9.0661e+12, device='cuda:0')
episode: 277 training return: tensor(-8.8486e+12, device='cuda:0')
episode: 278 training return: tensor(-8.3099e+12, device='cuda:0')
episode: 279 training return: tensor(-1.2915e+13, device='cuda:0')
epoch: 70 test_true_pfm: -21.961637598046952
episode: 280 training return: tensor(-1.0254e+13, device='cuda:0')
episode: 281 training return: tensor(-1.0123e+13, device='cuda:0')
episode: 282 training return: tensor(-1.1510e+13, device='cuda:0')
episode: 283 training return: tensor(-8.2616e+12, device='cuda:0')
epoch: 71 test_true_pfm: -21.84573276631272
episode: 284 training return: tensor(-1.0213e+13, device='cuda:0')
episode: 285 training return: tensor(-1.0343e+13, device='cuda:0')
episode: 286 training return: tensor(-1.1687e+13, device='cuda:0')
episode: 287 training return: tensor(-9.0149e+12, device='cuda:0')
epoch: 72 test_true_pfm: -22.15815642220726
episode: 288 training return: tensor(-1.2294e+13, device='cuda:0')
episode: 289 training return: tensor(-9.8209e+12, device='cuda:0')
episode: 290 training return: tensor(-1.0550e+13, device='cuda:0')
episode: 291 training return: tensor(-9.2295e+12, device='cuda:0')
epoch: 73 test_true_pfm: -22.169435700822078
episode: 292 training return: tensor(-1.0272e+13, device='cuda:0')
episode: 293 training return: tensor(-1.1612e+13, device='cuda:0')
episode: 294 training return: tensor(-1.0322e+13, device='cuda:0')
episode: 295 training return: tensor(-1.2723e+13, device='cuda:0')
epoch: 74 test_true_pfm: -21.989980833395762
episode: 296 training return: tensor(-1.0622e+13, device='cuda:0')
episode: 297 training return: tensor(-9.5477e+12, device='cuda:0')
episode: 298 training return: tensor(-1.0132e+13, device='cuda:0')
episode: 299 training return: tensor(-1.1548e+13, device='cuda:0')
epoch: 75 test_true_pfm: -22.542986185576684
episode: 300 training return: tensor(-8.7826e+12, device='cuda:0')
episode: 301 training return: tensor(-9.0065e+12, device='cuda:0')
episode: 302 training return: tensor(-9.2349e+15, device='cuda:0')
episode: 303 training return: tensor(-1.6951e+14, device='cuda:0')
epoch: 76 test_true_pfm: -21.728513699015306
episode: 304 training return: tensor(-9.6727e+12, device='cuda:0')
episode: 305 training return: tensor(-1.2416e+13, device='cuda:0')
episode: 306 training return: tensor(-1.0679e+13, device='cuda:0')
episode: 307 training return: tensor(-9.4827e+12, device='cuda:0')
epoch: 77 test_true_pfm: -14.833273392449655
episode: 308 training return: tensor(-5.0979e+12, device='cuda:0')
episode: 309 training return: tensor(-4.6506e+12, device='cuda:0')
episode: 310 training return: tensor(-1.0764e+13, device='cuda:0')
episode: 311 training return: tensor(-1.0257e+13, device='cuda:0')
epoch: 78 test_true_pfm: -22.430505762596397
episode: 312 training return: tensor(-9.4349e+12, device='cuda:0')
episode: 313 training return: tensor(-9.6968e+12, device='cuda:0')
episode: 314 training return: tensor(-1.0929e+13, device='cuda:0')
episode: 315 training return: tensor(-1.2210e+13, device='cuda:0')
epoch: 79 test_true_pfm: -21.77178998817671
episode: 316 training return: tensor(-9.3692e+12, device='cuda:0')
episode: 317 training return: tensor(-9.8957e+12, device='cuda:0')
episode: 318 training return: tensor(-8.7061e+12, device='cuda:0')
episode: 319 training return: tensor(-9.3183e+12, device='cuda:0')
epoch: 80 test_true_pfm: -21.80950164091701
episode: 320 training return: tensor(-1.1155e+13, device='cuda:0')
episode: 321 training return: tensor(-9.7061e+12, device='cuda:0')
episode: 322 training return: tensor(-1.2100e+13, device='cuda:0')
episode: 323 training return: tensor(-8.4573e+12, device='cuda:0')
epoch: 81 test_true_pfm: -21.75909718693499
episode: 324 training return: tensor(-1.2175e+13, device='cuda:0')
episode: 325 training return: tensor(-1.2400e+13, device='cuda:0')
episode: 326 training return: tensor(-8.0304e+12, device='cuda:0')
episode: 327 training return: tensor(-1.1980e+13, device='cuda:0')
epoch: 82 test_true_pfm: -22.300248686457895
episode: 328 training return: tensor(-1.2327e+13, device='cuda:0')
episode: 329 training return: tensor(-9.7891e+12, device='cuda:0')
episode: 330 training return: tensor(-4.3618e+12, device='cuda:0')
episode: 331 training return: tensor(-1.0571e+13, device='cuda:0')
epoch: 83 test_true_pfm: -21.83511451506358
episode: 332 training return: tensor(-1.0649e+13, device='cuda:0')
episode: 333 training return: tensor(-1.2241e+13, device='cuda:0')
episode: 334 training return: tensor(-8.3093e+12, device='cuda:0')
episode: 335 training return: tensor(-9.4744e+12, device='cuda:0')
epoch: 84 test_true_pfm: -22.412937466001623
episode: 336 training return: tensor(-1.1354e+13, device='cuda:0')
episode: 337 training return: tensor(-1.2436e+13, device='cuda:0')
episode: 338 training return: tensor(-8.1697e+12, device='cuda:0')
episode: 339 training return: tensor(-1.1966e+13, device='cuda:0')
epoch: 85 test_true_pfm: -22.281913438111886
episode: 340 training return: tensor(-1.1982e+13, device='cuda:0')
episode: 341 training return: tensor(-3.5459e+12, device='cuda:0')
episode: 342 training return: tensor(-1.1062e+13, device='cuda:0')
episode: 343 training return: tensor(-9.2900e+12, device='cuda:0')
epoch: 86 test_true_pfm: -22.192413707003467
episode: 344 training return: tensor(-1.0550e+13, device='cuda:0')
episode: 345 training return: tensor(-9.0226e+12, device='cuda:0')
episode: 346 training return: tensor(-7.7893e+12, device='cuda:0')
episode: 347 training return: tensor(-8.8525e+12, device='cuda:0')
epoch: 87 test_true_pfm: -22.12406879976076
episode: 348 training return: tensor(-8.6085e+12, device='cuda:0')
episode: 349 training return: tensor(-1.0906e+13, device='cuda:0')
episode: 350 training return: tensor(-1.0315e+13, device='cuda:0')
episode: 351 training return: tensor(-9.5196e+12, device='cuda:0')
epoch: 88 test_true_pfm: -22.171460064588725
episode: 352 training return: tensor(-8.8006e+12, device='cuda:0')
episode: 353 training return: tensor(-1.1030e+13, device='cuda:0')
episode: 354 training return: tensor(-9.9087e+12, device='cuda:0')
episode: 355 training return: tensor(-1.2728e+13, device='cuda:0')
epoch: 89 test_true_pfm: -21.849331622698617
episode: 356 training return: tensor(-9.1035e+12, device='cuda:0')
episode: 357 training return: tensor(-1.1777e+13, device='cuda:0')
episode: 358 training return: tensor(-1.0168e+13, device='cuda:0')
episode: 359 training return: tensor(-1.0764e+13, device='cuda:0')
epoch: 90 test_true_pfm: -22.08692500394085
episode: 360 training return: tensor(-9.7808e+12, device='cuda:0')
episode: 361 training return: tensor(-8.9848e+12, device='cuda:0')
episode: 362 training return: tensor(-8.2564e+12, device='cuda:0')
episode: 363 training return: tensor(-1.2197e+13, device='cuda:0')
epoch: 91 test_true_pfm: -22.561657511935266
episode: 364 training return: tensor(-1.1412e+13, device='cuda:0')
episode: 365 training return: tensor(-1.2352e+13, device='cuda:0')
episode: 366 training return: tensor(-1.0251e+13, device='cuda:0')
episode: 367 training return: tensor(-1.2276e+13, device='cuda:0')
epoch: 92 test_true_pfm: 23.123807889858806
episode: 368 training return: tensor(-7.7020e+13, device='cuda:0')
episode: 369 training return: tensor(-9.8781e+12, device='cuda:0')
episode: 370 training return: tensor(-9.7885e+12, device='cuda:0')
episode: 371 training return: tensor(-8.3039e+12, device='cuda:0')
epoch: 93 test_true_pfm: -21.66846721755076
episode: 372 training return: tensor(-1.1073e+13, device='cuda:0')
episode: 373 training return: tensor(-1.1153e+13, device='cuda:0')
episode: 374 training return: tensor(-1.1267e+13, device='cuda:0')
episode: 375 training return: tensor(-1.2450e+13, device='cuda:0')
epoch: 94 test_true_pfm: -22.259852159376702
episode: 376 training return: tensor(-9.1380e+12, device='cuda:0')
episode: 377 training return: tensor(-1.1148e+13, device='cuda:0')
episode: 378 training return: tensor(-7.9894e+12, device='cuda:0')
episode: 379 training return: tensor(-1.2219e+13, device='cuda:0')
epoch: 95 test_true_pfm: -21.709630653899172
episode: 380 training return: tensor(-1.0714e+13, device='cuda:0')
episode: 381 training return: tensor(-7.7437e+12, device='cuda:0')
episode: 382 training return: tensor(-8.5645e+12, device='cuda:0')
episode: 383 training return: tensor(-1.0646e+13, device='cuda:0')
epoch: 96 test_true_pfm: -22.187600659828508
episode: 384 training return: tensor(-1.0420e+13, device='cuda:0')
episode: 385 training return: tensor(-1.0480e+13, device='cuda:0')
episode: 386 training return: tensor(-1.1958e+13, device='cuda:0')
episode: 387 training return: tensor(-9.1852e+12, device='cuda:0')
epoch: 97 test_true_pfm: -34.460668302971804
episode: 388 training return: tensor(-9.4233e+12, device='cuda:0')
episode: 389 training return: tensor(-8.1763e+12, device='cuda:0')
episode: 390 training return: tensor(-1.0632e+13, device='cuda:0')
episode: 391 training return: tensor(-8.9064e+12, device='cuda:0')
epoch: 98 test_true_pfm: -21.799028182401585
episode: 392 training return: tensor(-1.2335e+13, device='cuda:0')
episode: 393 training return: tensor(-1.0696e+13, device='cuda:0')
episode: 394 training return: tensor(-1.2434e+13, device='cuda:0')
episode: 395 training return: tensor(-9.9297e+12, device='cuda:0')
epoch: 99 test_true_pfm: -22.344895563169302
episode: 396 training return: tensor(-9.0340e+12, device='cuda:0')
episode: 397 training return: tensor(-1.2153e+13, device='cuda:0')
episode: 398 training return: tensor(-1.0390e+13, device='cuda:0')
episode: 399 training return: tensor(-1.1609e+13, device='cuda:0')
epoch: 100 test_true_pfm: -22.249223976186823
episode: 400 training return: tensor(-8.6903e+12, device='cuda:0')
episode: 401 training return: tensor(-1.1289e+13, device='cuda:0')
episode: 402 training return: tensor(-1.1418e+13, device='cuda:0')
episode: 403 training return: tensor(-9.9458e+12, device='cuda:0')
epoch: 101 test_true_pfm: -22.265003927462192
episode: 404 training return: tensor(-1.0253e+13, device='cuda:0')
episode: 405 training return: tensor(-9.0682e+12, device='cuda:0')
episode: 406 training return: tensor(-7.8000e+12, device='cuda:0')
episode: 407 training return: tensor(-1.1656e+13, device='cuda:0')
epoch: 102 test_true_pfm: -22.468977906508172
episode: 408 training return: tensor(-1.0584e+13, device='cuda:0')
episode: 409 training return: tensor(-9.1056e+12, device='cuda:0')
episode: 410 training return: tensor(-1.0913e+13, device='cuda:0')
episode: 411 training return: tensor(-9.7071e+12, device='cuda:0')
epoch: 103 test_true_pfm: -22.212194465356742
episode: 412 training return: tensor(-1.2572e+13, device='cuda:0')
episode: 413 training return: tensor(-8.0019e+12, device='cuda:0')
episode: 414 training return: tensor(-1.0907e+13, device='cuda:0')
episode: 415 training return: tensor(-4.9785e+19, device='cuda:0')
epoch: 104 test_true_pfm: -62.755828367438994
episode: 416 training return: tensor(-4.2852e+12, device='cuda:0')
episode: 417 training return: tensor(-9.7359e+12, device='cuda:0')
episode: 418 training return: tensor(-1.0065e+13, device='cuda:0')
episode: 419 training return: tensor(-9.8369e+12, device='cuda:0')
epoch: 105 test_true_pfm: -21.856902744153853
episode: 420 training return: tensor(-8.6356e+12, device='cuda:0')
episode: 421 training return: tensor(-8.9304e+12, device='cuda:0')
episode: 422 training return: tensor(-9.0189e+12, device='cuda:0')
episode: 423 training return: tensor(-1.2702e+13, device='cuda:0')
epoch: 106 test_true_pfm: -2.6264852539197125
episode: 424 training return: tensor(-1.1710e+13, device='cuda:0')
episode: 425 training return: tensor(-2.4833e+14, device='cuda:0')
episode: 426 training return: tensor(-8.9193e+12, device='cuda:0')
episode: 427 training return: tensor(-9.0164e+12, device='cuda:0')
epoch: 107 test_true_pfm: -22.907426615694828
episode: 428 training return: tensor(-8.0406e+12, device='cuda:0')
episode: 429 training return: tensor(-1.1044e+13, device='cuda:0')
episode: 430 training return: tensor(-1.0764e+13, device='cuda:0')
episode: 431 training return: tensor(-8.4771e+12, device='cuda:0')
epoch: 108 test_true_pfm: -22.35622083909124
episode: 432 training return: tensor(-9.6446e+12, device='cuda:0')
episode: 433 training return: tensor(-8.6988e+12, device='cuda:0')
episode: 434 training return: tensor(-1.2175e+13, device='cuda:0')
episode: 435 training return: tensor(-1.0026e+13, device='cuda:0')
epoch: 109 test_true_pfm: -22.370725227994086
episode: 436 training return: tensor(-1.1828e+13, device='cuda:0')
episode: 437 training return: tensor(-8.7464e+12, device='cuda:0')
episode: 438 training return: tensor(-1.1755e+13, device='cuda:0')
episode: 439 training return: tensor(-1.2184e+13, device='cuda:0')
epoch: 110 test_true_pfm: -22.256144036947276
episode: 440 training return: tensor(-7.8004e+12, device='cuda:0')
episode: 441 training return: tensor(-1.0941e+13, device='cuda:0')
episode: 442 training return: tensor(-9.2576e+12, device='cuda:0')
episode: 443 training return: tensor(-9.2579e+12, device='cuda:0')
epoch: 111 test_true_pfm: -21.450340788825013
episode: 444 training return: tensor(-8.6936e+12, device='cuda:0')
episode: 445 training return: tensor(-8.1932e+12, device='cuda:0')
episode: 446 training return: tensor(-9.1616e+12, device='cuda:0')
episode: 447 training return: tensor(-8.4526e+12, device='cuda:0')
epoch: 112 test_true_pfm: -21.89533001165853
episode: 448 training return: tensor(-8.8166e+12, device='cuda:0')
episode: 449 training return: tensor(-8.6969e+12, device='cuda:0')
episode: 450 training return: tensor(-8.0867e+12, device='cuda:0')
episode: 451 training return: tensor(-1.0567e+13, device='cuda:0')
epoch: 113 test_true_pfm: -22.306556648978255
episode: 452 training return: tensor(-1.2530e+13, device='cuda:0')
episode: 453 training return: tensor(-1.1782e+13, device='cuda:0')
episode: 454 training return: tensor(-1.1745e+13, device='cuda:0')
episode: 455 training return: tensor(-8.3879e+12, device='cuda:0')
epoch: 114 test_true_pfm: -21.972578011622897
episode: 456 training return: tensor(-1.0601e+13, device='cuda:0')
episode: 457 training return: tensor(-8.1095e+12, device='cuda:0')
episode: 458 training return: tensor(-1.1948e+13, device='cuda:0')
episode: 459 training return: tensor(-8.8755e+12, device='cuda:0')
epoch: 115 test_true_pfm: -22.520611995183454
episode: 460 training return: tensor(-1.1967e+13, device='cuda:0')
episode: 461 training return: tensor(-1.0853e+13, device='cuda:0')
episode: 462 training return: tensor(-9.5701e+12, device='cuda:0')
episode: 463 training return: tensor(-8.7989e+12, device='cuda:0')
epoch: 116 test_true_pfm: -22.31855745208959
episode: 464 training return: tensor(-9.6016e+12, device='cuda:0')
episode: 465 training return: tensor(-9.3528e+12, device='cuda:0')
episode: 466 training return: tensor(-1.0969e+13, device='cuda:0')
episode: 467 training return: tensor(-1.2027e+13, device='cuda:0')
epoch: 117 test_true_pfm: -21.935838421769926
episode: 468 training return: tensor(-3.0984e+14, device='cuda:0')
episode: 469 training return: tensor(-8.7298e+12, device='cuda:0')
episode: 470 training return: tensor(-8.5291e+12, device='cuda:0')
episode: 471 training return: tensor(-1.0100e+13, device='cuda:0')
epoch: 118 test_true_pfm: -22.343153812902575
episode: 472 training return: tensor(-1.1656e+13, device='cuda:0')
episode: 473 training return: tensor(-8.9824e+12, device='cuda:0')
episode: 474 training return: tensor(-1.0244e+13, device='cuda:0')
episode: 475 training return: tensor(-1.0838e+13, device='cuda:0')
epoch: 119 test_true_pfm: -22.111494983007635
episode: 476 training return: tensor(-8.9108e+12, device='cuda:0')
episode: 477 training return: tensor(-9.3906e+12, device='cuda:0')
episode: 478 training return: tensor(-1.0299e+13, device='cuda:0')
episode: 479 training return: tensor(-7.8572e+12, device='cuda:0')
epoch: 120 test_true_pfm: -22.12701444232104
episode: 480 training return: tensor(-8.7658e+12, device='cuda:0')
episode: 481 training return: tensor(-1.2506e+13, device='cuda:0')
episode: 482 training return: tensor(-9.3550e+12, device='cuda:0')
episode: 483 training return: tensor(-9.5187e+12, device='cuda:0')
epoch: 121 test_true_pfm: -22.40001176980839
episode: 484 training return: tensor(-1.1550e+13, device='cuda:0')
episode: 485 training return: tensor(-9.0175e+12, device='cuda:0')
episode: 486 training return: tensor(-7.8254e+12, device='cuda:0')
episode: 487 training return: tensor(-1.0048e+13, device='cuda:0')
epoch: 122 test_true_pfm: -22.188591046047083
episode: 488 training return: tensor(-1.1206e+13, device='cuda:0')
episode: 489 training return: tensor(-1.0890e+13, device='cuda:0')
episode: 490 training return: tensor(-9.7575e+12, device='cuda:0')
episode: 491 training return: tensor(-1.0268e+13, device='cuda:0')
epoch: 123 test_true_pfm: -22.642635631227837
episode: 492 training return: tensor(-1.1843e+13, device='cuda:0')
episode: 493 training return: tensor(-1.0589e+13, device='cuda:0')
episode: 494 training return: tensor(-1.0096e+13, device='cuda:0')
episode: 495 training return: tensor(-1.0029e+13, device='cuda:0')
epoch: 124 test_true_pfm: -21.733388688682442
episode: 496 training return: tensor(-1.1207e+13, device='cuda:0')
episode: 497 training return: tensor(-1.1984e+13, device='cuda:0')
episode: 498 training return: tensor(-1.1915e+13, device='cuda:0')
episode: 499 training return: tensor(-1.2092e+13, device='cuda:0')
epoch: 125 test_true_pfm: -22.763717928376597
episode: 500 training return: tensor(-8.4757e+12, device='cuda:0')
episode: 501 training return: tensor(-1.1122e+13, device='cuda:0')
episode: 502 training return: tensor(-9.3307e+12, device='cuda:0')
episode: 503 training return: tensor(-1.0421e+13, device='cuda:0')
epoch: 126 test_true_pfm: -22.097358930673604
episode: 504 training return: tensor(-1.2752e+13, device='cuda:0')
episode: 505 training return: tensor(-8.9628e+12, device='cuda:0')
episode: 506 training return: tensor(-9.2581e+12, device='cuda:0')
episode: 507 training return: tensor(-1.1853e+13, device='cuda:0')
epoch: 127 test_true_pfm: -21.97304449534471
episode: 508 training return: tensor(-1.1945e+13, device='cuda:0')
episode: 509 training return: tensor(-9.0955e+12, device='cuda:0')
episode: 510 training return: tensor(-1.0672e+13, device='cuda:0')
episode: 511 training return: tensor(-8.7393e+12, device='cuda:0')
epoch: 128 test_true_pfm: -22.277908545495492
episode: 512 training return: tensor(-1.0736e+13, device='cuda:0')
episode: 513 training return: tensor(-8.3666e+12, device='cuda:0')
episode: 514 training return: tensor(-8.4830e+12, device='cuda:0')
episode: 515 training return: tensor(-1.1159e+13, device='cuda:0')
epoch: 129 test_true_pfm: -22.210344868214424
episode: 516 training return: tensor(-8.3853e+12, device='cuda:0')
episode: 517 training return: tensor(-8.7359e+12, device='cuda:0')
episode: 518 training return: tensor(-9.8821e+12, device='cuda:0')
episode: 519 training return: tensor(-8.3543e+12, device='cuda:0')
epoch: 130 test_true_pfm: -22.354387902519658
episode: 520 training return: tensor(-1.1008e+13, device='cuda:0')
episode: 521 training return: tensor(-1.0150e+13, device='cuda:0')
episode: 522 training return: tensor(-1.0362e+13, device='cuda:0')
episode: 523 training return: tensor(-1.0631e+13, device='cuda:0')
epoch: 131 test_true_pfm: -22.091033813663
episode: 524 training return: tensor(-9.0463e+12, device='cuda:0')
episode: 525 training return: tensor(-9.2652e+12, device='cuda:0')
episode: 526 training return: tensor(-1.2105e+13, device='cuda:0')
episode: 527 training return: tensor(-1.2014e+13, device='cuda:0')
epoch: 132 test_true_pfm: -22.161161517334534
episode: 528 training return: tensor(-9.0602e+12, device='cuda:0')
episode: 529 training return: tensor(-1.0211e+13, device='cuda:0')
episode: 530 training return: tensor(-9.1063e+12, device='cuda:0')
episode: 531 training return: tensor(-9.4001e+12, device='cuda:0')
epoch: 133 test_true_pfm: -21.996735311564645
episode: 532 training return: tensor(-9.8581e+12, device='cuda:0')
episode: 533 training return: tensor(-1.0592e+13, device='cuda:0')
episode: 534 training return: tensor(-8.7299e+12, device='cuda:0')
episode: 535 training return: tensor(-9.6701e+12, device='cuda:0')
epoch: 134 test_true_pfm: -22.265882597318427
episode: 536 training return: tensor(-1.2032e+13, device='cuda:0')
episode: 537 training return: tensor(-9.2111e+12, device='cuda:0')
episode: 538 training return: tensor(-1.1242e+13, device='cuda:0')
episode: 539 training return: tensor(-1.2772e+13, device='cuda:0')
epoch: 135 test_true_pfm: -22.154552085101358
episode: 540 training return: tensor(-1.0592e+13, device='cuda:0')
episode: 541 training return: tensor(-1.0030e+13, device='cuda:0')
episode: 542 training return: tensor(-8.7022e+12, device='cuda:0')
episode: 543 training return: tensor(-8.8144e+12, device='cuda:0')
epoch: 136 test_true_pfm: -21.59420149271392
episode: 544 training return: tensor(-1.0929e+13, device='cuda:0')
episode: 545 training return: tensor(-1.2067e+13, device='cuda:0')
episode: 546 training return: tensor(-1.0986e+13, device='cuda:0')
episode: 547 training return: tensor(-1.2130e+13, device='cuda:0')
epoch: 137 test_true_pfm: -21.815210951199088
episode: 548 training return: tensor(-8.7983e+12, device='cuda:0')
episode: 549 training return: tensor(-8.9751e+12, device='cuda:0')
episode: 550 training return: tensor(-8.8930e+12, device='cuda:0')
episode: 551 training return: tensor(-1.1428e+13, device='cuda:0')
epoch: 138 test_true_pfm: -22.481694812616112
episode: 552 training return: tensor(-1.2291e+13, device='cuda:0')
episode: 553 training return: tensor(-1.2301e+13, device='cuda:0')
episode: 554 training return: tensor(-9.2878e+12, device='cuda:0')
episode: 555 training return: tensor(-9.9266e+12, device='cuda:0')
epoch: 139 test_true_pfm: -22.188962866895267
episode: 556 training return: tensor(-1.2693e+13, device='cuda:0')
episode: 557 training return: tensor(-9.7358e+12, device='cuda:0')
episode: 558 training return: tensor(-8.8616e+12, device='cuda:0')
episode: 559 training return: tensor(-8.8654e+12, device='cuda:0')
epoch: 140 test_true_pfm: -21.936700746491468
episode: 560 training return: tensor(-8.7853e+12, device='cuda:0')
episode: 561 training return: tensor(-1.1739e+13, device='cuda:0')
episode: 562 training return: tensor(-1.1126e+13, device='cuda:0')
episode: 563 training return: tensor(-7.8264e+12, device='cuda:0')
epoch: 141 test_true_pfm: -21.708316005047084
episode: 564 training return: tensor(-1.0303e+13, device='cuda:0')
episode: 565 training return: tensor(-1.8039e+13, device='cuda:0')
episode: 566 training return: tensor(-9.1905e+12, device='cuda:0')
episode: 567 training return: tensor(-1.0626e+13, device='cuda:0')
epoch: 142 test_true_pfm: -22.490142706393197
episode: 568 training return: tensor(-1.1451e+13, device='cuda:0')
episode: 569 training return: tensor(-9.0364e+12, device='cuda:0')
episode: 570 training return: tensor(-1.2300e+13, device='cuda:0')
episode: 571 training return: tensor(-8.5635e+12, device='cuda:0')
epoch: 143 test_true_pfm: -22.145195309773047
episode: 572 training return: tensor(-9.1107e+12, device='cuda:0')
episode: 573 training return: tensor(-8.4746e+12, device='cuda:0')
episode: 574 training return: tensor(-1.1514e+13, device='cuda:0')
episode: 575 training return: tensor(-1.2521e+13, device='cuda:0')
epoch: 144 test_true_pfm: -22.211096984362836
episode: 576 training return: tensor(-1.1663e+13, device='cuda:0')
episode: 577 training return: tensor(-8.0209e+12, device='cuda:0')
episode: 578 training return: tensor(-1.2429e+13, device='cuda:0')
episode: 579 training return: tensor(-1.2735e+13, device='cuda:0')
epoch: 145 test_true_pfm: -22.459349623269674
episode: 580 training return: tensor(-1.0167e+13, device='cuda:0')
episode: 581 training return: tensor(-1.0543e+13, device='cuda:0')
episode: 582 training return: tensor(-1.2138e+13, device='cuda:0')
episode: 583 training return: tensor(-9.5011e+12, device='cuda:0')
epoch: 146 test_true_pfm: 10.715777103808321
episode: 584 training return: tensor(-4.4262e+13, device='cuda:0')
episode: 585 training return: tensor(-1.2248e+13, device='cuda:0')
episode: 586 training return: tensor(-1.4612e+13, device='cuda:0')
episode: 587 training return: tensor(-8.1537e+12, device='cuda:0')
epoch: 147 test_true_pfm: -22.087045316132418
episode: 588 training return: tensor(-1.1935e+13, device='cuda:0')
episode: 589 training return: tensor(-1.1938e+13, device='cuda:0')
episode: 590 training return: tensor(-8.4720e+12, device='cuda:0')
episode: 591 training return: tensor(-1.0383e+13, device='cuda:0')
epoch: 148 test_true_pfm: -21.871448301774368
episode: 592 training return: tensor(-1.0193e+13, device='cuda:0')
episode: 593 training return: tensor(-9.2126e+12, device='cuda:0')
episode: 594 training return: tensor(-1.0578e+13, device='cuda:0')
episode: 595 training return: tensor(-1.6764e+13, device='cuda:0')
epoch: 149 test_true_pfm: -21.89970151883816
episode: 596 training return: tensor(-1.2845e+13, device='cuda:0')
episode: 597 training return: tensor(-1.0936e+13, device='cuda:0')
episode: 598 training return: tensor(-1.1992e+13, device='cuda:0')
episode: 599 training return: tensor(-8.2849e+12, device='cuda:0')
epoch: 150 test_true_pfm: -22.129757974205802
