['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'baseline', '--traj', 'expert', '--seed', '3']
episode: 0 training return: tensor(420.2092, device='cuda:0')
episode: 1 training return: tensor(233.6905, device='cuda:0')
episode: 2 training return: tensor(141.0090, device='cuda:0')
episode: 3 training return: tensor(270.0616, device='cuda:0')
epoch: 1 test_true_pfm: 25.13544659885713 sim_pfm: 955.1754745453596
episode: 4 training return: tensor(348.1195, device='cuda:0')
episode: 5 training return: tensor(460.4494, device='cuda:0')
episode: 6 training return: tensor(297.5872, device='cuda:0')
episode: 7 training return: tensor(391.5028, device='cuda:0')
epoch: 2 test_true_pfm: 25.10423306273561 sim_pfm: 953.757174295187
episode: 8 training return: tensor(276.8278, device='cuda:0')
episode: 9 training return: tensor(273.2489, device='cuda:0')
episode: 10 training return: tensor(942.6498, device='cuda:0')
episode: 11 training return: tensor(950.5372, device='cuda:0')
epoch: 3 test_true_pfm: 9.986708978562794 sim_pfm: 935.1003453597426
episode: 12 training return: tensor(837.2420, device='cuda:0')
episode: 13 training return: tensor(945.6034, device='cuda:0')
episode: 14 training return: tensor(916.5554, device='cuda:0')
episode: 15 training return: tensor(268.6932, device='cuda:0')
epoch: 4 test_true_pfm: 2.4400874933787 sim_pfm: -4.843152661318891
episode: 16 training return: tensor(-149.2942, device='cuda:0')
episode: 17 training return: tensor(972.8378, device='cuda:0')
episode: 18 training return: tensor(376.9874, device='cuda:0')
episode: 19 training return: tensor(972.5593, device='cuda:0')
epoch: 5 test_true_pfm: 29.97019811674063 sim_pfm: 721.7785499456804
episode: 20 training return: tensor(747.2514, device='cuda:0')
episode: 21 training return: tensor(442.1512, device='cuda:0')
episode: 22 training return: tensor(969.1335, device='cuda:0')
episode: 23 training return: tensor(588.6341, device='cuda:0')
epoch: 6 test_true_pfm: 15.354540682069176 sim_pfm: -99.10204129014164
episode: 24 training return: tensor(642.8253, device='cuda:0')
episode: 25 training return: tensor(447.5311, device='cuda:0')
episode: 26 training return: tensor(982.3126, device='cuda:0')
episode: 27 training return: tensor(990.4711, device='cuda:0')
epoch: 7 test_true_pfm: -14.056348658344834 sim_pfm: 987.274985177815
episode: 28 training return: tensor(983.8700, device='cuda:0')
episode: 29 training return: tensor(992.6381, device='cuda:0')
episode: 30 training return: tensor(994.4056, device='cuda:0')
episode: 31 training return: tensor(363.5944, device='cuda:0')
epoch: 8 test_true_pfm: -2.1235423276605507 sim_pfm: 988.1198473930359
episode: 32 training return: tensor(989.0449, device='cuda:0')
episode: 33 training return: tensor(945.3677, device='cuda:0')
episode: 34 training return: tensor(566.4927, device='cuda:0')
episode: 35 training return: tensor(707.3575, device='cuda:0')
epoch: 9 test_true_pfm: -0.7313240444155795 sim_pfm: 921.4056587615981
episode: 36 training return: tensor(976.8608, device='cuda:0')
episode: 37 training return: tensor(773.9161, device='cuda:0')
episode: 38 training return: tensor(981.1154, device='cuda:0')
episode: 39 training return: tensor(989.3531, device='cuda:0')
epoch: 10 test_true_pfm: 2.0504694229646363 sim_pfm: 993.1904491961002
episode: 40 training return: tensor(798.3002, device='cuda:0')
episode: 41 training return: tensor(991.4003, device='cuda:0')
episode: 42 training return: tensor(986.4159, device='cuda:0')
episode: 43 training return: tensor(995.0656, device='cuda:0')
epoch: 11 test_true_pfm: -3.5807901830068944 sim_pfm: 994.1756378889083
episode: 44 training return: tensor(994.5138, device='cuda:0')
episode: 45 training return: tensor(981.0840, device='cuda:0')
episode: 46 training return: tensor(994.2230, device='cuda:0')
episode: 47 training return: tensor(921.0732, device='cuda:0')
epoch: 12 test_true_pfm: 1.4815372933809072 sim_pfm: 943.7650246826931
episode: 48 training return: tensor(974.1689, device='cuda:0')
episode: 49 training return: tensor(994.4924, device='cuda:0')
episode: 50 training return: tensor(990.4051, device='cuda:0')
episode: 51 training return: tensor(943.9711, device='cuda:0')
epoch: 13 test_true_pfm: -20.04213066208576 sim_pfm: 989.9366072612814
episode: 52 training return: tensor(990.4451, device='cuda:0')
episode: 53 training return: tensor(943.9745, device='cuda:0')
episode: 54 training return: tensor(993.0657, device='cuda:0')
episode: 55 training return: tensor(969.9189, device='cuda:0')
epoch: 14 test_true_pfm: 18.141813348990315 sim_pfm: 951.1300372004509
episode: 56 training return: tensor(944.5070, device='cuda:0')
episode: 57 training return: tensor(944.1777, device='cuda:0')
episode: 58 training return: tensor(943.3243, device='cuda:0')
episode: 59 training return: tensor(996.0727, device='cuda:0')
epoch: 15 test_true_pfm: -11.872981506136476 sim_pfm: 996.1348598182201
episode: 60 training return: tensor(994.7561, device='cuda:0')
episode: 61 training return: tensor(992.0009, device='cuda:0')
episode: 62 training return: tensor(996.8391, device='cuda:0')
episode: 63 training return: tensor(996.5228, device='cuda:0')
epoch: 16 test_true_pfm: -5.8139447170322285 sim_pfm: 996.2094713449478
episode: 64 training return: tensor(995.7697, device='cuda:0')
episode: 65 training return: tensor(943.0109, device='cuda:0')
episode: 66 training return: tensor(994.4294, device='cuda:0')
episode: 67 training return: tensor(995.0925, device='cuda:0')
epoch: 17 test_true_pfm: -0.9810863977022457 sim_pfm: 995.9606887340545
episode: 68 training return: tensor(995.7125, device='cuda:0')
episode: 69 training return: tensor(994.4553, device='cuda:0')
episode: 70 training return: tensor(995.3878, device='cuda:0')
episode: 71 training return: tensor(995.7255, device='cuda:0')
epoch: 18 test_true_pfm: -13.350568435553702 sim_pfm: 992.9420165419579
episode: 72 training return: tensor(995.6037, device='cuda:0')
episode: 73 training return: tensor(994.9188, device='cuda:0')
episode: 74 training return: tensor(997.0677, device='cuda:0')
episode: 75 training return: tensor(993.9780, device='cuda:0')
epoch: 19 test_true_pfm: -11.00837536151956 sim_pfm: 997.2586518406868
episode: 76 training return: tensor(997.0188, device='cuda:0')
episode: 77 training return: tensor(942.6466, device='cuda:0')
episode: 78 training return: tensor(956.1212, device='cuda:0')
episode: 79 training return: tensor(948.4051, device='cuda:0')
epoch: 20 test_true_pfm: -7.682204904201518 sim_pfm: 979.5535510420799
episode: 80 training return: tensor(991.2992, device='cuda:0')
episode: 81 training return: tensor(996.2640, device='cuda:0')
episode: 82 training return: tensor(996.0117, device='cuda:0')
episode: 83 training return: tensor(973.5911, device='cuda:0')
epoch: 21 test_true_pfm: -5.78707546065049 sim_pfm: 994.8389805495739
episode: 84 training return: tensor(994.7822, device='cuda:0')
episode: 85 training return: tensor(995.3757, device='cuda:0')
episode: 86 training return: tensor(994.2453, device='cuda:0')
episode: 87 training return: tensor(996.4528, device='cuda:0')
epoch: 22 test_true_pfm: 0.8341912250363184 sim_pfm: 996.4439657002688
episode: 88 training return: tensor(993.9946, device='cuda:0')
episode: 89 training return: tensor(996.3868, device='cuda:0')
episode: 90 training return: tensor(993.4734, device='cuda:0')
episode: 91 training return: tensor(991.5973, device='cuda:0')
epoch: 23 test_true_pfm: -1.5571685800870723 sim_pfm: 995.0163800120354
episode: 92 training return: tensor(993.7856, device='cuda:0')
episode: 93 training return: tensor(995.1400, device='cuda:0')
episode: 94 training return: tensor(995.7203, device='cuda:0')
episode: 95 training return: tensor(995.3726, device='cuda:0')
epoch: 24 test_true_pfm: -3.7869815423813824 sim_pfm: 996.2941382937133
episode: 96 training return: tensor(996.1898, device='cuda:0')
episode: 97 training return: tensor(961.7470, device='cuda:0')
episode: 98 training return: tensor(963.4686, device='cuda:0')
episode: 99 training return: tensor(968.4707, device='cuda:0')
epoch: 25 test_true_pfm: -12.889617300435555 sim_pfm: 971.481898611784
episode: 100 training return: tensor(962.7523, device='cuda:0')
episode: 101 training return: tensor(966.1242, device='cuda:0')
episode: 102 training return: tensor(969.4901, device='cuda:0')
episode: 103 training return: tensor(955.6085, device='cuda:0')
epoch: 26 test_true_pfm: -10.510623741506285 sim_pfm: 961.8729692459107
episode: 104 training return: tensor(957.6970, device='cuda:0')
episode: 105 training return: tensor(934.0067, device='cuda:0')
episode: 106 training return: tensor(956.6555, device='cuda:0')
episode: 107 training return: tensor(989.4350, device='cuda:0')
epoch: 27 test_true_pfm: -15.304494983635076 sim_pfm: 990.6857213079929
episode: 108 training return: tensor(981.5391, device='cuda:0')
episode: 109 training return: tensor(997.4070, device='cuda:0')
episode: 110 training return: tensor(996.6505, device='cuda:0')
episode: 111 training return: tensor(994.3946, device='cuda:0')
epoch: 28 test_true_pfm: -12.33409301582007 sim_pfm: 995.9513562962413
episode: 112 training return: tensor(994.9554, device='cuda:0')
episode: 113 training return: tensor(990.2187, device='cuda:0')
episode: 114 training return: tensor(993.6614, device='cuda:0')
episode: 115 training return: tensor(987.5244, device='cuda:0')
epoch: 29 test_true_pfm: -13.12689167993256 sim_pfm: 975.1882638335228
episode: 116 training return: tensor(995.2951, device='cuda:0')
episode: 117 training return: tensor(987.7184, device='cuda:0')
episode: 118 training return: tensor(981.0243, device='cuda:0')
episode: 119 training return: tensor(985.1948, device='cuda:0')
epoch: 30 test_true_pfm: -9.36175430816112 sim_pfm: 987.7383760929108
episode: 120 training return: tensor(990.5704, device='cuda:0')
episode: 121 training return: tensor(992.6022, device='cuda:0')
episode: 122 training return: tensor(993.8418, device='cuda:0')
episode: 123 training return: tensor(993.7379, device='cuda:0')
epoch: 31 test_true_pfm: 6.095817582390153 sim_pfm: 994.9643735542893
episode: 124 training return: tensor(991.3553, device='cuda:0')
episode: 125 training return: tensor(993.3664, device='cuda:0')
episode: 126 training return: tensor(992.6653, device='cuda:0')
episode: 127 training return: tensor(991.6674, device='cuda:0')
epoch: 32 test_true_pfm: 0.3737188494204225 sim_pfm: 975.400352114439
episode: 128 training return: tensor(986.9845, device='cuda:0')
episode: 129 training return: tensor(995.5148, device='cuda:0')
episode: 130 training return: tensor(988.6151, device='cuda:0')
episode: 131 training return: tensor(994.6395, device='cuda:0')
epoch: 33 test_true_pfm: 4.243908691277801 sim_pfm: 992.2262719273567
episode: 132 training return: tensor(995.0490, device='cuda:0')
episode: 133 training return: tensor(989.7983, device='cuda:0')
episode: 134 training return: tensor(992.3950, device='cuda:0')
episode: 135 training return: tensor(991.8242, device='cuda:0')
epoch: 34 test_true_pfm: -13.939723275627818 sim_pfm: 991.3467826839536
episode: 136 training return: tensor(994.3978, device='cuda:0')
episode: 137 training return: tensor(992.5859, device='cuda:0')
episode: 138 training return: tensor(992.6098, device='cuda:0')
episode: 139 training return: tensor(993.1163, device='cuda:0')
epoch: 35 test_true_pfm: 2.2166569901853546 sim_pfm: 991.507518517971
episode: 140 training return: tensor(989.9449, device='cuda:0')
episode: 141 training return: tensor(988.5391, device='cuda:0')
episode: 142 training return: tensor(980.6609, device='cuda:0')
episode: 143 training return: tensor(974.9022, device='cuda:0')
epoch: 36 test_true_pfm: -11.880775096933364 sim_pfm: 983.3136409893632
episode: 144 training return: tensor(985.0634, device='cuda:0')
episode: 145 training return: tensor(996.0504, device='cuda:0')
episode: 146 training return: tensor(990.0906, device='cuda:0')
episode: 147 training return: tensor(990.4493, device='cuda:0')
epoch: 37 test_true_pfm: -9.05292747929237 sim_pfm: 995.2129457950592
episode: 148 training return: tensor(995.2778, device='cuda:0')
episode: 149 training return: tensor(996.5729, device='cuda:0')
episode: 150 training return: tensor(996.8164, device='cuda:0')
episode: 151 training return: tensor(996.2764, device='cuda:0')
epoch: 38 test_true_pfm: -2.3411188170569934 sim_pfm: 995.3985771507025
episode: 152 training return: tensor(995.1014, device='cuda:0')
episode: 153 training return: tensor(993.3815, device='cuda:0')
episode: 154 training return: tensor(987.2772, device='cuda:0')
episode: 155 training return: tensor(985.6017, device='cuda:0')
epoch: 39 test_true_pfm: 16.1863656057645 sim_pfm: 950.5664500018581
episode: 156 training return: tensor(942.0379, device='cuda:0')
episode: 157 training return: tensor(943.0074, device='cuda:0')
episode: 158 training return: tensor(971.7817, device='cuda:0')
episode: 159 training return: tensor(992.4581, device='cuda:0')
epoch: 40 test_true_pfm: -4.191532930243012 sim_pfm: 995.807698494196
episode: 160 training return: tensor(970.0792, device='cuda:0')
episode: 161 training return: tensor(995.4407, device='cuda:0')
episode: 162 training return: tensor(990.0454, device='cuda:0')
episode: 163 training return: tensor(990.6463, device='cuda:0')
epoch: 41 test_true_pfm: 4.835180897860876 sim_pfm: 984.3880789920688
episode: 164 training return: tensor(963.3536, device='cuda:0')
episode: 165 training return: tensor(948.4192, device='cuda:0')
episode: 166 training return: tensor(953.8480, device='cuda:0')
episode: 167 training return: tensor(996.2256, device='cuda:0')
epoch: 42 test_true_pfm: 22.9550535504118 sim_pfm: 976.4642926307395
episode: 168 training return: tensor(994.3609, device='cuda:0')
episode: 169 training return: tensor(986.7061, device='cuda:0')
episode: 170 training return: tensor(990.7064, device='cuda:0')
episode: 171 training return: tensor(993.0362, device='cuda:0')
epoch: 43 test_true_pfm: 2.4219973904997154 sim_pfm: 994.5497009754181
episode: 172 training return: tensor(994.0480, device='cuda:0')
episode: 173 training return: tensor(994.9443, device='cuda:0')
episode: 174 training return: tensor(995.6574, device='cuda:0')
episode: 175 training return: tensor(995.5808, device='cuda:0')
epoch: 44 test_true_pfm: -1.355033727125599 sim_pfm: 995.7385377695784
episode: 176 training return: tensor(994.9018, device='cuda:0')
episode: 177 training return: tensor(990.8978, device='cuda:0')
episode: 178 training return: tensor(994.5936, device='cuda:0')
episode: 179 training return: tensor(991.1815, device='cuda:0')
epoch: 45 test_true_pfm: 3.5559599030440125 sim_pfm: 994.1106242716312
episode: 180 training return: tensor(986.7060, device='cuda:0')
episode: 181 training return: tensor(977.8561, device='cuda:0')
episode: 182 training return: tensor(979.9592, device='cuda:0')
episode: 183 training return: tensor(993.4979, device='cuda:0')
epoch: 46 test_true_pfm: 4.945112624058659 sim_pfm: 993.7438221603632
episode: 184 training return: tensor(988.7900, device='cuda:0')
episode: 185 training return: tensor(919.3375, device='cuda:0')
episode: 186 training return: tensor(996.7787, device='cuda:0')
episode: 187 training return: tensor(992.7521, device='cuda:0')
epoch: 47 test_true_pfm: -4.244058823986512 sim_pfm: 991.90120241642
episode: 188 training return: tensor(965.5941, device='cuda:0')
episode: 189 training return: tensor(995.6562, device='cuda:0')
episode: 190 training return: tensor(996.0969, device='cuda:0')
episode: 191 training return: tensor(991.8555, device='cuda:0')
epoch: 48 test_true_pfm: -3.61094305768448 sim_pfm: 962.3671182811261
episode: 192 training return: tensor(955.7479, device='cuda:0')
episode: 193 training return: tensor(941.8886, device='cuda:0')
episode: 194 training return: tensor(987.0272, device='cuda:0')
episode: 195 training return: tensor(991.3130, device='cuda:0')
epoch: 49 test_true_pfm: -18.132320348642786 sim_pfm: 980.5326630473137
episode: 196 training return: tensor(965.0116, device='cuda:0')
episode: 197 training return: tensor(977.7627, device='cuda:0')
episode: 198 training return: tensor(876.9493, device='cuda:0')
episode: 199 training return: tensor(990.7244, device='cuda:0')
epoch: 50 test_true_pfm: -1.9926802463072018 sim_pfm: 988.804907296598
episode: 200 training return: tensor(995.7329, device='cuda:0')
episode: 201 training return: tensor(996.3502, device='cuda:0')
episode: 202 training return: tensor(984.9825, device='cuda:0')
episode: 203 training return: tensor(995.5941, device='cuda:0')
epoch: 51 test_true_pfm: -15.051078115083175 sim_pfm: 995.3289652884007
episode: 204 training return: tensor(993.5202, device='cuda:0')
episode: 205 training return: tensor(994.9893, device='cuda:0')
episode: 206 training return: tensor(995.2159, device='cuda:0')
episode: 207 training return: tensor(992.6744, device='cuda:0')
epoch: 52 test_true_pfm: -1.860998297879798 sim_pfm: 995.2039920985699
episode: 208 training return: tensor(992.2034, device='cuda:0')
episode: 209 training return: tensor(993.9397, device='cuda:0')
episode: 210 training return: tensor(991.9976, device='cuda:0')
episode: 211 training return: tensor(980.8857, device='cuda:0')
epoch: 53 test_true_pfm: 2.7188861085609206 sim_pfm: 992.9024338006973
episode: 212 training return: tensor(989.4720, device='cuda:0')
episode: 213 training return: tensor(990.4693, device='cuda:0')
episode: 214 training return: tensor(968.4946, device='cuda:0')
episode: 215 training return: tensor(978.1137, device='cuda:0')
epoch: 54 test_true_pfm: -3.5926861743238163 sim_pfm: 986.5581186890602
episode: 216 training return: tensor(989.9254, device='cuda:0')
episode: 217 training return: tensor(983.9884, device='cuda:0')
episode: 218 training return: tensor(993.0590, device='cuda:0')
episode: 219 training return: tensor(988.4999, device='cuda:0')
epoch: 55 test_true_pfm: -10.780707688092095 sim_pfm: 995.9631772637367
episode: 220 training return: tensor(993.0750, device='cuda:0')
episode: 221 training return: tensor(995.0108, device='cuda:0')
episode: 222 training return: tensor(995.3043, device='cuda:0')
episode: 223 training return: tensor(991.7322, device='cuda:0')
epoch: 56 test_true_pfm: -3.717139190265482 sim_pfm: 991.1088463842868
episode: 224 training return: tensor(993.5723, device='cuda:0')
episode: 225 training return: tensor(989.7006, device='cuda:0')
episode: 226 training return: tensor(990.3797, device='cuda:0')
episode: 227 training return: tensor(981.4113, device='cuda:0')
epoch: 57 test_true_pfm: -4.890851779705665 sim_pfm: 986.4303171157837
episode: 228 training return: tensor(994.1386, device='cuda:0')
episode: 229 training return: tensor(993.4352, device='cuda:0')
episode: 230 training return: tensor(990.7060, device='cuda:0')
episode: 231 training return: tensor(985.1951, device='cuda:0')
epoch: 58 test_true_pfm: 0.6484884285303456 sim_pfm: 980.133259416651
episode: 232 training return: tensor(976.3082, device='cuda:0')
episode: 233 training return: tensor(973.0844, device='cuda:0')
episode: 234 training return: tensor(973.0956, device='cuda:0')
episode: 235 training return: tensor(969.9262, device='cuda:0')
epoch: 59 test_true_pfm: 1.7244982438187695 sim_pfm: 982.7812305927276
episode: 236 training return: tensor(994.1242, device='cuda:0')
episode: 237 training return: tensor(995.1921, device='cuda:0')
episode: 238 training return: tensor(992.7371, device='cuda:0')
episode: 239 training return: tensor(994.8101, device='cuda:0')
epoch: 60 test_true_pfm: 0.1887605055516869 sim_pfm: 995.4942038714886
episode: 240 training return: tensor(993.2258, device='cuda:0')
episode: 241 training return: tensor(992.7285, device='cuda:0')
episode: 242 training return: tensor(975.7070, device='cuda:0')
episode: 243 training return: tensor(993.4982, device='cuda:0')
epoch: 61 test_true_pfm: -2.604283528035733 sim_pfm: 996.0467968583107
episode: 244 training return: tensor(990.9119, device='cuda:0')
episode: 245 training return: tensor(991.6547, device='cuda:0')
episode: 246 training return: tensor(993.6982, device='cuda:0')
episode: 247 training return: tensor(976.9421, device='cuda:0')
epoch: 62 test_true_pfm: -1.9303204395332592 sim_pfm: 987.9414211347699
episode: 248 training return: tensor(989.8524, device='cuda:0')
episode: 249 training return: tensor(988.7581, device='cuda:0')
episode: 250 training return: tensor(988.2430, device='cuda:0')
episode: 251 training return: tensor(989.1088, device='cuda:0')
epoch: 63 test_true_pfm: 1.6410922314720497 sim_pfm: 981.338353408128
episode: 252 training return: tensor(983.9097, device='cuda:0')
episode: 253 training return: tensor(978.4448, device='cuda:0')
episode: 254 training return: tensor(989.7097, device='cuda:0')
episode: 255 training return: tensor(981.4429, device='cuda:0')
epoch: 64 test_true_pfm: -15.461468609133984 sim_pfm: 963.4559897951782
episode: 256 training return: tensor(993.1840, device='cuda:0')
episode: 257 training return: tensor(974.0532, device='cuda:0')
episode: 258 training return: tensor(986.1938, device='cuda:0')
episode: 259 training return: tensor(980.0139, device='cuda:0')
epoch: 65 test_true_pfm: -10.329553196392922 sim_pfm: 989.581735046953
episode: 260 training return: tensor(989.0417, device='cuda:0')
episode: 261 training return: tensor(989.6283, device='cuda:0')
episode: 262 training return: tensor(985.6466, device='cuda:0')
episode: 263 training return: tensor(990.0045, device='cuda:0')
epoch: 66 test_true_pfm: -9.228779498846492 sim_pfm: 995.7287474468351
episode: 264 training return: tensor(986.1071, device='cuda:0')
episode: 265 training return: tensor(987.6702, device='cuda:0')
episode: 266 training return: tensor(994.5046, device='cuda:0')
episode: 267 training return: tensor(993.7494, device='cuda:0')
epoch: 67 test_true_pfm: 17.41982526139814 sim_pfm: 987.8785799622535
episode: 268 training return: tensor(978.6828, device='cuda:0')
episode: 269 training return: tensor(977.2038, device='cuda:0')
episode: 270 training return: tensor(987.0841, device='cuda:0')
episode: 271 training return: tensor(970.5102, device='cuda:0')
epoch: 68 test_true_pfm: -9.139364045076146 sim_pfm: 986.6886745959521
episode: 272 training return: tensor(958.1216, device='cuda:0')
episode: 273 training return: tensor(977.6806, device='cuda:0')
episode: 274 training return: tensor(989.7997, device='cuda:0')
episode: 275 training return: tensor(991.9783, device='cuda:0')
epoch: 69 test_true_pfm: 1.2114671364393186 sim_pfm: 988.260920098424
episode: 276 training return: tensor(995.4360, device='cuda:0')
episode: 277 training return: tensor(994.2487, device='cuda:0')
episode: 278 training return: tensor(993.2924, device='cuda:0')
episode: 279 training return: tensor(986.1260, device='cuda:0')
epoch: 70 test_true_pfm: 2.0790787842621254 sim_pfm: 985.9166798979044
episode: 280 training return: tensor(979.2405, device='cuda:0')
episode: 281 training return: tensor(983.5913, device='cuda:0')
episode: 282 training return: tensor(977.0250, device='cuda:0')
episode: 283 training return: tensor(972.6074, device='cuda:0')
epoch: 71 test_true_pfm: -12.242910867412448 sim_pfm: 989.1120288074017
episode: 284 training return: tensor(980.2504, device='cuda:0')
episode: 285 training return: tensor(982.1693, device='cuda:0')
episode: 286 training return: tensor(982.9603, device='cuda:0')
episode: 287 training return: tensor(984.1057, device='cuda:0')
epoch: 72 test_true_pfm: -0.05035983019147472 sim_pfm: 986.4525748193264
episode: 288 training return: tensor(991.5328, device='cuda:0')
episode: 289 training return: tensor(995.3826, device='cuda:0')
episode: 290 training return: tensor(995.6243, device='cuda:0')
episode: 291 training return: tensor(876.0724, device='cuda:0')
epoch: 73 test_true_pfm: 16.383571853783533 sim_pfm: 888.8861979112029
episode: 292 training return: tensor(908.5888, device='cuda:0')
episode: 293 training return: tensor(988.3477, device='cuda:0')
episode: 294 training return: tensor(973.4805, device='cuda:0')
episode: 295 training return: tensor(894.9377, device='cuda:0')
epoch: 74 test_true_pfm: -6.323694633951422 sim_pfm: 817.5881149256136
episode: 296 training return: tensor(895.7260, device='cuda:0')
episode: 297 training return: tensor(972.6024, device='cuda:0')
episode: 298 training return: tensor(990.4797, device='cuda:0')
episode: 299 training return: tensor(992.0532, device='cuda:0')
epoch: 75 test_true_pfm: 0.7656952039978435 sim_pfm: 991.2226557433605
episode: 300 training return: tensor(986.8987, device='cuda:0')
episode: 301 training return: tensor(983.3688, device='cuda:0')
episode: 302 training return: tensor(983.0749, device='cuda:0')
episode: 303 training return: tensor(968.2877, device='cuda:0')
epoch: 76 test_true_pfm: 11.317791114249392 sim_pfm: 958.3497971653939
episode: 304 training return: tensor(959.3269, device='cuda:0')
episode: 305 training return: tensor(969.1625, device='cuda:0')
episode: 306 training return: tensor(963.4703, device='cuda:0')
episode: 307 training return: tensor(983.7209, device='cuda:0')
epoch: 77 test_true_pfm: -2.2673087920652883 sim_pfm: 990.875326371193
episode: 308 training return: tensor(987.5982, device='cuda:0')
episode: 309 training return: tensor(996.5204, device='cuda:0')
episode: 310 training return: tensor(991.9547, device='cuda:0')
episode: 311 training return: tensor(990.5583, device='cuda:0')
epoch: 78 test_true_pfm: -5.200647690268684 sim_pfm: 993.4193938641808
episode: 312 training return: tensor(993.6082, device='cuda:0')
episode: 313 training return: tensor(985.8716, device='cuda:0')
episode: 314 training return: tensor(973.7200, device='cuda:0')
episode: 315 training return: tensor(989.8217, device='cuda:0')
epoch: 79 test_true_pfm: -1.07131564132585 sim_pfm: 994.8641059271991
episode: 316 training return: tensor(984.9280, device='cuda:0')
episode: 317 training return: tensor(992.0302, device='cuda:0')
episode: 318 training return: tensor(993.4427, device='cuda:0')
episode: 319 training return: tensor(988.6723, device='cuda:0')
epoch: 80 test_true_pfm: 16.21309200887189 sim_pfm: 982.570634675026
episode: 320 training return: tensor(972.2092, device='cuda:0')
episode: 321 training return: tensor(938.9384, device='cuda:0')
episode: 322 training return: tensor(991.4080, device='cuda:0')
episode: 323 training return: tensor(990.5995, device='cuda:0')
epoch: 81 test_true_pfm: -10.576500828735458 sim_pfm: 996.2648399949073
episode: 324 training return: tensor(980.4384, device='cuda:0')
episode: 325 training return: tensor(993.8403, device='cuda:0')
episode: 326 training return: tensor(993.5400, device='cuda:0')
episode: 327 training return: tensor(996.5544, device='cuda:0')
epoch: 82 test_true_pfm: -3.9342597234573065 sim_pfm: 996.0897245645523
episode: 328 training return: tensor(996.2476, device='cuda:0')
episode: 329 training return: tensor(996.3544, device='cuda:0')
episode: 330 training return: tensor(995.5976, device='cuda:0')
episode: 331 training return: tensor(986.1636, device='cuda:0')
epoch: 83 test_true_pfm: 31.13850883618064 sim_pfm: 990.4457056939602
episode: 332 training return: tensor(949.9494, device='cuda:0')
episode: 333 training return: tensor(973.2226, device='cuda:0')
episode: 334 training return: tensor(921.8389, device='cuda:0')
episode: 335 training return: tensor(911.0964, device='cuda:0')
epoch: 84 test_true_pfm: 1.2012639074081606 sim_pfm: 977.6473701134324
episode: 336 training return: tensor(980.5787, device='cuda:0')
episode: 337 training return: tensor(950.5970, device='cuda:0')
episode: 338 training return: tensor(956.5881, device='cuda:0')
episode: 339 training return: tensor(917.9055, device='cuda:0')
epoch: 85 test_true_pfm: 12.787961500092647 sim_pfm: 895.6994343519211
episode: 340 training return: tensor(965.6880, device='cuda:0')
episode: 341 training return: tensor(957.2932, device='cuda:0')
episode: 342 training return: tensor(946.7583, device='cuda:0')
episode: 343 training return: tensor(968.4745, device='cuda:0')
epoch: 86 test_true_pfm: 4.495856896937757 sim_pfm: 952.2479866623878
episode: 344 training return: tensor(961.2025, device='cuda:0')
episode: 345 training return: tensor(942.5916, device='cuda:0')
episode: 346 training return: tensor(942.7963, device='cuda:0')
episode: 347 training return: tensor(923.6584, device='cuda:0')
epoch: 87 test_true_pfm: 5.587617202434708 sim_pfm: 935.3723251491786
episode: 348 training return: tensor(960.3032, device='cuda:0')
episode: 349 training return: tensor(958.2357, device='cuda:0')
episode: 350 training return: tensor(952.3128, device='cuda:0')
episode: 351 training return: tensor(993.5150, device='cuda:0')
epoch: 88 test_true_pfm: -3.7898632115138406 sim_pfm: 996.1265784919262
episode: 352 training return: tensor(995.3572, device='cuda:0')
episode: 353 training return: tensor(992.5638, device='cuda:0')
episode: 354 training return: tensor(994.6226, device='cuda:0')
episode: 355 training return: tensor(992.1746, device='cuda:0')
epoch: 89 test_true_pfm: -6.784144884622416 sim_pfm: 992.7457259178161
episode: 356 training return: tensor(994.8163, device='cuda:0')
episode: 357 training return: tensor(990.6248, device='cuda:0')
episode: 358 training return: tensor(986.7231, device='cuda:0')
episode: 359 training return: tensor(989.0206, device='cuda:0')
epoch: 90 test_true_pfm: 5.852597673691337 sim_pfm: 991.9555822134018
episode: 360 training return: tensor(988.2820, device='cuda:0')
episode: 361 training return: tensor(986.7463, device='cuda:0')
episode: 362 training return: tensor(965.2391, device='cuda:0')
episode: 363 training return: tensor(955.8593, device='cuda:0')
epoch: 91 test_true_pfm: 5.293966502091161 sim_pfm: 959.6119000867009
episode: 364 training return: tensor(975.2987, device='cuda:0')
episode: 365 training return: tensor(990.8856, device='cuda:0')
episode: 366 training return: tensor(941.3380, device='cuda:0')
episode: 367 training return: tensor(989.3125, device='cuda:0')
epoch: 92 test_true_pfm: -11.104566801135892 sim_pfm: 965.1194364763796
episode: 368 training return: tensor(945.6160, device='cuda:0')
episode: 369 training return: tensor(984.1606, device='cuda:0')
episode: 370 training return: tensor(987.4890, device='cuda:0')
episode: 371 training return: tensor(858.8487, device='cuda:0')
epoch: 93 test_true_pfm: -6.755181791521818 sim_pfm: 989.5281065762043
episode: 372 training return: tensor(984.8035, device='cuda:0')
episode: 373 training return: tensor(987.2658, device='cuda:0')
episode: 374 training return: tensor(992.8790, device='cuda:0')
episode: 375 training return: tensor(993.5403, device='cuda:0')
epoch: 94 test_true_pfm: -8.294854659557705 sim_pfm: 995.0657964229583
episode: 376 training return: tensor(996.7104, device='cuda:0')
episode: 377 training return: tensor(995.3192, device='cuda:0')
episode: 378 training return: tensor(993.2371, device='cuda:0')
episode: 379 training return: tensor(995.6657, device='cuda:0')
epoch: 95 test_true_pfm: -5.3505116849883105 sim_pfm: 995.8759165436029
episode: 380 training return: tensor(994.1248, device='cuda:0')
episode: 381 training return: tensor(995.4787, device='cuda:0')
episode: 382 training return: tensor(996.2001, device='cuda:0')
episode: 383 training return: tensor(995.0108, device='cuda:0')
epoch: 96 test_true_pfm: 9.531346231880773 sim_pfm: 969.2964956820011
episode: 384 training return: tensor(977.4962, device='cuda:0')
episode: 385 training return: tensor(979.7393, device='cuda:0')
episode: 386 training return: tensor(986.1851, device='cuda:0')
episode: 387 training return: tensor(981.1241, device='cuda:0')
epoch: 97 test_true_pfm: 7.177550086467076 sim_pfm: 988.156246316433
episode: 388 training return: tensor(986.0344, device='cuda:0')
episode: 389 training return: tensor(984.7739, device='cuda:0')
episode: 390 training return: tensor(990.1066, device='cuda:0')
episode: 391 training return: tensor(991.0202, device='cuda:0')
epoch: 98 test_true_pfm: -8.444606151976833 sim_pfm: 989.9491317033768
episode: 392 training return: tensor(992.9283, device='cuda:0')
episode: 393 training return: tensor(990.5395, device='cuda:0')
episode: 394 training return: tensor(992.9154, device='cuda:0')
episode: 395 training return: tensor(980.2764, device='cuda:0')
epoch: 99 test_true_pfm: -12.645232486891233 sim_pfm: 985.3869015991688
episode: 396 training return: tensor(979.3161, device='cuda:0')
episode: 397 training return: tensor(991.4817, device='cuda:0')
episode: 398 training return: tensor(995.8667, device='cuda:0')
episode: 399 training return: tensor(995.2333, device='cuda:0')
epoch: 100 test_true_pfm: -9.20843139823974 sim_pfm: 990.3680303434842
episode: 400 training return: tensor(982.7264, device='cuda:0')
episode: 401 training return: tensor(984.7014, device='cuda:0')
episode: 402 training return: tensor(967.5815, device='cuda:0')
episode: 403 training return: tensor(992.8143, device='cuda:0')
epoch: 101 test_true_pfm: -7.347370483065835 sim_pfm: 994.8051729798317
episode: 404 training return: tensor(995.2064, device='cuda:0')
episode: 405 training return: tensor(990.5390, device='cuda:0')
episode: 406 training return: tensor(991.7813, device='cuda:0')
episode: 407 training return: tensor(994.0175, device='cuda:0')
epoch: 102 test_true_pfm: 2.174469910894153 sim_pfm: 993.8422694146633
episode: 408 training return: tensor(979.9475, device='cuda:0')
episode: 409 training return: tensor(994.6626, device='cuda:0')
episode: 410 training return: tensor(982.6701, device='cuda:0')
episode: 411 training return: tensor(944.2334, device='cuda:0')
epoch: 103 test_true_pfm: 18.538100125948 sim_pfm: 988.8812073945999
episode: 412 training return: tensor(976.9122, device='cuda:0')
episode: 413 training return: tensor(992.5041, device='cuda:0')
episode: 414 training return: tensor(995.0108, device='cuda:0')
episode: 415 training return: tensor(992.8268, device='cuda:0')
epoch: 104 test_true_pfm: 16.789824589896597 sim_pfm: 987.2431628704071
episode: 416 training return: tensor(983.0632, device='cuda:0')
episode: 417 training return: tensor(961.9426, device='cuda:0')
episode: 418 training return: tensor(956.5289, device='cuda:0')
episode: 419 training return: tensor(947.3113, device='cuda:0')
epoch: 105 test_true_pfm: 21.256400866673438 sim_pfm: 920.2018049001693
episode: 420 training return: tensor(961.0690, device='cuda:0')
episode: 421 training return: tensor(983.2030, device='cuda:0')
episode: 422 training return: tensor(992.6930, device='cuda:0')
episode: 423 training return: tensor(954.8297, device='cuda:0')
epoch: 106 test_true_pfm: -8.689907215824242 sim_pfm: 992.9596764802933
episode: 424 training return: tensor(991.3250, device='cuda:0')
episode: 425 training return: tensor(991.4229, device='cuda:0')
episode: 426 training return: tensor(982.0602, device='cuda:0')
episode: 427 training return: tensor(987.7509, device='cuda:0')
epoch: 107 test_true_pfm: -12.549935563206683 sim_pfm: 914.5212395294045
episode: 428 training return: tensor(965.9742, device='cuda:0')
episode: 429 training return: tensor(965.1909, device='cuda:0')
episode: 430 training return: tensor(957.2359, device='cuda:0')
episode: 431 training return: tensor(985.4622, device='cuda:0')
epoch: 108 test_true_pfm: 19.707777502440678 sim_pfm: 977.8119907617569
episode: 432 training return: tensor(979.4011, device='cuda:0')
episode: 433 training return: tensor(959.0129, device='cuda:0')
episode: 434 training return: tensor(969.5699, device='cuda:0')
episode: 435 training return: tensor(956.6029, device='cuda:0')
epoch: 109 test_true_pfm: -9.992220225332904 sim_pfm: 971.3433879852295
episode: 436 training return: tensor(973.2773, device='cuda:0')
episode: 437 training return: tensor(972.1432, device='cuda:0')
episode: 438 training return: tensor(968.3934, device='cuda:0')
episode: 439 training return: tensor(941.7510, device='cuda:0')
epoch: 110 test_true_pfm: 3.2321862910553634 sim_pfm: 995.512169778347
episode: 440 training return: tensor(929.0200, device='cuda:0')
episode: 441 training return: tensor(889.1792, device='cuda:0')
episode: 442 training return: tensor(910.1285, device='cuda:0')
episode: 443 training return: tensor(984.6778, device='cuda:0')
epoch: 111 test_true_pfm: 21.225038400006305 sim_pfm: 988.8956860363484
episode: 444 training return: tensor(918.2192, device='cuda:0')
episode: 445 training return: tensor(978.9594, device='cuda:0')
episode: 446 training return: tensor(996.5616, device='cuda:0')
episode: 447 training return: tensor(993.1437, device='cuda:0')
epoch: 112 test_true_pfm: 1.1241799982842464 sim_pfm: 984.5953055858612
episode: 448 training return: tensor(992.3873, device='cuda:0')
episode: 449 training return: tensor(983.0718, device='cuda:0')
episode: 450 training return: tensor(992.3120, device='cuda:0')
episode: 451 training return: tensor(991.4888, device='cuda:0')
epoch: 113 test_true_pfm: -5.931882413149955 sim_pfm: 993.6850816130639
episode: 452 training return: tensor(991.7603, device='cuda:0')
episode: 453 training return: tensor(975.3306, device='cuda:0')
episode: 454 training return: tensor(995.3066, device='cuda:0')
episode: 455 training return: tensor(995.1328, device='cuda:0')
epoch: 114 test_true_pfm: -2.4408294115862907 sim_pfm: 994.0494222283363
episode: 456 training return: tensor(993.7278, device='cuda:0')
episode: 457 training return: tensor(976.7357, device='cuda:0')
episode: 458 training return: tensor(966.5021, device='cuda:0')
episode: 459 training return: tensor(986.0520, device='cuda:0')
epoch: 115 test_true_pfm: -5.373702870007942 sim_pfm: 993.3117860645056
episode: 460 training return: tensor(992.9691, device='cuda:0')
episode: 461 training return: tensor(986.3305, device='cuda:0')
episode: 462 training return: tensor(983.9002, device='cuda:0')
episode: 463 training return: tensor(978.0620, device='cuda:0')
epoch: 116 test_true_pfm: -6.449580437826538 sim_pfm: 990.972479813546
episode: 464 training return: tensor(951.1746, device='cuda:0')
episode: 465 training return: tensor(928.3318, device='cuda:0')
episode: 466 training return: tensor(945.4116, device='cuda:0')
episode: 467 training return: tensor(990.9055, device='cuda:0')
epoch: 117 test_true_pfm: -4.073287575604921 sim_pfm: 991.840829384327
episode: 468 training return: tensor(990.2550, device='cuda:0')
episode: 469 training return: tensor(990.5938, device='cuda:0')
episode: 470 training return: tensor(987.9538, device='cuda:0')
episode: 471 training return: tensor(984.0578, device='cuda:0')
epoch: 118 test_true_pfm: -1.3089914332073516 sim_pfm: 985.4082444190979
episode: 472 training return: tensor(973.0386, device='cuda:0')
episode: 473 training return: tensor(979.0278, device='cuda:0')
episode: 474 training return: tensor(970.9069, device='cuda:0')
episode: 475 training return: tensor(976.1482, device='cuda:0')
epoch: 119 test_true_pfm: -8.974005696824884 sim_pfm: 974.3061105035246
episode: 476 training return: tensor(978.0217, device='cuda:0')
episode: 477 training return: tensor(978.5882, device='cuda:0')
episode: 478 training return: tensor(972.3245, device='cuda:0')
episode: 479 training return: tensor(963.1071, device='cuda:0')
epoch: 120 test_true_pfm: -4.522954931475727 sim_pfm: 968.2735947102308
episode: 480 training return: tensor(965.2746, device='cuda:0')
episode: 481 training return: tensor(979.0306, device='cuda:0')
episode: 482 training return: tensor(965.7612, device='cuda:0')
episode: 483 training return: tensor(943.2677, device='cuda:0')
epoch: 121 test_true_pfm: -1.5084034882099289 sim_pfm: 972.8364600628614
episode: 484 training return: tensor(978.5296, device='cuda:0')
episode: 485 training return: tensor(975.2467, device='cuda:0')
episode: 486 training return: tensor(987.6387, device='cuda:0')
episode: 487 training return: tensor(971.7444, device='cuda:0')
epoch: 122 test_true_pfm: -1.1840468941515483 sim_pfm: 971.4218069046735
episode: 488 training return: tensor(967.9827, device='cuda:0')
episode: 489 training return: tensor(977.3630, device='cuda:0')
episode: 490 training return: tensor(988.4257, device='cuda:0')
episode: 491 training return: tensor(972.1570, device='cuda:0')
epoch: 123 test_true_pfm: -14.241777085273537 sim_pfm: 959.9948052346706
episode: 492 training return: tensor(958.0339, device='cuda:0')
episode: 493 training return: tensor(944.8185, device='cuda:0')
episode: 494 training return: tensor(953.0737, device='cuda:0')
episode: 495 training return: tensor(993.3702, device='cuda:0')
epoch: 124 test_true_pfm: -1.7541334666914659 sim_pfm: 993.4481503516436
episode: 496 training return: tensor(994.0072, device='cuda:0')
episode: 497 training return: tensor(990.2491, device='cuda:0')
episode: 498 training return: tensor(983.6129, device='cuda:0')
episode: 499 training return: tensor(925.2612, device='cuda:0')
epoch: 125 test_true_pfm: 18.93451384995719 sim_pfm: 943.9134231209755
episode: 500 training return: tensor(940.9166, device='cuda:0')
episode: 501 training return: tensor(956.1513, device='cuda:0')
episode: 502 training return: tensor(952.5098, device='cuda:0')
episode: 503 training return: tensor(942.1122, device='cuda:0')
epoch: 126 test_true_pfm: 2.1545449234886718 sim_pfm: 989.2332364022732
episode: 504 training return: tensor(977.4925, device='cuda:0')
episode: 505 training return: tensor(976.9965, device='cuda:0')
episode: 506 training return: tensor(961.8531, device='cuda:0')
episode: 507 training return: tensor(954.4354, device='cuda:0')
epoch: 127 test_true_pfm: -13.444537976884016 sim_pfm: 983.6762097626925
episode: 508 training return: tensor(967.9938, device='cuda:0')
episode: 509 training return: tensor(982.9340, device='cuda:0')
episode: 510 training return: tensor(985.1898, device='cuda:0')
episode: 511 training return: tensor(941.9432, device='cuda:0')
epoch: 128 test_true_pfm: -7.8073778255096205 sim_pfm: 994.6603246212005
episode: 512 training return: tensor(931.8550, device='cuda:0')
episode: 513 training return: tensor(856.6917, device='cuda:0')
episode: 514 training return: tensor(993.5959, device='cuda:0')
episode: 515 training return: tensor(980.5521, device='cuda:0')
epoch: 129 test_true_pfm: -15.306322649044134 sim_pfm: 990.965945251286
episode: 516 training return: tensor(986.4612, device='cuda:0')
episode: 517 training return: tensor(982.4648, device='cuda:0')
episode: 518 training return: tensor(986.3444, device='cuda:0')
episode: 519 training return: tensor(765.9730, device='cuda:0')
epoch: 130 test_true_pfm: -13.66998377331355 sim_pfm: 994.4043466228061
episode: 520 training return: tensor(994.1303, device='cuda:0')
episode: 521 training return: tensor(992.8951, device='cuda:0')
episode: 522 training return: tensor(970.1432, device='cuda:0')
episode: 523 training return: tensor(852.3440, device='cuda:0')
epoch: 131 test_true_pfm: -3.68512208683361 sim_pfm: 982.8191941946745
episode: 524 training return: tensor(984.3657, device='cuda:0')
episode: 525 training return: tensor(976.0245, device='cuda:0')
episode: 526 training return: tensor(988.1530, device='cuda:0')
episode: 527 training return: tensor(995.5697, device='cuda:0')
epoch: 132 test_true_pfm: 2.7276691131712267 sim_pfm: 979.7311212837697
episode: 528 training return: tensor(963.3945, device='cuda:0')
episode: 529 training return: tensor(995.9305, device='cuda:0')
episode: 530 training return: tensor(996.3143, device='cuda:0')
episode: 531 training return: tensor(993.7450, device='cuda:0')
epoch: 133 test_true_pfm: 3.52502064316445 sim_pfm: 995.492284321785
episode: 532 training return: tensor(989.6980, device='cuda:0')
episode: 533 training return: tensor(992.2468, device='cuda:0')
episode: 534 training return: tensor(994.7603, device='cuda:0')
episode: 535 training return: tensor(995.8151, device='cuda:0')
epoch: 134 test_true_pfm: -1.1567162119563221 sim_pfm: 993.780739620328
episode: 536 training return: tensor(995.3899, device='cuda:0')
episode: 537 training return: tensor(994.2167, device='cuda:0')
episode: 538 training return: tensor(995.3759, device='cuda:0')
episode: 539 training return: tensor(995.2668, device='cuda:0')
epoch: 135 test_true_pfm: 3.790247346417368 sim_pfm: 986.9452934026718
episode: 540 training return: tensor(985.3635, device='cuda:0')
episode: 541 training return: tensor(993.3179, device='cuda:0')
episode: 542 training return: tensor(993.3184, device='cuda:0')
episode: 543 training return: tensor(988.4771, device='cuda:0')
epoch: 136 test_true_pfm: -6.61622484779266 sim_pfm: 969.8323498595506
episode: 544 training return: tensor(892.6403, device='cuda:0')
episode: 545 training return: tensor(914.6573, device='cuda:0')
episode: 546 training return: tensor(946.8536, device='cuda:0')
episode: 547 training return: tensor(953.5284, device='cuda:0')
epoch: 137 test_true_pfm: -3.035810533110731 sim_pfm: 969.0472478534095
episode: 548 training return: tensor(968.3113, device='cuda:0')
episode: 549 training return: tensor(907.0534, device='cuda:0')
episode: 550 training return: tensor(990.4105, device='cuda:0')
episode: 551 training return: tensor(991.8112, device='cuda:0')
epoch: 138 test_true_pfm: -11.56130953221064 sim_pfm: 993.7269817866385
episode: 552 training return: tensor(992.5559, device='cuda:0')
episode: 553 training return: tensor(988.9731, device='cuda:0')
episode: 554 training return: tensor(991.1631, device='cuda:0')
episode: 555 training return: tensor(991.3378, device='cuda:0')
epoch: 139 test_true_pfm: -8.693508247941626 sim_pfm: 993.7356823891402
episode: 556 training return: tensor(984.9002, device='cuda:0')
episode: 557 training return: tensor(992.6631, device='cuda:0')
episode: 558 training return: tensor(993.6460, device='cuda:0')
episode: 559 training return: tensor(994.4454, device='cuda:0')
epoch: 140 test_true_pfm: -6.829067241547177 sim_pfm: 990.187606087327
episode: 560 training return: tensor(983.1003, device='cuda:0')
episode: 561 training return: tensor(991.4171, device='cuda:0')
episode: 562 training return: tensor(985.0626, device='cuda:0')
episode: 563 training return: tensor(991.8261, device='cuda:0')
epoch: 141 test_true_pfm: -9.661767264027699 sim_pfm: 995.1606402425328
episode: 564 training return: tensor(994.2306, device='cuda:0')
episode: 565 training return: tensor(993.7839, device='cuda:0')
episode: 566 training return: tensor(981.3113, device='cuda:0')
episode: 567 training return: tensor(991.2793, device='cuda:0')
epoch: 142 test_true_pfm: -4.673502571499165 sim_pfm: 989.9029356826097
episode: 568 training return: tensor(991.4166, device='cuda:0')
episode: 569 training return: tensor(993.5627, device='cuda:0')
episode: 570 training return: tensor(995.9039, device='cuda:0')
episode: 571 training return: tensor(993.5614, device='cuda:0')
epoch: 143 test_true_pfm: -9.107841411250272 sim_pfm: 995.5710310697556
episode: 572 training return: tensor(994.7152, device='cuda:0')
episode: 573 training return: tensor(991.6822, device='cuda:0')
episode: 574 training return: tensor(994.5353, device='cuda:0')
episode: 575 training return: tensor(994.3341, device='cuda:0')
epoch: 144 test_true_pfm: -5.401426361016353 sim_pfm: 995.9278197646141
episode: 576 training return: tensor(995.0955, device='cuda:0')
episode: 577 training return: tensor(994.1377, device='cuda:0')
episode: 578 training return: tensor(996.0789, device='cuda:0')
episode: 579 training return: tensor(994.7236, device='cuda:0')
epoch: 145 test_true_pfm: -1.2760431460712645 sim_pfm: 995.3740068793297
episode: 580 training return: tensor(993.1854, device='cuda:0')
episode: 581 training return: tensor(992.7198, device='cuda:0')
episode: 582 training return: tensor(990.1537, device='cuda:0')
episode: 583 training return: tensor(995.1101, device='cuda:0')
epoch: 146 test_true_pfm: 0.9353269111173408 sim_pfm: 996.3779869824648
episode: 584 training return: tensor(994.0442, device='cuda:0')
episode: 585 training return: tensor(995.5514, device='cuda:0')
episode: 586 training return: tensor(993.4714, device='cuda:0')
episode: 587 training return: tensor(993.7949, device='cuda:0')
epoch: 147 test_true_pfm: 3.578820363851937 sim_pfm: 995.049731516838
episode: 588 training return: tensor(991.5031, device='cuda:0')
episode: 589 training return: tensor(993.4709, device='cuda:0')
episode: 590 training return: tensor(991.9126, device='cuda:0')
episode: 591 training return: tensor(992.3489, device='cuda:0')
epoch: 148 test_true_pfm: 8.728232912851634 sim_pfm: 993.1243922829628
episode: 592 training return: tensor(994.5792, device='cuda:0')
episode: 593 training return: tensor(991.2765, device='cuda:0')
episode: 594 training return: tensor(988.0574, device='cuda:0')
episode: 595 training return: tensor(987.5424, device='cuda:0')
epoch: 149 test_true_pfm: -2.666067986035442 sim_pfm: 986.216692507267
episode: 596 training return: tensor(993.0579, device='cuda:0')
episode: 597 training return: tensor(994.6879, device='cuda:0')
episode: 598 training return: tensor(996.2129, device='cuda:0')
episode: 599 training return: tensor(992.5660, device='cuda:0')
epoch: 150 test_true_pfm: -0.36107612729667604 sim_pfm: 994.1763806015254
