['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'baseline', '--traj', 'expert', '--seed', '4']
episode: 0 training return: tensor(-999.9995, device='cuda:0')
episode: 1 training return: tensor(-999.9996, device='cuda:0')
episode: 2 training return: tensor(-999.9998, device='cuda:0')
episode: 3 training return: tensor(-999.9997, device='cuda:0')
epoch: 1 test_true_pfm: -0.42682153710872645 sim_pfm: -999.9755874077479
episode: 4 training return: tensor(-999.9998, device='cuda:0')
episode: 5 training return: tensor(-999.9998, device='cuda:0')
episode: 6 training return: tensor(-999.9998, device='cuda:0')
episode: 7 training return: tensor(-999.9997, device='cuda:0')
epoch: 2 test_true_pfm: -0.24291030356838653 sim_pfm: -999.9770206809044
episode: 8 training return: tensor(-999.9999, device='cuda:0')
episode: 9 training return: tensor(-999.9995, device='cuda:0')
episode: 10 training return: tensor(-999.9998, device='cuda:0')
episode: 11 training return: tensor(-999.9999, device='cuda:0')
epoch: 3 test_true_pfm: -0.01381064855640011 sim_pfm: -999.9771618445715
episode: 12 training return: tensor(-999.9998, device='cuda:0')
episode: 13 training return: tensor(-999.9963, device='cuda:0')
episode: 14 training return: tensor(-999.9998, device='cuda:0')
episode: 15 training return: tensor(-999.9997, device='cuda:0')
epoch: 4 test_true_pfm: -0.21467575577821973 sim_pfm: -999.9767172336578
episode: 16 training return: tensor(-999.9998, device='cuda:0')
episode: 17 training return: tensor(-999.9998, device='cuda:0')
episode: 18 training return: tensor(-999.9996, device='cuda:0')
episode: 19 training return: tensor(-999.9998, device='cuda:0')
epoch: 5 test_true_pfm: -0.31570772292940613 sim_pfm: -999.9767463405927
episode: 20 training return: tensor(-999.9999, device='cuda:0')
episode: 21 training return: tensor(-999.9998, device='cuda:0')
episode: 22 training return: tensor(-999.9998, device='cuda:0')
episode: 23 training return: tensor(-999.9996, device='cuda:0')
epoch: 6 test_true_pfm: -0.13087146598857993 sim_pfm: -999.9766426285108
episode: 24 training return: tensor(-999.9996, device='cuda:0')
episode: 25 training return: tensor(-999.9997, device='cuda:0')
episode: 26 training return: tensor(-999.9998, device='cuda:0')
episode: 27 training return: tensor(-999.9998, device='cuda:0')
epoch: 7 test_true_pfm: -0.5032020053100233 sim_pfm: -999.9768676161766
episode: 28 training return: tensor(-999.9997, device='cuda:0')
episode: 29 training return: tensor(-999.9996, device='cuda:0')
episode: 30 training return: tensor(-999.9998, device='cuda:0')
episode: 31 training return: tensor(-999.9995, device='cuda:0')
epoch: 8 test_true_pfm: -0.24335332405407306 sim_pfm: -999.9768674572309
episode: 32 training return: tensor(-999.9998, device='cuda:0')
episode: 33 training return: tensor(-999.9995, device='cuda:0')
episode: 34 training return: tensor(-999.9998, device='cuda:0')
episode: 35 training return: tensor(-999.9997, device='cuda:0')
epoch: 9 test_true_pfm: -0.49438818944801194 sim_pfm: -999.9770473043124
episode: 36 training return: tensor(-999.9998, device='cuda:0')
episode: 37 training return: tensor(-999.9998, device='cuda:0')
episode: 38 training return: tensor(-999.9998, device='cuda:0')
episode: 39 training return: tensor(-999.9999, device='cuda:0')
epoch: 10 test_true_pfm: -0.7993350317782518 sim_pfm: -999.977240562439
episode: 40 training return: tensor(-999.9996, device='cuda:0')
episode: 41 training return: tensor(-999.9998, device='cuda:0')
episode: 42 training return: tensor(-999.9997, device='cuda:0')
episode: 43 training return: tensor(-999.9999, device='cuda:0')
epoch: 11 test_true_pfm: 0.14552286544372742 sim_pfm: -999.977069735527
episode: 44 training return: tensor(-999.9996, device='cuda:0')
episode: 45 training return: tensor(-999.9997, device='cuda:0')
episode: 46 training return: tensor(-999.9997, device='cuda:0')
episode: 47 training return: tensor(-999.9999, device='cuda:0')
epoch: 12 test_true_pfm: 0.5161218278165108 sim_pfm: -999.9770600597063
episode: 48 training return: tensor(-999.9998, device='cuda:0')
episode: 49 training return: tensor(-999.9998, device='cuda:0')
episode: 50 training return: tensor(-999.9998, device='cuda:0')
episode: 51 training return: tensor(-999.9999, device='cuda:0')
epoch: 13 test_true_pfm: -0.6039760522482213 sim_pfm: -999.9774468541145
episode: 52 training return: tensor(-999.9997, device='cuda:0')
episode: 53 training return: tensor(-999.9995, device='cuda:0')
episode: 54 training return: tensor(-999.9998, device='cuda:0')
episode: 55 training return: tensor(-999.9995, device='cuda:0')
epoch: 14 test_true_pfm: 0.5260049614073934 sim_pfm: -999.9767999251684
episode: 56 training return: tensor(-999.9997, device='cuda:0')
episode: 57 training return: tensor(-999.9999, device='cuda:0')
episode: 58 training return: tensor(-999.9998, device='cuda:0')
episode: 59 training return: tensor(-999.9999, device='cuda:0')
epoch: 15 test_true_pfm: -0.5142437490095707 sim_pfm: -999.9767948389053
episode: 60 training return: tensor(-999.9999, device='cuda:0')
episode: 61 training return: tensor(-999.9997, device='cuda:0')
episode: 62 training return: tensor(-999.9998, device='cuda:0')
episode: 63 training return: tensor(-999.9998, device='cuda:0')
epoch: 16 test_true_pfm: -0.07718594645622265 sim_pfm: -999.9769191145897
episode: 64 training return: tensor(-999.9998, device='cuda:0')
episode: 65 training return: tensor(-999.9998, device='cuda:0')
episode: 66 training return: tensor(-999.9997, device='cuda:0')
episode: 67 training return: tensor(-999.9997, device='cuda:0')
epoch: 17 test_true_pfm: 1.2045489237884142 sim_pfm: -999.9772372047106
episode: 68 training return: tensor(-999.9998, device='cuda:0')
episode: 69 training return: tensor(-999.9998, device='cuda:0')
episode: 70 training return: tensor(-999.9997, device='cuda:0')
episode: 71 training return: tensor(-999.9997, device='cuda:0')
epoch: 18 test_true_pfm: -0.08519666120939462 sim_pfm: -999.9757305781046
episode: 72 training return: tensor(-999.9999, device='cuda:0')
episode: 73 training return: tensor(-999.9999, device='cuda:0')
episode: 74 training return: tensor(-999.9999, device='cuda:0')
episode: 75 training return: tensor(-999.9998, device='cuda:0')
epoch: 19 test_true_pfm: -0.77836965712683 sim_pfm: -999.9768518606821
episode: 76 training return: tensor(-999.9998, device='cuda:0')
episode: 77 training return: tensor(-999.9999, device='cuda:0')
episode: 78 training return: tensor(-999.9999, device='cuda:0')
episode: 79 training return: tensor(-999.9996, device='cuda:0')
epoch: 20 test_true_pfm: -0.2946864540411383 sim_pfm: -999.9766457080841
episode: 80 training return: tensor(-999.9999, device='cuda:0')
episode: 81 training return: tensor(-999.9996, device='cuda:0')
episode: 82 training return: tensor(-999.9998, device='cuda:0')
episode: 83 training return: tensor(-999.9999, device='cuda:0')
epoch: 21 test_true_pfm: 0.12191777600288357 sim_pfm: -999.9766674836477
episode: 84 training return: tensor(-999.9998, device='cuda:0')
episode: 85 training return: tensor(-999.9998, device='cuda:0')
episode: 86 training return: tensor(-999.9998, device='cuda:0')
episode: 87 training return: tensor(-999.9998, device='cuda:0')
epoch: 22 test_true_pfm: 0.025918732119236104 sim_pfm: -999.9764391183853
episode: 88 training return: tensor(-999.9999, device='cuda:0')
episode: 89 training return: tensor(-999.9998, device='cuda:0')
episode: 90 training return: tensor(-999.9997, device='cuda:0')
episode: 91 training return: tensor(-999.9997, device='cuda:0')
epoch: 23 test_true_pfm: -0.5372900068686098 sim_pfm: -999.9771845539411
episode: 92 training return: tensor(-999.9998, device='cuda:0')
episode: 93 training return: tensor(-1000., device='cuda:0')
episode: 94 training return: tensor(-999.9994, device='cuda:0')
episode: 95 training return: tensor(-999.9999, device='cuda:0')
epoch: 24 test_true_pfm: -0.6625745372611561 sim_pfm: -999.9766167004904
episode: 96 training return: tensor(-999.9996, device='cuda:0')
episode: 97 training return: tensor(-999.9997, device='cuda:0')
episode: 98 training return: tensor(-999.9998, device='cuda:0')
episode: 99 training return: tensor(-999.9997, device='cuda:0')
epoch: 25 test_true_pfm: -0.33760141361644164 sim_pfm: -999.9764950275421
episode: 100 training return: tensor(-999.9999, device='cuda:0')
episode: 101 training return: tensor(-999.9996, device='cuda:0')
episode: 102 training return: tensor(-999.9999, device='cuda:0')
episode: 103 training return: tensor(-999.9999, device='cuda:0')
epoch: 26 test_true_pfm: 0.1302138378882808 sim_pfm: -999.9767991105715
episode: 104 training return: tensor(-999.9998, device='cuda:0')
episode: 105 training return: tensor(-999.9998, device='cuda:0')
episode: 106 training return: tensor(-999.9996, device='cuda:0')
episode: 107 training return: tensor(-999.9996, device='cuda:0')
epoch: 27 test_true_pfm: -0.21589411284040247 sim_pfm: -999.977622906367
episode: 108 training return: tensor(-999.9996, device='cuda:0')
episode: 109 training return: tensor(-999.9988, device='cuda:0')
episode: 110 training return: tensor(-999.9996, device='cuda:0')
episode: 111 training return: tensor(-999.9999, device='cuda:0')
epoch: 28 test_true_pfm: 0.014407190983415955 sim_pfm: -999.9768065214157
episode: 112 training return: tensor(-999.9999, device='cuda:0')
episode: 113 training return: tensor(-999.9999, device='cuda:0')
episode: 114 training return: tensor(-999.9999, device='cuda:0')
episode: 115 training return: tensor(-999.9999, device='cuda:0')
epoch: 29 test_true_pfm: -0.08977635289490733 sim_pfm: -999.975972533226
episode: 116 training return: tensor(-999.9999, device='cuda:0')
episode: 117 training return: tensor(-999.9995, device='cuda:0')
episode: 118 training return: tensor(-999.9998, device='cuda:0')
episode: 119 training return: tensor(-999.9998, device='cuda:0')
epoch: 30 test_true_pfm: -0.3792722084805051 sim_pfm: -999.9761380751928
episode: 120 training return: tensor(-999.9996, device='cuda:0')
episode: 121 training return: tensor(-999.9998, device='cuda:0')
episode: 122 training return: tensor(-999.9996, device='cuda:0')
episode: 123 training return: tensor(-999.9995, device='cuda:0')
epoch: 31 test_true_pfm: 0.3043470175099088 sim_pfm: -999.9776448210081
episode: 124 training return: tensor(-999.9999, device='cuda:0')
episode: 125 training return: tensor(-999.9998, device='cuda:0')
episode: 126 training return: tensor(-999.9998, device='cuda:0')
episode: 127 training return: tensor(-999.9998, device='cuda:0')
epoch: 32 test_true_pfm: 0.24887001957325686 sim_pfm: -999.9761006037394
episode: 128 training return: tensor(-999.9997, device='cuda:0')
episode: 129 training return: tensor(-999.9998, device='cuda:0')
episode: 130 training return: tensor(-999.9998, device='cuda:0')
episode: 131 training return: tensor(-999.9999, device='cuda:0')
epoch: 33 test_true_pfm: -1.1345522373060597 sim_pfm: -999.9767163991928
episode: 132 training return: tensor(-999.9999, device='cuda:0')
episode: 133 training return: tensor(-999.9996, device='cuda:0')
episode: 134 training return: tensor(-999.9995, device='cuda:0')
episode: 135 training return: tensor(-999.9998, device='cuda:0')
epoch: 34 test_true_pfm: -0.7161367513936273 sim_pfm: -999.9766673445702
episode: 136 training return: tensor(-999.9996, device='cuda:0')
episode: 137 training return: tensor(-999.9995, device='cuda:0')
episode: 138 training return: tensor(-999.9998, device='cuda:0')
episode: 139 training return: tensor(-999.9997, device='cuda:0')
epoch: 35 test_true_pfm: 0.7585972221685618 sim_pfm: -999.9767287572225
episode: 140 training return: tensor(-999.9996, device='cuda:0')
episode: 141 training return: tensor(-999.9992, device='cuda:0')
episode: 142 training return: tensor(-999.9999, device='cuda:0')
episode: 143 training return: tensor(-999.9996, device='cuda:0')
epoch: 36 test_true_pfm: -0.27199508211423 sim_pfm: -999.9764373898506
episode: 144 training return: tensor(-999.9996, device='cuda:0')
episode: 145 training return: tensor(-999.9997, device='cuda:0')
episode: 146 training return: tensor(-999.9997, device='cuda:0')
episode: 147 training return: tensor(-999.9997, device='cuda:0')
epoch: 37 test_true_pfm: -0.4155394848051422 sim_pfm: -999.9766828219095
episode: 148 training return: tensor(-999.9999, device='cuda:0')
episode: 149 training return: tensor(-999.9999, device='cuda:0')
episode: 150 training return: tensor(-999.9999, device='cuda:0')
episode: 151 training return: tensor(-999.9997, device='cuda:0')
epoch: 38 test_true_pfm: 0.1213873333892272 sim_pfm: -999.9773066242536
episode: 152 training return: tensor(-999.9992, device='cuda:0')
episode: 153 training return: tensor(-999.9998, device='cuda:0')
episode: 154 training return: tensor(-999.9997, device='cuda:0')
episode: 155 training return: tensor(-999.9998, device='cuda:0')
epoch: 39 test_true_pfm: 0.3736863182240598 sim_pfm: -999.9762073556582
episode: 156 training return: tensor(-999.9996, device='cuda:0')
episode: 157 training return: tensor(-999.9998, device='cuda:0')
episode: 158 training return: tensor(-999.9999, device='cuda:0')
episode: 159 training return: tensor(-999.9999, device='cuda:0')
epoch: 40 test_true_pfm: -0.3826010795439026 sim_pfm: -999.9767687916756
episode: 160 training return: tensor(-999.9996, device='cuda:0')
episode: 161 training return: tensor(-999.9999, device='cuda:0')
episode: 162 training return: tensor(-999.9997, device='cuda:0')
episode: 163 training return: tensor(-999.9998, device='cuda:0')
epoch: 41 test_true_pfm: -0.4805798050252757 sim_pfm: -999.977113266786
episode: 164 training return: tensor(-999.9999, device='cuda:0')
episode: 165 training return: tensor(-999.9996, device='cuda:0')
episode: 166 training return: tensor(-999.9996, device='cuda:0')
episode: 167 training return: tensor(-999.9998, device='cuda:0')
epoch: 42 test_true_pfm: -0.04720814929789235 sim_pfm: -999.9764302770296
episode: 168 training return: tensor(-999.9998, device='cuda:0')
episode: 169 training return: tensor(-999.9998, device='cuda:0')
episode: 170 training return: tensor(-999.9996, device='cuda:0')
episode: 171 training return: tensor(-999.9996, device='cuda:0')
epoch: 43 test_true_pfm: -1.057979941272461 sim_pfm: -999.9773999849955
episode: 172 training return: tensor(-999.9997, device='cuda:0')
episode: 173 training return: tensor(-1000., device='cuda:0')
episode: 174 training return: tensor(-999.9997, device='cuda:0')
episode: 175 training return: tensor(-999.9998, device='cuda:0')
epoch: 44 test_true_pfm: 0.3304049575109576 sim_pfm: -999.9772269924482
episode: 176 training return: tensor(-999.9998, device='cuda:0')
episode: 177 training return: tensor(-999.9995, device='cuda:0')
episode: 178 training return: tensor(-999.9996, device='cuda:0')
episode: 179 training return: tensor(-999.9998, device='cuda:0')
epoch: 45 test_true_pfm: 0.48433263318851694 sim_pfm: -999.9767191608747
episode: 180 training return: tensor(-999.9999, device='cuda:0')
episode: 181 training return: tensor(-999.9998, device='cuda:0')
episode: 182 training return: tensor(-999.9997, device='cuda:0')
episode: 183 training return: tensor(-999.9999, device='cuda:0')
epoch: 46 test_true_pfm: -0.058938035011482814 sim_pfm: -999.9763882954916
episode: 184 training return: tensor(-999.9998, device='cuda:0')
episode: 185 training return: tensor(-999.9995, device='cuda:0')
episode: 186 training return: tensor(-999.9998, device='cuda:0')
episode: 187 training return: tensor(-999.9996, device='cuda:0')
epoch: 47 test_true_pfm: 1.0976115008773693 sim_pfm: -999.976840098699
episode: 188 training return: tensor(-999.9996, device='cuda:0')
episode: 189 training return: tensor(-999.9998, device='cuda:0')
episode: 190 training return: tensor(-999.9997, device='cuda:0')
episode: 191 training return: tensor(-999.9998, device='cuda:0')
epoch: 48 test_true_pfm: 0.004635882557751343 sim_pfm: -999.976922015349
episode: 192 training return: tensor(-999.9998, device='cuda:0')
episode: 193 training return: tensor(-999.9998, device='cuda:0')
episode: 194 training return: tensor(-999.9999, device='cuda:0')
episode: 195 training return: tensor(-999.9996, device='cuda:0')
epoch: 49 test_true_pfm: -1.3882852972161182 sim_pfm: -999.9766858816147
episode: 196 training return: tensor(-999.9998, device='cuda:0')
episode: 197 training return: tensor(-999.9998, device='cuda:0')
episode: 198 training return: tensor(-999.9995, device='cuda:0')
episode: 199 training return: tensor(-999.9998, device='cuda:0')
epoch: 50 test_true_pfm: -0.5690797894081582 sim_pfm: -999.9772143959999
episode: 200 training return: tensor(-999.9994, device='cuda:0')
episode: 201 training return: tensor(-999.9997, device='cuda:0')
episode: 202 training return: tensor(-999.9998, device='cuda:0')
episode: 203 training return: tensor(-999.9991, device='cuda:0')
epoch: 51 test_true_pfm: 0.7120693360403942 sim_pfm: -999.9767620960871
episode: 204 training return: tensor(-999.9999, device='cuda:0')
episode: 205 training return: tensor(-999.9998, device='cuda:0')
episode: 206 training return: tensor(-999.9998, device='cuda:0')
episode: 207 training return: tensor(-999.9997, device='cuda:0')
epoch: 52 test_true_pfm: -0.8469786139043493 sim_pfm: -999.9768972396851
episode: 208 training return: tensor(-999.9998, device='cuda:0')
episode: 209 training return: tensor(-999.9999, device='cuda:0')
episode: 210 training return: tensor(-999.9999, device='cuda:0')
episode: 211 training return: tensor(-999.9997, device='cuda:0')
epoch: 53 test_true_pfm: -0.2754692581589128 sim_pfm: -999.9769857327143
episode: 212 training return: tensor(-999.9999, device='cuda:0')
episode: 213 training return: tensor(-999.9998, device='cuda:0')
episode: 214 training return: tensor(-999.9997, device='cuda:0')
episode: 215 training return: tensor(-999.9998, device='cuda:0')
epoch: 54 test_true_pfm: -0.13912492339694146 sim_pfm: -999.9767382740974
episode: 216 training return: tensor(-999.9998, device='cuda:0')
episode: 217 training return: tensor(-999.9998, device='cuda:0')
episode: 218 training return: tensor(-999.9996, device='cuda:0')
episode: 219 training return: tensor(-999.9998, device='cuda:0')
epoch: 55 test_true_pfm: -0.7691366740878481 sim_pfm: -999.9774041970571
episode: 220 training return: tensor(-999.9997, device='cuda:0')
episode: 221 training return: tensor(-999.9998, device='cuda:0')
episode: 222 training return: tensor(-999.9997, device='cuda:0')
episode: 223 training return: tensor(-999.9998, device='cuda:0')
epoch: 56 test_true_pfm: 0.0032041771520373206 sim_pfm: -999.9776168465614
episode: 224 training return: tensor(-999.9995, device='cuda:0')
episode: 225 training return: tensor(-999.9998, device='cuda:0')
episode: 226 training return: tensor(-999.9999, device='cuda:0')
episode: 227 training return: tensor(-999.9999, device='cuda:0')
epoch: 57 test_true_pfm: -0.9858115609014858 sim_pfm: -999.9769337375959
episode: 228 training return: tensor(-999.9998, device='cuda:0')
episode: 229 training return: tensor(-999.9998, device='cuda:0')
episode: 230 training return: tensor(-999.9998, device='cuda:0')
episode: 231 training return: tensor(-999.9998, device='cuda:0')
epoch: 58 test_true_pfm: -0.5396908203139977 sim_pfm: -999.9762360056242
episode: 232 training return: tensor(-999.9998, device='cuda:0')
episode: 233 training return: tensor(-999.9999, device='cuda:0')
episode: 234 training return: tensor(-999.9998, device='cuda:0')
episode: 235 training return: tensor(-999.9996, device='cuda:0')
epoch: 59 test_true_pfm: 0.00710818893003673 sim_pfm: -999.9764656027158
episode: 236 training return: tensor(-999.9996, device='cuda:0')
episode: 237 training return: tensor(-999.9995, device='cuda:0')
episode: 238 training return: tensor(-999.9998, device='cuda:0')
episode: 239 training return: tensor(-999.9998, device='cuda:0')
epoch: 60 test_true_pfm: -0.11680681386392315 sim_pfm: -999.976503431797
episode: 240 training return: tensor(-999.9999, device='cuda:0')
episode: 241 training return: tensor(-999.9998, device='cuda:0')
episode: 242 training return: tensor(-999.9998, device='cuda:0')
episode: 243 training return: tensor(-999.9997, device='cuda:0')
epoch: 61 test_true_pfm: -0.8384004221268878 sim_pfm: -999.9767592549324
episode: 244 training return: tensor(-999.9999, device='cuda:0')
episode: 245 training return: tensor(-999.9996, device='cuda:0')
episode: 246 training return: tensor(-999.9997, device='cuda:0')
episode: 247 training return: tensor(-999.9997, device='cuda:0')
epoch: 62 test_true_pfm: 0.38627659335338027 sim_pfm: -999.976514617602
episode: 248 training return: tensor(-999.9998, device='cuda:0')
episode: 249 training return: tensor(-999.9997, device='cuda:0')
episode: 250 training return: tensor(-999.9998, device='cuda:0')
episode: 251 training return: tensor(-999.9998, device='cuda:0')
epoch: 63 test_true_pfm: -0.4600642393357764 sim_pfm: -999.9769165317217
episode: 252 training return: tensor(-999.9997, device='cuda:0')
episode: 253 training return: tensor(-999.9998, device='cuda:0')
episode: 254 training return: tensor(-999.9999, device='cuda:0')
episode: 255 training return: tensor(-999.9996, device='cuda:0')
epoch: 64 test_true_pfm: -0.14960820561556873 sim_pfm: -999.9766479730606
episode: 256 training return: tensor(-999.9998, device='cuda:0')
episode: 257 training return: tensor(-999.9996, device='cuda:0')
episode: 258 training return: tensor(-999.9997, device='cuda:0')
episode: 259 training return: tensor(-999.9997, device='cuda:0')
epoch: 65 test_true_pfm: -0.4917303822750387 sim_pfm: -999.9767410755157
episode: 260 training return: tensor(-999.9997, device='cuda:0')
episode: 261 training return: tensor(-999.9998, device='cuda:0')
episode: 262 training return: tensor(-999.9998, device='cuda:0')
episode: 263 training return: tensor(-999.9997, device='cuda:0')
epoch: 66 test_true_pfm: -0.5692784293611174 sim_pfm: -999.9769701560339
episode: 264 training return: tensor(-999.9998, device='cuda:0')
episode: 265 training return: tensor(-999.9998, device='cuda:0')
episode: 266 training return: tensor(-999.9995, device='cuda:0')
episode: 267 training return: tensor(-999.9999, device='cuda:0')
epoch: 67 test_true_pfm: -0.07197668832316116 sim_pfm: -999.9768626093864
episode: 268 training return: tensor(-999.9996, device='cuda:0')
episode: 269 training return: tensor(-999.9998, device='cuda:0')
episode: 270 training return: tensor(-999.9999, device='cuda:0')
episode: 271 training return: tensor(-999.9997, device='cuda:0')
epoch: 68 test_true_pfm: 0.04281642087700251 sim_pfm: -999.9763097564379
episode: 272 training return: tensor(-999.9998, device='cuda:0')
episode: 273 training return: tensor(-999.9997, device='cuda:0')
episode: 274 training return: tensor(-999.9999, device='cuda:0')
episode: 275 training return: tensor(-999.9999, device='cuda:0')
epoch: 69 test_true_pfm: -0.19582315149296825 sim_pfm: -999.9768039186796
episode: 276 training return: tensor(-999.9999, device='cuda:0')
episode: 277 training return: tensor(-999.9998, device='cuda:0')
episode: 278 training return: tensor(-999.9998, device='cuda:0')
episode: 279 training return: tensor(-999.9996, device='cuda:0')
epoch: 70 test_true_pfm: -0.08736703120612682 sim_pfm: -999.9767088294029
episode: 280 training return: tensor(-999.9999, device='cuda:0')
episode: 281 training return: tensor(-999.9996, device='cuda:0')
episode: 282 training return: tensor(-999.9998, device='cuda:0')
episode: 283 training return: tensor(-999.9998, device='cuda:0')
epoch: 71 test_true_pfm: -0.2331972355853381 sim_pfm: -999.9768371383349
episode: 284 training return: tensor(-999.9996, device='cuda:0')
episode: 285 training return: tensor(-999.9996, device='cuda:0')
episode: 286 training return: tensor(-999.9999, device='cuda:0')
episode: 287 training return: tensor(-999.9999, device='cuda:0')
epoch: 72 test_true_pfm: 0.030262865356968056 sim_pfm: -999.976979692777
episode: 288 training return: tensor(-999.9999, device='cuda:0')
episode: 289 training return: tensor(-999.9997, device='cuda:0')
episode: 290 training return: tensor(-999.9998, device='cuda:0')
episode: 291 training return: tensor(-999.9998, device='cuda:0')
epoch: 73 test_true_pfm: -0.3005733274826003 sim_pfm: -999.9764637947083
episode: 292 training return: tensor(-999.9998, device='cuda:0')
episode: 293 training return: tensor(-999.9996, device='cuda:0')
episode: 294 training return: tensor(-999.9996, device='cuda:0')
episode: 295 training return: tensor(-999.9997, device='cuda:0')
epoch: 74 test_true_pfm: 0.29265678053897554 sim_pfm: -999.9768635630608
episode: 296 training return: tensor(-999.9999, device='cuda:0')
episode: 297 training return: tensor(-999.9998, device='cuda:0')
episode: 298 training return: tensor(-999.9998, device='cuda:0')
episode: 299 training return: tensor(-999.9995, device='cuda:0')
epoch: 75 test_true_pfm: -0.1783772477216918 sim_pfm: -999.9766136805216
episode: 300 training return: tensor(-999.9998, device='cuda:0')
episode: 301 training return: tensor(-999.9998, device='cuda:0')
episode: 302 training return: tensor(-999.9998, device='cuda:0')
episode: 303 training return: tensor(-999.9997, device='cuda:0')
epoch: 76 test_true_pfm: -0.6227145291996918 sim_pfm: -999.9767839511236
episode: 304 training return: tensor(-999.9997, device='cuda:0')
episode: 305 training return: tensor(-999.9998, device='cuda:0')
episode: 306 training return: tensor(-999.9997, device='cuda:0')
episode: 307 training return: tensor(-999.9996, device='cuda:0')
epoch: 77 test_true_pfm: -0.3195614467895389 sim_pfm: -999.9764537215233
episode: 308 training return: tensor(-999.9999, device='cuda:0')
episode: 309 training return: tensor(-999.9997, device='cuda:0')
episode: 310 training return: tensor(-999.9998, device='cuda:0')
episode: 311 training return: tensor(-999.9998, device='cuda:0')
epoch: 78 test_true_pfm: -0.03756223592661486 sim_pfm: -999.9768500328064
episode: 312 training return: tensor(-999.9998, device='cuda:0')
episode: 313 training return: tensor(-999.9996, device='cuda:0')
episode: 314 training return: tensor(-999.9997, device='cuda:0')
episode: 315 training return: tensor(-999.9997, device='cuda:0')
epoch: 79 test_true_pfm: -0.9608383055614943 sim_pfm: -999.9766696890196
episode: 316 training return: tensor(-999.9998, device='cuda:0')
episode: 317 training return: tensor(-999.9996, device='cuda:0')
episode: 318 training return: tensor(-999.9999, device='cuda:0')
episode: 319 training return: tensor(-999.9998, device='cuda:0')
epoch: 80 test_true_pfm: -0.15702243682390069 sim_pfm: -999.9770999352137
episode: 320 training return: tensor(-999.9998, device='cuda:0')
episode: 321 training return: tensor(-999.9998, device='cuda:0')
episode: 322 training return: tensor(-999.9998, device='cuda:0')
episode: 323 training return: tensor(-999.9999, device='cuda:0')
epoch: 81 test_true_pfm: -0.281578522881008 sim_pfm: -999.9772028326988
episode: 324 training return: tensor(-999.9999, device='cuda:0')
episode: 325 training return: tensor(-999.9998, device='cuda:0')
episode: 326 training return: tensor(-999.9998, device='cuda:0')
episode: 327 training return: tensor(-999.9996, device='cuda:0')
epoch: 82 test_true_pfm: -0.8213696232924456 sim_pfm: -999.9767204324404
episode: 328 training return: tensor(-999.9996, device='cuda:0')
episode: 329 training return: tensor(-999.9996, device='cuda:0')
episode: 330 training return: tensor(-999.9999, device='cuda:0')
episode: 331 training return: tensor(-999.9999, device='cuda:0')
epoch: 83 test_true_pfm: -0.6648198833199208 sim_pfm: -999.9765203197798
episode: 332 training return: tensor(-999.9997, device='cuda:0')
episode: 333 training return: tensor(-999.9998, device='cuda:0')
episode: 334 training return: tensor(-999.9999, device='cuda:0')
episode: 335 training return: tensor(-999.9998, device='cuda:0')
epoch: 84 test_true_pfm: -1.1140356448123272 sim_pfm: -999.9773833950361
episode: 336 training return: tensor(-999.9995, device='cuda:0')
episode: 337 training return: tensor(-999.9998, device='cuda:0')
episode: 338 training return: tensor(-999.9996, device='cuda:0')
episode: 339 training return: tensor(-999.9998, device='cuda:0')
epoch: 85 test_true_pfm: 0.05090951841668586 sim_pfm: -999.9764610330263
episode: 340 training return: tensor(-999.9999, device='cuda:0')
episode: 341 training return: tensor(-999.9999, device='cuda:0')
episode: 342 training return: tensor(-999.9999, device='cuda:0')
episode: 343 training return: tensor(-999.9998, device='cuda:0')
epoch: 86 test_true_pfm: -0.006246888277702071 sim_pfm: -999.9765734473864
episode: 344 training return: tensor(-999.9998, device='cuda:0')
episode: 345 training return: tensor(-999.9998, device='cuda:0')
episode: 346 training return: tensor(-999.9998, device='cuda:0')
episode: 347 training return: tensor(-999.9998, device='cuda:0')
epoch: 87 test_true_pfm: -0.8192345182761173 sim_pfm: -999.9766481518745
episode: 348 training return: tensor(-999.9979, device='cuda:0')
episode: 349 training return: tensor(-999.9997, device='cuda:0')
episode: 350 training return: tensor(-999.9996, device='cuda:0')
episode: 351 training return: tensor(-999.9998, device='cuda:0')
epoch: 88 test_true_pfm: -0.849358637880167 sim_pfm: -999.9773428837458
episode: 352 training return: tensor(-999.9998, device='cuda:0')
episode: 353 training return: tensor(-999.9997, device='cuda:0')
episode: 354 training return: tensor(-999.9999, device='cuda:0')
episode: 355 training return: tensor(-999.9998, device='cuda:0')
epoch: 89 test_true_pfm: 0.30796400066677926 sim_pfm: -999.9770032962164
episode: 356 training return: tensor(-999.9999, device='cuda:0')
episode: 357 training return: tensor(-999.9998, device='cuda:0')
episode: 358 training return: tensor(-999.9997, device='cuda:0')
episode: 359 training return: tensor(-999.9994, device='cuda:0')
epoch: 90 test_true_pfm: -0.8012398227007141 sim_pfm: -999.977159778277
episode: 360 training return: tensor(-999.9996, device='cuda:0')
episode: 361 training return: tensor(-999.9998, device='cuda:0')
episode: 362 training return: tensor(-999.9999, device='cuda:0')
episode: 363 training return: tensor(-999.9996, device='cuda:0')
epoch: 91 test_true_pfm: -0.42313126361929404 sim_pfm: -999.9773954749107
episode: 364 training return: tensor(-999.9998, device='cuda:0')
episode: 365 training return: tensor(-999.9996, device='cuda:0')
episode: 366 training return: tensor(-999.9998, device='cuda:0')
episode: 367 training return: tensor(-999.9998, device='cuda:0')
epoch: 92 test_true_pfm: -0.24366180786661049 sim_pfm: -999.9773802161217
episode: 368 training return: tensor(-999.9998, device='cuda:0')
episode: 369 training return: tensor(-999.9999, device='cuda:0')
episode: 370 training return: tensor(-999.9999, device='cuda:0')
episode: 371 training return: tensor(-999.9996, device='cuda:0')
epoch: 93 test_true_pfm: -0.09866916483757464 sim_pfm: -999.9771503011385
episode: 372 training return: tensor(-999.9998, device='cuda:0')
episode: 373 training return: tensor(-999.9996, device='cuda:0')
episode: 374 training return: tensor(-999.9999, device='cuda:0')
episode: 375 training return: tensor(-999.9998, device='cuda:0')
epoch: 94 test_true_pfm: -0.9622531225201486 sim_pfm: -999.9768311580023
episode: 376 training return: tensor(-999.9996, device='cuda:0')
episode: 377 training return: tensor(-999.9998, device='cuda:0')
episode: 378 training return: tensor(-999.9999, device='cuda:0')
episode: 379 training return: tensor(-999.9999, device='cuda:0')
epoch: 95 test_true_pfm: -0.8459290651869774 sim_pfm: -999.9773864944776
episode: 380 training return: tensor(-999.9997, device='cuda:0')
episode: 381 training return: tensor(-999.9996, device='cuda:0')
episode: 382 training return: tensor(-999.9997, device='cuda:0')
episode: 383 training return: tensor(-999.9998, device='cuda:0')
epoch: 96 test_true_pfm: -0.6743821207088171 sim_pfm: -999.9769679307938
episode: 384 training return: tensor(-999.9996, device='cuda:0')
episode: 385 training return: tensor(-999.9998, device='cuda:0')
episode: 386 training return: tensor(-999.9999, device='cuda:0')
episode: 387 training return: tensor(-999.9998, device='cuda:0')
epoch: 97 test_true_pfm: -0.7077533827595567 sim_pfm: -999.9770403504372
episode: 388 training return: tensor(-999.9999, device='cuda:0')
episode: 389 training return: tensor(-999.9999, device='cuda:0')
episode: 390 training return: tensor(-999.9998, device='cuda:0')
episode: 391 training return: tensor(-999.9996, device='cuda:0')
epoch: 98 test_true_pfm: -0.28175908435967584 sim_pfm: -999.9766305486361
episode: 392 training return: tensor(-999.9998, device='cuda:0')
episode: 393 training return: tensor(-999.9997, device='cuda:0')
episode: 394 training return: tensor(-999.9996, device='cuda:0')
episode: 395 training return: tensor(-999.9994, device='cuda:0')
epoch: 99 test_true_pfm: -0.23668667478398017 sim_pfm: -999.9768549402555
episode: 396 training return: tensor(-999.9996, device='cuda:0')
episode: 397 training return: tensor(-999.9998, device='cuda:0')
episode: 398 training return: tensor(-999.9996, device='cuda:0')
episode: 399 training return: tensor(-999.9997, device='cuda:0')
epoch: 100 test_true_pfm: -0.48757892638783645 sim_pfm: -999.9772210121155
episode: 400 training return: tensor(-999.9996, device='cuda:0')
episode: 401 training return: tensor(-999.9997, device='cuda:0')
episode: 402 training return: tensor(-999.9998, device='cuda:0')
episode: 403 training return: tensor(-999.9993, device='cuda:0')
epoch: 101 test_true_pfm: -0.5686904963241121 sim_pfm: -999.9774341980616
episode: 404 training return: tensor(-999.9998, device='cuda:0')
episode: 405 training return: tensor(-999.9998, device='cuda:0')
episode: 406 training return: tensor(-999.9999, device='cuda:0')
episode: 407 training return: tensor(-999.9998, device='cuda:0')
epoch: 102 test_true_pfm: -0.06971323185149603 sim_pfm: -999.9773270686468
episode: 408 training return: tensor(-999.9999, device='cuda:0')
episode: 409 training return: tensor(-999.9995, device='cuda:0')
episode: 410 training return: tensor(-999.9996, device='cuda:0')
episode: 411 training return: tensor(-999.9997, device='cuda:0')
epoch: 103 test_true_pfm: -0.030217720731397574 sim_pfm: -999.9770478010178
episode: 412 training return: tensor(-999.9998, device='cuda:0')
episode: 413 training return: tensor(-999.9998, device='cuda:0')
episode: 414 training return: tensor(-999.9999, device='cuda:0')
episode: 415 training return: tensor(-999.9998, device='cuda:0')
epoch: 104 test_true_pfm: -0.2955942055631145 sim_pfm: -999.9770348270735
episode: 416 training return: tensor(-999.9996, device='cuda:0')
episode: 417 training return: tensor(-999.9998, device='cuda:0')
episode: 418 training return: tensor(-999.9997, device='cuda:0')
episode: 419 training return: tensor(-999.9998, device='cuda:0')
epoch: 105 test_true_pfm: -1.0623216548687417 sim_pfm: -999.9766559203466
episode: 420 training return: tensor(-999.9998, device='cuda:0')
episode: 421 training return: tensor(-999.9997, device='cuda:0')
episode: 422 training return: tensor(-999.9995, device='cuda:0')
episode: 423 training return: tensor(-999.9998, device='cuda:0')
epoch: 106 test_true_pfm: -0.9119679421266905 sim_pfm: -999.9763072133064
episode: 424 training return: tensor(-999.9998, device='cuda:0')
episode: 425 training return: tensor(-999.9999, device='cuda:0')
episode: 426 training return: tensor(-999.9997, device='cuda:0')
episode: 427 training return: tensor(-999.9996, device='cuda:0')
epoch: 107 test_true_pfm: -0.3936305887573502 sim_pfm: -999.9763015707334
episode: 428 training return: tensor(-999.9998, device='cuda:0')
episode: 429 training return: tensor(-999.9998, device='cuda:0')
episode: 430 training return: tensor(-999.9996, device='cuda:0')
episode: 431 training return: tensor(-999.9997, device='cuda:0')
epoch: 108 test_true_pfm: -0.6432225518858286 sim_pfm: -999.9767360885938
episode: 432 training return: tensor(-999.9996, device='cuda:0')
episode: 433 training return: tensor(-999.9997, device='cuda:0')
episode: 434 training return: tensor(-999.9998, device='cuda:0')
episode: 435 training return: tensor(-999.9996, device='cuda:0')
epoch: 109 test_true_pfm: -0.20682244142969006 sim_pfm: -999.9773684541384
episode: 436 training return: tensor(-999.9998, device='cuda:0')
episode: 437 training return: tensor(-999.9997, device='cuda:0')
episode: 438 training return: tensor(-999.9997, device='cuda:0')
episode: 439 training return: tensor(-999.9996, device='cuda:0')
epoch: 110 test_true_pfm: -0.10190316339407983 sim_pfm: -999.9773484667143
episode: 440 training return: tensor(-999.9997, device='cuda:0')
episode: 441 training return: tensor(-999.9997, device='cuda:0')
episode: 442 training return: tensor(-999.9997, device='cuda:0')
episode: 443 training return: tensor(-999.9999, device='cuda:0')
epoch: 111 test_true_pfm: 0.5573588302819038 sim_pfm: -999.9774292906126
episode: 444 training return: tensor(-999.9997, device='cuda:0')
episode: 445 training return: tensor(-999.9996, device='cuda:0')
episode: 446 training return: tensor(-999.9998, device='cuda:0')
episode: 447 training return: tensor(-999.9998, device='cuda:0')
epoch: 112 test_true_pfm: -0.24201141213334412 sim_pfm: -999.9768228729566
episode: 448 training return: tensor(-999.9995, device='cuda:0')
episode: 449 training return: tensor(-999.9999, device='cuda:0')
episode: 450 training return: tensor(-999.9996, device='cuda:0')
episode: 451 training return: tensor(-999.9999, device='cuda:0')
epoch: 113 test_true_pfm: -0.028561366946878997 sim_pfm: -999.9771969715754
episode: 452 training return: tensor(-999.9998, device='cuda:0')
episode: 453 training return: tensor(-999.9998, device='cuda:0')
episode: 454 training return: tensor(-999.9997, device='cuda:0')
episode: 455 training return: tensor(-999.9996, device='cuda:0')
epoch: 114 test_true_pfm: -0.4866963210401359 sim_pfm: -999.9770638545355
episode: 456 training return: tensor(-999.9998, device='cuda:0')
episode: 457 training return: tensor(-999.9997, device='cuda:0')
episode: 458 training return: tensor(-999.9997, device='cuda:0')
episode: 459 training return: tensor(-999.9999, device='cuda:0')
epoch: 115 test_true_pfm: -0.40149588226452243 sim_pfm: -999.9768620928129
episode: 460 training return: tensor(-999.9998, device='cuda:0')
episode: 461 training return: tensor(-999.9995, device='cuda:0')
episode: 462 training return: tensor(-999.9998, device='cuda:0')
episode: 463 training return: tensor(-999.9997, device='cuda:0')
epoch: 116 test_true_pfm: -0.25731016800739237 sim_pfm: -999.9766325553259
episode: 464 training return: tensor(-999.9995, device='cuda:0')
episode: 465 training return: tensor(-999.9998, device='cuda:0')
episode: 466 training return: tensor(-999.9998, device='cuda:0')
episode: 467 training return: tensor(-999.9993, device='cuda:0')
epoch: 117 test_true_pfm: 0.6860249938627573 sim_pfm: -999.9771741628647
episode: 468 training return: tensor(-999.9996, device='cuda:0')
episode: 469 training return: tensor(-999.9995, device='cuda:0')
episode: 470 training return: tensor(-999.9998, device='cuda:0')
episode: 471 training return: tensor(-999.9998, device='cuda:0')
epoch: 118 test_true_pfm: -0.15466403839751164 sim_pfm: -999.9768264691035
episode: 472 training return: tensor(-999.9996, device='cuda:0')
episode: 473 training return: tensor(-999.9998, device='cuda:0')
episode: 474 training return: tensor(-999.9998, device='cuda:0')
episode: 475 training return: tensor(-999.9993, device='cuda:0')
epoch: 119 test_true_pfm: -0.6939588706812656 sim_pfm: -999.9772408604622
episode: 476 training return: tensor(-999.9998, device='cuda:0')
episode: 477 training return: tensor(-999.9999, device='cuda:0')
episode: 478 training return: tensor(-999.9999, device='cuda:0')
episode: 479 training return: tensor(-999.9996, device='cuda:0')
epoch: 120 test_true_pfm: -0.8667501866475176 sim_pfm: -999.9769021272659
episode: 480 training return: tensor(-999.9997, device='cuda:0')
episode: 481 training return: tensor(-999.9998, device='cuda:0')
episode: 482 training return: tensor(-999.9999, device='cuda:0')
episode: 483 training return: tensor(-999.9997, device='cuda:0')
epoch: 121 test_true_pfm: -0.9490429762802305 sim_pfm: -999.9764506816864
episode: 484 training return: tensor(-999.9996, device='cuda:0')
episode: 485 training return: tensor(-999.9998, device='cuda:0')
episode: 486 training return: tensor(-999.9999, device='cuda:0')
episode: 487 training return: tensor(-999.9990, device='cuda:0')
epoch: 122 test_true_pfm: 0.026641879747617957 sim_pfm: -999.9769071141878
episode: 488 training return: tensor(-999.9997, device='cuda:0')
episode: 489 training return: tensor(-999.9995, device='cuda:0')
episode: 490 training return: tensor(-999.9997, device='cuda:0')
episode: 491 training return: tensor(-999.9998, device='cuda:0')
epoch: 123 test_true_pfm: -0.652060488490166 sim_pfm: -999.9763041933378
episode: 492 training return: tensor(-999.9999, device='cuda:0')
episode: 493 training return: tensor(-999.9997, device='cuda:0')
episode: 494 training return: tensor(-999.9997, device='cuda:0')
episode: 495 training return: tensor(-999.9995, device='cuda:0')
epoch: 124 test_true_pfm: -0.4799373072258197 sim_pfm: -999.9766696095467
episode: 496 training return: tensor(-999.9997, device='cuda:0')
episode: 497 training return: tensor(-999.9998, device='cuda:0')
episode: 498 training return: tensor(-999.9998, device='cuda:0')
episode: 499 training return: tensor(-999.9998, device='cuda:0')
epoch: 125 test_true_pfm: -0.4485814670940744 sim_pfm: -999.9766682982445
episode: 500 training return: tensor(-999.9998, device='cuda:0')
episode: 501 training return: tensor(-999.9998, device='cuda:0')
episode: 502 training return: tensor(-999.9998, device='cuda:0')
episode: 503 training return: tensor(-999.9996, device='cuda:0')
epoch: 126 test_true_pfm: -0.47341898434938817 sim_pfm: -999.9769109686216
episode: 504 training return: tensor(-999.9997, device='cuda:0')
episode: 505 training return: tensor(-999.9997, device='cuda:0')
episode: 506 training return: tensor(-999.9996, device='cuda:0')
episode: 507 training return: tensor(-999.9996, device='cuda:0')
epoch: 127 test_true_pfm: -0.13333398185584575 sim_pfm: -999.9772425889969
episode: 508 training return: tensor(-999.9998, device='cuda:0')
episode: 509 training return: tensor(-999.9995, device='cuda:0')
episode: 510 training return: tensor(-999.9996, device='cuda:0')
episode: 511 training return: tensor(-999.9999, device='cuda:0')
epoch: 128 test_true_pfm: -0.01780175604829029 sim_pfm: -999.9767313599586
episode: 512 training return: tensor(-999.9997, device='cuda:0')
episode: 513 training return: tensor(-999.9998, device='cuda:0')
episode: 514 training return: tensor(-999.9995, device='cuda:0')
episode: 515 training return: tensor(-999.9998, device='cuda:0')
epoch: 129 test_true_pfm: -0.12108819779615004 sim_pfm: -999.9770865241686
episode: 516 training return: tensor(-999.9998, device='cuda:0')
episode: 517 training return: tensor(-999.9998, device='cuda:0')
episode: 518 training return: tensor(-999.9998, device='cuda:0')
episode: 519 training return: tensor(-999.9997, device='cuda:0')
epoch: 130 test_true_pfm: -0.282144541758301 sim_pfm: -999.976668616136
episode: 520 training return: tensor(-999.9998, device='cuda:0')
episode: 521 training return: tensor(-999.9998, device='cuda:0')
episode: 522 training return: tensor(-999.9998, device='cuda:0')
episode: 523 training return: tensor(-999.9998, device='cuda:0')
epoch: 131 test_true_pfm: -0.07902896731405502 sim_pfm: -999.9770890076956
episode: 524 training return: tensor(-999.9999, device='cuda:0')
episode: 525 training return: tensor(-999.9998, device='cuda:0')
episode: 526 training return: tensor(-999.9998, device='cuda:0')
episode: 527 training return: tensor(-999.9999, device='cuda:0')
epoch: 132 test_true_pfm: -0.323772115069187 sim_pfm: -999.9762579600016
episode: 528 training return: tensor(-999.9999, device='cuda:0')
episode: 529 training return: tensor(-999.9998, device='cuda:0')
episode: 530 training return: tensor(-999.9997, device='cuda:0')
episode: 531 training return: tensor(-999.9998, device='cuda:0')
epoch: 133 test_true_pfm: -0.2232648777923085 sim_pfm: -999.9769260485967
episode: 532 training return: tensor(-999.9999, device='cuda:0')
episode: 533 training return: tensor(-999.9998, device='cuda:0')
episode: 534 training return: tensor(-999.9996, device='cuda:0')
episode: 535 training return: tensor(-999.9999, device='cuda:0')
epoch: 134 test_true_pfm: -0.6157951842099281 sim_pfm: -999.9759897589684
episode: 536 training return: tensor(-999.9997, device='cuda:0')
episode: 537 training return: tensor(-999.9997, device='cuda:0')
episode: 538 training return: tensor(-999.9998, device='cuda:0')
episode: 539 training return: tensor(-999.9996, device='cuda:0')
epoch: 135 test_true_pfm: -0.8124709562471861 sim_pfm: -999.977421104908
episode: 540 training return: tensor(-999.9999, device='cuda:0')
episode: 541 training return: tensor(-999.9997, device='cuda:0')
episode: 542 training return: tensor(-999.9999, device='cuda:0')
episode: 543 training return: tensor(-999.9998, device='cuda:0')
epoch: 136 test_true_pfm: -0.19073993082355956 sim_pfm: -999.9771700501442
episode: 544 training return: tensor(-999.9999, device='cuda:0')
episode: 545 training return: tensor(-999.9999, device='cuda:0')
episode: 546 training return: tensor(-999.9998, device='cuda:0')
episode: 547 training return: tensor(-999.9996, device='cuda:0')
epoch: 137 test_true_pfm: -0.2629706128426919 sim_pfm: -999.9769811828932
episode: 548 training return: tensor(-999.9998, device='cuda:0')
episode: 549 training return: tensor(-999.9998, device='cuda:0')
episode: 550 training return: tensor(-999.9999, device='cuda:0')
episode: 551 training return: tensor(-999.9994, device='cuda:0')
epoch: 138 test_true_pfm: -0.2472567284636301 sim_pfm: -999.9768757621447
episode: 552 training return: tensor(-999.9998, device='cuda:0')
episode: 553 training return: tensor(-999.9998, device='cuda:0')
episode: 554 training return: tensor(-999.9998, device='cuda:0')
episode: 555 training return: tensor(-999.9997, device='cuda:0')
epoch: 139 test_true_pfm: -0.39767314551632554 sim_pfm: -999.9774240454038
episode: 556 training return: tensor(-999.9996, device='cuda:0')
episode: 557 training return: tensor(-999.9998, device='cuda:0')
episode: 558 training return: tensor(-999.9999, device='cuda:0')
episode: 559 training return: tensor(-999.9996, device='cuda:0')
epoch: 140 test_true_pfm: 0.14667680217319554 sim_pfm: -999.9773328900337
episode: 560 training return: tensor(-999.9998, device='cuda:0')
episode: 561 training return: tensor(-999.9998, device='cuda:0')
episode: 562 training return: tensor(-999.9997, device='cuda:0')
episode: 563 training return: tensor(-999.9997, device='cuda:0')
epoch: 141 test_true_pfm: -0.3639015187037587 sim_pfm: -999.9769372145335
episode: 564 training return: tensor(-999.9997, device='cuda:0')
episode: 565 training return: tensor(-999.9996, device='cuda:0')
episode: 566 training return: tensor(-999.9999, device='cuda:0')
episode: 567 training return: tensor(-999.9999, device='cuda:0')
epoch: 142 test_true_pfm: 0.4111405790979054 sim_pfm: -999.976552605629
episode: 568 training return: tensor(-999.9996, device='cuda:0')
episode: 569 training return: tensor(-999.9995, device='cuda:0')
episode: 570 training return: tensor(-999.9998, device='cuda:0')
episode: 571 training return: tensor(-999.9999, device='cuda:0')
epoch: 143 test_true_pfm: -0.33437669427117517 sim_pfm: -999.9765284856161
episode: 572 training return: tensor(-999.9997, device='cuda:0')
episode: 573 training return: tensor(-999.9997, device='cuda:0')
episode: 574 training return: tensor(-999.9998, device='cuda:0')
episode: 575 training return: tensor(-999.9998, device='cuda:0')
epoch: 144 test_true_pfm: -0.022417064288882682 sim_pfm: -999.976777891318
episode: 576 training return: tensor(-999.9998, device='cuda:0')
episode: 577 training return: tensor(-999.9998, device='cuda:0')
episode: 578 training return: tensor(-999.9997, device='cuda:0')
episode: 579 training return: tensor(-999.9985, device='cuda:0')
epoch: 145 test_true_pfm: -0.10575948242257832 sim_pfm: -999.9764772057533
episode: 580 training return: tensor(-999.9998, device='cuda:0')
episode: 581 training return: tensor(-999.9997, device='cuda:0')
episode: 582 training return: tensor(-999.9998, device='cuda:0')
episode: 583 training return: tensor(-999.9994, device='cuda:0')
epoch: 146 test_true_pfm: -0.5152086605679905 sim_pfm: -999.9771645863851
episode: 584 training return: tensor(-999.9998, device='cuda:0')
episode: 585 training return: tensor(-999.9999, device='cuda:0')
episode: 586 training return: tensor(-999.9998, device='cuda:0')
episode: 587 training return: tensor(-999.9998, device='cuda:0')
epoch: 147 test_true_pfm: 0.22678694040525427 sim_pfm: -999.9769560893377
episode: 588 training return: tensor(-999.9997, device='cuda:0')
episode: 589 training return: tensor(-999.9999, device='cuda:0')
episode: 590 training return: tensor(-999.9998, device='cuda:0')
episode: 591 training return: tensor(-999.9997, device='cuda:0')
epoch: 148 test_true_pfm: -0.24908498616856903 sim_pfm: -999.9771787524223
episode: 592 training return: tensor(-999.9995, device='cuda:0')
episode: 593 training return: tensor(-999.9998, device='cuda:0')
episode: 594 training return: tensor(-999.9996, device='cuda:0')
episode: 595 training return: tensor(-999.9996, device='cuda:0')
epoch: 149 test_true_pfm: -0.2417256236166848 sim_pfm: -999.9768492976824
episode: 596 training return: tensor(-999.9998, device='cuda:0')
episode: 597 training return: tensor(-999.9999, device='cuda:0')
episode: 598 training return: tensor(-999.9997, device='cuda:0')
episode: 599 training return: tensor(-999.9997, device='cuda:0')
epoch: 150 test_true_pfm: -0.02158950518273756 sim_pfm: -999.9764330387115
