['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '0', '--data', '100000']
episode: 0 training return: -1488.8641506938297
episode: 1 training return: -1387.0164982371111
episode: 2 training return: -1002.6565134966038
episode: 3 training return: -738.3271221227219
epoch: 1 test_true_pfm: 26.736445713041075 sim_pfm: -1460.382893713317
episode: 4 training return: -1350.3032356084063
episode: 5 training return: -1173.6181749517546
episode: 6 training return: -1284.9058351451063
episode: 7 training return: -1506.1998604921225
epoch: 2 test_true_pfm: 24.980404693599535 sim_pfm: -1581.9240544878717
episode: 8 training return: -1071.87400042532
episode: 9 training return: -1363.0477366322584
episode: 10 training return: -517.6985701886086
episode: 11 training return: -497.96641859431156
epoch: 3 test_true_pfm: 8.581763367327733 sim_pfm: -667.8238335760332
episode: 12 training return: -1089.0957193121674
episode: 13 training return: -117.40754366792646
episode: 14 training return: -279.990594652214
episode: 15 training return: -411.61087565544966
epoch: 4 test_true_pfm: -1.7625894914843934 sim_pfm: -148.0315818367555
episode: 16 training return: -768.5371625153088
episode: 17 training return: -762.2415535270774
episode: 18 training return: -246.56271021250777
episode: 19 training return: 0.47974564452407664
epoch: 5 test_true_pfm: -53.64234623184649 sim_pfm: 311.2468120019744
episode: 20 training return: 219.65710500842118
episode: 21 training return: 307.7867538944266
episode: 22 training return: 65.70066873907439
episode: 23 training return: 547.4237668108357
epoch: 6 test_true_pfm: 8.03269743715238 sim_pfm: 674.0978347671282
episode: 24 training return: 390.99990914625124
episode: 25 training return: 72.6245502391887
episode: 26 training return: -1.5698135843665522
episode: 27 training return: 457.3983178964814
epoch: 7 test_true_pfm: 82.3943783790146 sim_pfm: -1.4075450500638815
episode: 28 training return: 439.11043969435883
episode: 29 training return: 505.13224892221547
episode: 30 training return: 535.5918594838208
episode: 31 training return: 515.5126393174016
epoch: 8 test_true_pfm: 15.619883038552885 sim_pfm: 655.97813315053
episode: 32 training return: 689.3270845098497
episode: 33 training return: 487.2640059363597
episode: 34 training return: 671.6792253757956
episode: 35 training return: 633.0871193340668
epoch: 9 test_true_pfm: 3.6159922348396307 sim_pfm: 614.752717000604
episode: 36 training return: 305.2481810549333
episode: 37 training return: 787.4317231234519
episode: 38 training return: 776.867828669888
episode: 39 training return: 738.4912565493532
epoch: 10 test_true_pfm: 2.491528658324864 sim_pfm: 787.6342723495986
episode: 40 training return: 723.5674869247839
episode: 41 training return: 761.4128212305418
episode: 42 training return: 759.9843725112823
episode: 43 training return: 798.5767654099847
epoch: 11 test_true_pfm: -11.064329364966568 sim_pfm: 775.348682440354
episode: 44 training return: 707.2123100020867
episode: 45 training return: 773.615222322694
episode: 46 training return: 784.4118391749566
episode: 47 training return: 771.6411833296102
epoch: 12 test_true_pfm: -10.688422430235075 sim_pfm: 884.9861054861615
episode: 48 training return: 719.8821710599993
episode: 49 training return: 788.0836658106795
episode: 50 training return: 795.0645798890429
episode: 51 training return: 776.0698448375961
epoch: 13 test_true_pfm: -15.256786680643836 sim_pfm: 848.6033296237143
episode: 52 training return: 773.1613959754342
episode: 53 training return: 700.0331616098284
episode: 54 training return: 819.1508312224806
episode: 55 training return: 798.144007560689
epoch: 14 test_true_pfm: -11.449322064062846 sim_pfm: 880.8593134459736
episode: 56 training return: 796.3898968777506
episode: 57 training return: 774.1661447047773
episode: 58 training return: 815.53921735317
episode: 59 training return: 796.409139567656
epoch: 15 test_true_pfm: -9.680577796160946 sim_pfm: 871.630446024665
episode: 60 training return: 741.8484848631813
episode: 61 training return: 807.7217093348306
episode: 62 training return: 802.0392628472368
episode: 63 training return: 767.5534085486543
epoch: 16 test_true_pfm: -17.218670742641756 sim_pfm: 870.3733162287892
episode: 64 training return: 795.3866507289844
episode: 65 training return: 773.6922203244893
episode: 66 training return: 752.6978024155966
episode: 67 training return: 682.5001275166132
epoch: 17 test_true_pfm: -13.576719953316484 sim_pfm: 838.8774212039827
episode: 68 training return: 699.738681387234
episode: 69 training return: 742.8532101631514
episode: 70 training return: 703.0979959360315
episode: 71 training return: 798.3927748790333
epoch: 18 test_true_pfm: -20.47949269109686 sim_pfm: 851.3314112030026
episode: 72 training return: 682.5875735519227
episode: 73 training return: 697.7032686460556
episode: 74 training return: 749.8655277233078
episode: 75 training return: 775.7029093424168
epoch: 19 test_true_pfm: -19.699099988253977 sim_pfm: 871.1485412292956
episode: 76 training return: 842.293314280741
episode: 77 training return: 744.447722132007
episode: 78 training return: 791.5193328310146
episode: 79 training return: 789.7744072569344
epoch: 20 test_true_pfm: -8.221662200204918 sim_pfm: 917.3940255832891
episode: 80 training return: 812.7692333841081
episode: 81 training return: 790.9902153752087
episode: 82 training return: 824.6083433060124
episode: 83 training return: 829.7858571563688
epoch: 21 test_true_pfm: -15.043662115848397 sim_pfm: 879.5891050814132
episode: 84 training return: 820.2731893873311
episode: 85 training return: 796.6451020574074
episode: 86 training return: 816.8196880049662
episode: 87 training return: 824.0340280957823
epoch: 22 test_true_pfm: -18.132993516741436 sim_pfm: 900.8898337142466
episode: 88 training return: 754.1444670974723
episode: 89 training return: 801.8325237635914
episode: 90 training return: 811.6555084871131
episode: 91 training return: 746.64348360233
epoch: 23 test_true_pfm: -14.553839387808551 sim_pfm: 878.376174799736
episode: 92 training return: 765.3564303580907
episode: 93 training return: 785.7498783112567
episode: 94 training return: 761.1340880724874
episode: 95 training return: 743.78684373702
epoch: 24 test_true_pfm: -8.02145679141645 sim_pfm: 927.1636602464548
episode: 96 training return: 828.2640896607579
episode: 97 training return: 824.2553785865742
episode: 98 training return: 814.2304143323216
episode: 99 training return: 792.5579886309516
epoch: 25 test_true_pfm: -16.599906225817186 sim_pfm: 890.7722527245744
episode: 100 training return: 757.8100334271924
episode: 101 training return: 757.4924954726074
episode: 102 training return: 786.8513951643771
episode: 103 training return: 813.3722824665855
epoch: 26 test_true_pfm: 3.621210169585821 sim_pfm: 927.8758549297721
episode: 104 training return: 824.6212597760351
episode: 105 training return: 791.157102544722
episode: 106 training return: 750.6178849253253
episode: 107 training return: 815.1422135734975
epoch: 27 test_true_pfm: -14.535316924073788 sim_pfm: 881.4664849108918
episode: 108 training return: 814.1395501416662
episode: 109 training return: 774.5448941552468
episode: 110 training return: 799.5362431381147
episode: 111 training return: 761.9551943937124
epoch: 28 test_true_pfm: -6.09610296780169 sim_pfm: 894.7140690525459
episode: 112 training return: 799.1307790441924
episode: 113 training return: 790.2582978428243
episode: 114 training return: 735.2992909530237
episode: 115 training return: 457.17179434728115
epoch: 29 test_true_pfm: -0.2458759845397922 sim_pfm: 925.5129699262676
episode: 116 training return: 778.4413637560142
episode: 117 training return: 742.453464716328
episode: 118 training return: 813.2636004505465
episode: 119 training return: 796.9365434379156
epoch: 30 test_true_pfm: -12.111847921761482 sim_pfm: 861.7774332936722
episode: 120 training return: 699.3275229797259
episode: 121 training return: 710.30031771934
episode: 122 training return: 767.5864464749734
episode: 123 training return: 790.2215427660303
epoch: 31 test_true_pfm: 2.7697627571683525 sim_pfm: 887.7486566528976
episode: 124 training return: 769.3078796019846
episode: 125 training return: 754.4146865391218
episode: 126 training return: 817.0249836830458
episode: 127 training return: 825.9990787127251
epoch: 32 test_true_pfm: -7.48576984209936 sim_pfm: 923.752883291457
episode: 128 training return: 817.2312864210573
episode: 129 training return: 790.6268968465919
episode: 130 training return: 801.9227736911254
episode: 131 training return: 792.5201145005636
epoch: 33 test_true_pfm: 1.121258745763818 sim_pfm: 881.47457307359
episode: 132 training return: 793.1413298816087
episode: 133 training return: 779.6108087780856
episode: 134 training return: 786.7482085567264
episode: 135 training return: 785.8763973091097
epoch: 34 test_true_pfm: -28.42141120104255 sim_pfm: 873.7358976416074
episode: 136 training return: 806.9304526709286
episode: 137 training return: 794.0685157168075
episode: 138 training return: 825.1132253686916
episode: 139 training return: 772.8545951201428
epoch: 35 test_true_pfm: -24.78449098568054 sim_pfm: 913.0821604602941
episode: 140 training return: 806.1364301896314
episode: 141 training return: 805.0362224930487
episode: 142 training return: 797.3069051968573
episode: 143 training return: 771.1952786433055
epoch: 36 test_true_pfm: -15.063227944544963 sim_pfm: 875.5373230965063
episode: 144 training return: 800.5983251207883
episode: 145 training return: 767.7195989137239
episode: 146 training return: 768.7999272503741
episode: 147 training return: 766.983500074498
epoch: 37 test_true_pfm: 6.685902311405239 sim_pfm: 927.3501186742502
episode: 148 training return: 789.9754616497286
episode: 149 training return: 770.7177649140845
episode: 150 training return: 799.0174001225155
episode: 151 training return: 764.6013196586531
epoch: 38 test_true_pfm: 13.663980481801184 sim_pfm: 878.5929448421493
episode: 152 training return: 728.3009595200524
episode: 153 training return: 686.3718029758232
episode: 154 training return: 727.6568043638096
episode: 155 training return: 686.4124138503952
epoch: 39 test_true_pfm: 6.8339411915677015 sim_pfm: 916.1150996415612
episode: 156 training return: 640.4424339300986
episode: 157 training return: 584.7860280427819
episode: 158 training return: 660.7478158582283
episode: 159 training return: 598.4372665233373
epoch: 40 test_true_pfm: 4.114082551704561 sim_pfm: 882.100140939708
episode: 160 training return: 654.9160507230596
episode: 161 training return: 617.5665269488809
episode: 162 training return: 666.60040301889
episode: 163 training return: 640.2164269719029
epoch: 41 test_true_pfm: 9.494370774270239 sim_pfm: 911.0323749621099
episode: 164 training return: 657.8536551014014
episode: 165 training return: 674.3179125031061
episode: 166 training return: 704.1382927120427
episode: 167 training return: 591.3124519786237
epoch: 42 test_true_pfm: 4.680323067247123 sim_pfm: 914.6099743030094
episode: 168 training return: 585.8663521279772
episode: 169 training return: 561.974504922767
episode: 170 training return: 701.116965084323
episode: 171 training return: 709.3698673561817
epoch: 43 test_true_pfm: -2.095832547782131 sim_pfm: 916.5993842189386
episode: 172 training return: 630.9208194625514
episode: 173 training return: 484.65007122931144
episode: 174 training return: 658.3717983828951
episode: 175 training return: 509.9083729092343
epoch: 44 test_true_pfm: 5.365045398687122 sim_pfm: 892.609013684385
episode: 176 training return: 496.18824520865473
episode: 177 training return: 650.6521292345931
episode: 178 training return: 685.3696323918651
episode: 179 training return: 589.5166984471178
epoch: 45 test_true_pfm: 8.793318674347422 sim_pfm: 897.8383544795937
episode: 180 training return: 611.0332208143307
episode: 181 training return: 590.2848816085818
episode: 182 training return: 648.7782062993853
episode: 183 training return: 573.3993795100001
epoch: 46 test_true_pfm: 7.695550751209607 sim_pfm: 892.335030408523
episode: 184 training return: 622.3555017622431
episode: 185 training return: 555.4348470009185
episode: 186 training return: 532.872845954571
episode: 187 training return: 543.8828249376687
epoch: 47 test_true_pfm: 9.885509152195292 sim_pfm: 923.4852141665084
episode: 188 training return: 515.8982086256495
episode: 189 training return: 649.8430487727903
episode: 190 training return: 631.4019786195577
episode: 191 training return: 656.7285231503679
epoch: 48 test_true_pfm: 10.068650193203009 sim_pfm: 895.8211970160097
episode: 192 training return: 623.2957061784296
episode: 193 training return: 648.0338626023338
episode: 194 training return: 667.4732889442678
episode: 195 training return: 638.1537196695081
epoch: 49 test_true_pfm: 7.206036688044226 sim_pfm: 916.3220341768289
episode: 196 training return: 665.3086931013214
episode: 197 training return: 574.9053769016014
episode: 198 training return: 707.9801890541155
episode: 199 training return: 703.9356082985726
epoch: 50 test_true_pfm: 8.838854370197364 sim_pfm: 920.6564163644786
episode: 200 training return: 568.3525782600402
episode: 201 training return: 656.4933395922784
episode: 202 training return: 605.3376887931437
episode: 203 training return: 610.3536280329804
epoch: 51 test_true_pfm: 8.140446028989865 sim_pfm: 909.8627488666292
episode: 204 training return: 566.8501727212482
episode: 205 training return: 632.5112082612247
episode: 206 training return: 614.6254258614708
episode: 207 training return: 686.9657911512545
epoch: 52 test_true_pfm: 11.798158761313651 sim_pfm: 898.3861061798052
episode: 208 training return: 691.8024614278746
episode: 209 training return: 647.8079108932658
episode: 210 training return: 510.9253308741048
episode: 211 training return: 609.267909154968
epoch: 53 test_true_pfm: 0.5919673572224223 sim_pfm: 896.8414210282559
episode: 212 training return: 517.0405800273388
episode: 213 training return: 556.6697604112192
episode: 214 training return: 693.6860964485937
episode: 215 training return: 710.1682825995141
epoch: 54 test_true_pfm: 2.2030470991732374 sim_pfm: 869.2610354921974
episode: 216 training return: 655.740000244042
episode: 217 training return: 595.5174665468978
episode: 218 training return: 615.86444004708
episode: 219 training return: 535.5615018349984
epoch: 55 test_true_pfm: 7.746022390105095 sim_pfm: 891.6002741513341
episode: 220 training return: 524.9764691689379
episode: 221 training return: 677.0868344997359
episode: 222 training return: 626.4783123269681
episode: 223 training return: 636.3657381255193
epoch: 56 test_true_pfm: -4.840903743083544 sim_pfm: 910.5667158016656
episode: 224 training return: 547.2039015749639
episode: 225 training return: 611.0331952936497
episode: 226 training return: 604.0006425097368
episode: 227 training return: 645.6370651442043
epoch: 57 test_true_pfm: -4.051987313715143 sim_pfm: 871.8356760318572
episode: 228 training return: 649.5856910041965
episode: 229 training return: 538.5759598378871
episode: 230 training return: 606.1217150354656
episode: 231 training return: 628.32313344813
epoch: 58 test_true_pfm: 3.5281258865534495 sim_pfm: 894.5304853582195
episode: 232 training return: 692.1436371001242
episode: 233 training return: 665.9088628989517
episode: 234 training return: 658.6324429095761
episode: 235 training return: 635.5579223845751
epoch: 59 test_true_pfm: 2.6236564772962714 sim_pfm: 907.6412790431738
episode: 236 training return: 644.2633448653704
episode: 237 training return: 554.2720700931857
episode: 238 training return: 698.769178971057
episode: 239 training return: 706.8922473007042
epoch: 60 test_true_pfm: 1.8404685148297777 sim_pfm: 891.121918312828
episode: 240 training return: 687.1900504448325
episode: 241 training return: 705.4487916359683
episode: 242 training return: 702.4691352943402
episode: 243 training return: 745.2348935264965
epoch: 61 test_true_pfm: 5.457928744975528 sim_pfm: 894.377807039964
episode: 244 training return: 586.2701550678571
episode: 245 training return: 643.5030471672053
episode: 246 training return: 608.0389829142657
episode: 247 training return: 661.7742450431228
epoch: 62 test_true_pfm: -5.20797272211312 sim_pfm: 888.2582332721319
episode: 248 training return: 583.853004623815
episode: 249 training return: 551.6618329505008
episode: 250 training return: 653.0586982090867
episode: 251 training return: 593.4991189945043
epoch: 63 test_true_pfm: -5.435184756306356 sim_pfm: 904.8894667677403
episode: 252 training return: 576.3371294575599
episode: 253 training return: 577.0910534278992
episode: 254 training return: 586.388368803012
episode: 255 training return: 536.069143313365
epoch: 64 test_true_pfm: 0.767725886828875 sim_pfm: 885.8445998640285
episode: 256 training return: 598.1859209598093
episode: 257 training return: 561.2735432516504
episode: 258 training return: 674.9136567332228
episode: 259 training return: 553.7552584954985
epoch: 65 test_true_pfm: -2.85827508382347 sim_pfm: 877.8319589868219
episode: 260 training return: 608.5941044533781
episode: 261 training return: 544.3097385949584
episode: 262 training return: 655.1954193299867
episode: 263 training return: 668.1169434130477
epoch: 66 test_true_pfm: 9.947815921737732 sim_pfm: 875.7279364537584
episode: 264 training return: 543.8752290017144
episode: 265 training return: 717.9840803984191
episode: 266 training return: 579.3563675668113
episode: 267 training return: 630.7997673012767
epoch: 67 test_true_pfm: -14.962708565369695 sim_pfm: 914.62741748734
episode: 268 training return: 736.0294881573501
episode: 269 training return: 507.7108458755763
episode: 270 training return: 584.695833627457
episode: 271 training return: 654.0130908627998
epoch: 68 test_true_pfm: -19.39799729058109 sim_pfm: 894.5606194871476
episode: 272 training return: 680.7050335923585
episode: 273 training return: 541.426728653886
episode: 274 training return: 677.8583399254252
episode: 275 training return: 752.0201641182935
epoch: 69 test_true_pfm: 8.710271653959383 sim_pfm: 928.6949681370894
episode: 276 training return: 589.3547382071687
episode: 277 training return: 630.3286295135916
episode: 278 training return: 637.2902058737907
episode: 279 training return: 549.006279019214
epoch: 70 test_true_pfm: 11.008480326231993 sim_pfm: 919.0416811751417
episode: 280 training return: 640.4669903464045
episode: 281 training return: 736.3656748672047
episode: 282 training return: 604.4361696402304
episode: 283 training return: 676.843177722884
epoch: 71 test_true_pfm: -2.3199748421481723 sim_pfm: 887.7244340439235
episode: 284 training return: 657.7165273904017
episode: 285 training return: 595.0691989117242
episode: 286 training return: 670.0191365853537
episode: 287 training return: 609.0325860562934
epoch: 72 test_true_pfm: -7.273131547007966 sim_pfm: 902.1285968512377
episode: 288 training return: 523.214968125654
episode: 289 training return: 679.5271467908693
episode: 290 training return: 620.9079625572772
episode: 291 training return: 659.2352267728945
epoch: 73 test_true_pfm: 5.1814946937740745 sim_pfm: 896.9978560535401
episode: 292 training return: 681.8019303619952
episode: 293 training return: 557.5952302414876
episode: 294 training return: 684.2180535508711
episode: 295 training return: 576.5501593229003
epoch: 74 test_true_pfm: -0.7299354405716064 sim_pfm: 905.5366284878539
episode: 296 training return: 655.0203383943866
episode: 297 training return: 694.9104459059567
episode: 298 training return: 610.2087118393855
episode: 299 training return: 720.5395746554053
epoch: 75 test_true_pfm: 3.672568714655907 sim_pfm: 907.7462309693434
episode: 300 training return: 739.9808376263735
episode: 301 training return: 589.5367355027215
episode: 302 training return: 668.3591315509451
episode: 303 training return: 593.173243280554
epoch: 76 test_true_pfm: -17.200017962156046 sim_pfm: 915.7796108146434
episode: 304 training return: 566.0717750185662
episode: 305 training return: 525.2824387480365
episode: 306 training return: 658.8339775163494
episode: 307 training return: 647.7022299424548
epoch: 77 test_true_pfm: -16.754156883848275 sim_pfm: 915.9598063434594
episode: 308 training return: 720.7545135421262
episode: 309 training return: 588.3887756986389
episode: 310 training return: 640.6041106409867
episode: 311 training return: 778.3572550249836
epoch: 78 test_true_pfm: -14.386323963600995 sim_pfm: 896.7850384773446
episode: 312 training return: 661.1580103452295
episode: 313 training return: 668.9066260252622
episode: 314 training return: 666.3213560916844
episode: 315 training return: 692.8386374598265
epoch: 79 test_true_pfm: -15.892007725223754 sim_pfm: 893.2594117675911
episode: 316 training return: 656.813123772577
episode: 317 training return: 530.6158455840352
episode: 318 training return: 531.554503605941
episode: 319 training return: 653.0826953785711
epoch: 80 test_true_pfm: -13.251758727388694 sim_pfm: 924.2875047705354
episode: 320 training return: 593.8039645620364
episode: 321 training return: 658.7851611798866
episode: 322 training return: 634.8773258239407
episode: 323 training return: 680.3132384230908
epoch: 81 test_true_pfm: 7.646206535681328 sim_pfm: 896.0422242181266
episode: 324 training return: 705.7776769783474
episode: 325 training return: 700.007073924953
episode: 326 training return: 763.0091784387978
episode: 327 training return: 672.9945530112866
epoch: 82 test_true_pfm: -10.083456185335262 sim_pfm: 905.5278939226071
episode: 328 training return: 733.4349899863431
episode: 329 training return: 664.3402403727155
episode: 330 training return: 693.7048056172639
episode: 331 training return: 679.3910406348933
epoch: 83 test_true_pfm: 13.844537020389518 sim_pfm: 914.9153268756397
episode: 332 training return: 718.9940216872766
episode: 333 training return: 734.6124697841747
episode: 334 training return: 760.5461360301538
episode: 335 training return: 731.2073122049811
epoch: 84 test_true_pfm: -4.418079347789477 sim_pfm: 912.5885877517339
episode: 336 training return: 605.8983281482402
episode: 337 training return: 770.2644878348372
episode: 338 training return: 653.4231801823956
episode: 339 training return: 631.1712631772632
epoch: 85 test_true_pfm: -0.43317450269858393 sim_pfm: 902.4964294414103
episode: 340 training return: 636.6539084185953
episode: 341 training return: 679.9304271103388
episode: 342 training return: 657.7693167593552
episode: 343 training return: 754.4478363269678
epoch: 86 test_true_pfm: 6.489245797931389 sim_pfm: 908.487402413583
episode: 344 training return: 668.4832483438227
episode: 345 training return: 712.6866896867106
episode: 346 training return: 667.4672355523215
episode: 347 training return: 666.4127743021271
epoch: 87 test_true_pfm: -21.96388811383225 sim_pfm: 902.5473469742021
episode: 348 training return: 645.3463864208367
episode: 349 training return: 659.7969880265896
episode: 350 training return: 636.3590569988594
episode: 351 training return: 685.939426070823
epoch: 88 test_true_pfm: -4.658816101234448 sim_pfm: 919.7596284037409
episode: 352 training return: 616.7885256378975
episode: 353 training return: 599.2724526946844
episode: 354 training return: 625.4012108931914
episode: 355 training return: 664.4310287176862
epoch: 89 test_true_pfm: 1.2862742448231725 sim_pfm: 871.1132008746603
episode: 356 training return: 707.495055229571
episode: 357 training return: 735.621404315028
episode: 358 training return: 713.3226581928279
episode: 359 training return: 735.3180315389625
epoch: 90 test_true_pfm: -25.49878181113727 sim_pfm: 871.3438914292353
episode: 360 training return: 614.8563950207609
episode: 361 training return: 654.0441283578533
episode: 362 training return: 708.6508778416519
episode: 363 training return: 770.0144163944418
epoch: 91 test_true_pfm: -16.769558086188347 sim_pfm: 886.5973235175259
episode: 364 training return: 728.5901679609109
episode: 365 training return: 769.0699588291789
episode: 366 training return: 722.7753631332597
episode: 367 training return: 609.7666118046651
epoch: 92 test_true_pfm: -15.719066322921526 sim_pfm: 880.5555550723568
episode: 368 training return: 648.274069684575
episode: 369 training return: 704.8000853148969
episode: 370 training return: 693.5814578763776
episode: 371 training return: 699.1789694206917
epoch: 93 test_true_pfm: -20.504022005722693 sim_pfm: 886.9673801717784
episode: 372 training return: 568.0366074605254
episode: 373 training return: 518.1475588945094
episode: 374 training return: 496.9216808166962
episode: 375 training return: 699.6612754777333
epoch: 94 test_true_pfm: 1.4755984776505757 sim_pfm: 917.835967462345
episode: 376 training return: 623.444680760492
episode: 377 training return: 536.3850750996967
episode: 378 training return: 575.8307425646949
episode: 379 training return: 641.1771580777175
epoch: 95 test_true_pfm: 3.4142356584077853 sim_pfm: 901.0619514713092
episode: 380 training return: 593.124846542414
episode: 381 training return: 675.8984389342614
episode: 382 training return: 637.7046509609255
episode: 383 training return: 642.1171293861505
epoch: 96 test_true_pfm: -36.465311855974505 sim_pfm: 828.4067480023153
episode: 384 training return: 618.9400111598859
episode: 385 training return: 656.2919437678172
episode: 386 training return: 683.4612753720003
episode: 387 training return: 673.6367656105789
epoch: 97 test_true_pfm: 4.400522652129989 sim_pfm: 916.76693228289
episode: 388 training return: 662.4900208812888
episode: 389 training return: 619.3601117745765
episode: 390 training return: 763.267573177866
episode: 391 training return: 729.3495446967825
epoch: 98 test_true_pfm: -0.9183034408105254 sim_pfm: 925.5252528716094
episode: 392 training return: 731.1311129608282
episode: 393 training return: 713.6731840765801
episode: 394 training return: 709.6590138206536
episode: 395 training return: 708.2214068216701
epoch: 99 test_true_pfm: 5.718908955619912 sim_pfm: 887.2088335349263
episode: 396 training return: 711.7859057561146
episode: 397 training return: 624.6438031747581
episode: 398 training return: 643.9864599184439
episode: 399 training return: 657.7797430367534
epoch: 100 test_true_pfm: 7.82979665048466 sim_pfm: 923.6825064365285
episode: 400 training return: 687.3001273007118
episode: 401 training return: 693.1046850647057
episode: 402 training return: 713.660926333124
episode: 403 training return: 713.317046863284
epoch: 101 test_true_pfm: -13.80083927634713 sim_pfm: 934.8841647013681
episode: 404 training return: 655.3188850491025
episode: 405 training return: 613.2398382856585
episode: 406 training return: 634.9668671155631
episode: 407 training return: 741.497090148051
epoch: 102 test_true_pfm: -9.869279988213474 sim_pfm: 908.768986230038
episode: 408 training return: 623.5182073633584
episode: 409 training return: 531.6570891717678
episode: 410 training return: 684.4842777356926
episode: 411 training return: 622.0039227964982
epoch: 103 test_true_pfm: -53.69117980232962 sim_pfm: 831.2301431947426
episode: 412 training return: 573.6443300765304
episode: 413 training return: 737.4307280645321
episode: 414 training return: 707.3646118382482
episode: 415 training return: 706.0948778792168
epoch: 104 test_true_pfm: 6.4885768405665685 sim_pfm: 935.390982685048
episode: 416 training return: 695.9496064831106
episode: 417 training return: 571.3131680758682
episode: 418 training return: 730.2264890762075
episode: 419 training return: 593.2514002332589
epoch: 105 test_true_pfm: 1.0176763722401923 sim_pfm: 897.3792962095637
episode: 420 training return: 682.1461759268119
episode: 421 training return: 551.8145743049843
episode: 422 training return: 649.3864767317922
episode: 423 training return: 669.2589847184117
epoch: 106 test_true_pfm: 2.5708757194147127 sim_pfm: 912.8660716797258
episode: 424 training return: 721.3620143984155
episode: 425 training return: 673.0215106208873
episode: 426 training return: 736.4929807009621
episode: 427 training return: 707.7557514249886
epoch: 107 test_true_pfm: 1.9056219450969258 sim_pfm: 875.4239464225445
episode: 428 training return: 727.5056465314692
episode: 429 training return: 659.1884521540883
episode: 430 training return: 766.4737029587893
episode: 431 training return: 689.7465867171879
epoch: 108 test_true_pfm: -0.7376815818923943 sim_pfm: 889.1472825865922
episode: 432 training return: 617.497420326983
episode: 433 training return: 654.8914590153852
episode: 434 training return: 509.7576658687632
episode: 435 training return: 708.351223967265
epoch: 109 test_true_pfm: -19.911832379158614 sim_pfm: 907.5230869605972
episode: 436 training return: 664.2804329198692
episode: 437 training return: 634.7478382974862
episode: 438 training return: 540.9125442940614
episode: 439 training return: 617.1657451782426
epoch: 110 test_true_pfm: -19.337171251406403 sim_pfm: 901.8591874502466
episode: 440 training return: 586.5101394272202
episode: 441 training return: 572.4987794943311
episode: 442 training return: 556.2578678872544
episode: 443 training return: 671.700686300642
epoch: 111 test_true_pfm: -13.613177952143136 sim_pfm: 869.7332323725352
episode: 444 training return: 577.1938382239414
episode: 445 training return: 604.8094127042344
episode: 446 training return: 615.166379709166
episode: 447 training return: 569.6206966035755
epoch: 112 test_true_pfm: 3.296175785347964 sim_pfm: 907.4796949930372
episode: 448 training return: 605.8145188684887
episode: 449 training return: 687.5979461516247
episode: 450 training return: 664.1695894957065
episode: 451 training return: 612.5245324189088
epoch: 113 test_true_pfm: 9.945352261096064 sim_pfm: 901.7340102707892
episode: 452 training return: 691.0221756308418
episode: 453 training return: 671.2202496670176
episode: 454 training return: 734.7579466872019
episode: 455 training return: 685.1826236966351
epoch: 114 test_true_pfm: 18.57383455712704 sim_pfm: 905.5192142667502
episode: 456 training return: 709.0686053840053
episode: 457 training return: 663.5301955573988
episode: 458 training return: 744.431021764045
episode: 459 training return: 731.8706059059327
epoch: 115 test_true_pfm: -0.6770011388566262 sim_pfm: 879.735124899263
episode: 460 training return: 712.3260386415615
episode: 461 training return: 604.6877184054165
episode: 462 training return: 653.8149223272508
episode: 463 training return: 663.2204656380948
epoch: 116 test_true_pfm: 6.997850601603058 sim_pfm: 862.3059842011502
episode: 464 training return: 637.9882314155549
episode: 465 training return: 579.8178546266173
episode: 466 training return: 581.8044612011113
episode: 467 training return: 606.0695565972012
epoch: 117 test_true_pfm: 11.228914775220762 sim_pfm: 916.0792505742522
episode: 468 training return: 646.8150660423006
episode: 469 training return: 649.3278870546928
episode: 470 training return: 591.0970705086788
episode: 471 training return: 621.2973753341195
epoch: 118 test_true_pfm: -15.57606604144255 sim_pfm: 899.0107830635043
episode: 472 training return: 644.3663284817642
episode: 473 training return: 663.8790562694677
episode: 474 training return: 621.4812816906298
episode: 475 training return: 727.5334466635929
epoch: 119 test_true_pfm: -5.721596322981078 sim_pfm: 914.3011553843529
episode: 476 training return: 674.1433188162075
episode: 477 training return: 690.6750826685707
episode: 478 training return: 719.5477888493615
episode: 479 training return: 674.0485097220646
epoch: 120 test_true_pfm: 13.796018909942372 sim_pfm: 817.7322362055436
episode: 480 training return: 572.8393104403851
episode: 481 training return: 597.7366375111745
episode: 482 training return: 504.5912988245036
episode: 483 training return: 627.5471394349647
epoch: 121 test_true_pfm: -2.1081586008832462 sim_pfm: 892.3441204636043
episode: 484 training return: 690.1829579980547
episode: 485 training return: 605.7597702765195
episode: 486 training return: 684.0731904072934
episode: 487 training return: 721.8125497305674
epoch: 122 test_true_pfm: -8.298991542065966 sim_pfm: 912.7232438826019
episode: 488 training return: 664.1429227153255
episode: 489 training return: 655.7150966924158
episode: 490 training return: 640.4896074340114
episode: 491 training return: 628.2629047162316
epoch: 123 test_true_pfm: -8.893366640335355 sim_pfm: 898.9933185920275
episode: 492 training return: 613.3810326140543
episode: 493 training return: 619.3540165442184
episode: 494 training return: 673.8787041041492
episode: 495 training return: 672.1589433087861
epoch: 124 test_true_pfm: -1.9746276743350648 sim_pfm: 866.5593720769826
episode: 496 training return: 711.5768675529447
episode: 497 training return: 613.4219214279518
episode: 498 training return: 685.4536596212138
episode: 499 training return: 586.7025176405856
epoch: 125 test_true_pfm: -18.426514325412935 sim_pfm: 733.5536426453457
episode: 500 training return: 640.8548316747408
episode: 501 training return: 603.6891473000708
episode: 502 training return: 634.9103733275352
episode: 503 training return: 685.8831923830714
epoch: 126 test_true_pfm: -4.053050969685127 sim_pfm: 860.6886483404076
episode: 504 training return: 644.0132684814226
episode: 505 training return: 526.7915754085283
episode: 506 training return: 640.5717881272274
episode: 507 training return: 624.3416328215382
epoch: 127 test_true_pfm: -4.569228359081709 sim_pfm: 889.8187049917415
episode: 508 training return: 591.6874689022109
episode: 509 training return: 530.5514766525507
episode: 510 training return: 619.8231609313121
episode: 511 training return: 632.2206386357171
epoch: 128 test_true_pfm: -0.14100483521588775 sim_pfm: 913.8368716866116
episode: 512 training return: 562.3868743455088
episode: 513 training return: 635.0476077915407
episode: 514 training return: 642.5776566400166
episode: 515 training return: 591.7409828662608
epoch: 129 test_true_pfm: -11.85452288571075 sim_pfm: 881.9817945306538
episode: 516 training return: 553.9994012746233
episode: 517 training return: 610.8815926417177
episode: 518 training return: 571.4909545800065
episode: 519 training return: 562.8806670128843
epoch: 130 test_true_pfm: -17.762412590330207 sim_pfm: 898.6543295150639
episode: 520 training return: 629.8661491631906
episode: 521 training return: 604.0571797673273
episode: 522 training return: 522.2116802086416
episode: 523 training return: 628.707359855526
epoch: 131 test_true_pfm: -5.136021830559633 sim_pfm: 859.9809295979574
episode: 524 training return: 552.6348740337666
episode: 525 training return: 659.252083156191
episode: 526 training return: 496.20989561983765
episode: 527 training return: 678.5141479255851
epoch: 132 test_true_pfm: -14.164403309401402 sim_pfm: 905.5411513695896
episode: 528 training return: 637.419828682424
episode: 529 training return: 664.978901313929
episode: 530 training return: 725.8374384137438
episode: 531 training return: 671.0188648124904
epoch: 133 test_true_pfm: 0.48577674603941007 sim_pfm: 923.270182337317
episode: 532 training return: 704.639679624516
episode: 533 training return: 563.2408876894991
episode: 534 training return: 598.1507778907882
episode: 535 training return: 675.9127659247233
epoch: 134 test_true_pfm: -10.528308154946636 sim_pfm: 918.2291458071692
episode: 536 training return: 614.5701402892647
episode: 537 training return: 599.0571528850508
episode: 538 training return: 656.623713508405
episode: 539 training return: 579.3952870453921
epoch: 135 test_true_pfm: -13.181023017191498 sim_pfm: 891.3577472081379
episode: 540 training return: 590.5948922536238
episode: 541 training return: 648.9886789189785
episode: 542 training return: 646.380837147158
episode: 543 training return: 611.2140696124355
epoch: 136 test_true_pfm: -6.834325917077095 sim_pfm: 858.873776726515
episode: 544 training return: 497.9849419681232
episode: 545 training return: 543.0619433939164
episode: 546 training return: 599.1516375055792
episode: 547 training return: 579.0689382506357
epoch: 137 test_true_pfm: 6.64148873485636 sim_pfm: 878.4028642103492
episode: 548 training return: 559.3199710618827
episode: 549 training return: 556.0216050873944
episode: 550 training return: 556.9808720965971
episode: 551 training return: 543.914592187806
epoch: 138 test_true_pfm: -3.1667922619057705 sim_pfm: 900.284547861624
episode: 552 training return: 578.2755131078518
episode: 553 training return: 628.265860206229
episode: 554 training return: 532.9704500163158
episode: 555 training return: 595.343790242898
epoch: 139 test_true_pfm: 4.769211887393007 sim_pfm: 881.51323207201
episode: 556 training return: 553.1875673970542
episode: 557 training return: 612.6283368457782
episode: 558 training return: 586.7947232176963
episode: 559 training return: 548.1012879102814
epoch: 140 test_true_pfm: 1.9951589501537932 sim_pfm: 920.6740032860947
episode: 560 training return: 529.1726617728667
episode: 561 training return: 541.8611721742204
episode: 562 training return: 620.0331391126188
episode: 563 training return: 631.1572422394013
epoch: 141 test_true_pfm: -4.945879002910766 sim_pfm: 893.9135009526897
episode: 564 training return: 522.1022735686128
episode: 565 training return: 675.4053787752462
episode: 566 training return: 582.1937265978949
episode: 567 training return: 497.98344455087124
epoch: 142 test_true_pfm: -8.025463310824694 sim_pfm: 873.740850706
episode: 568 training return: 565.2338621821524
episode: 569 training return: 549.063458711403
episode: 570 training return: 509.923912313487
episode: 571 training return: 525.6761394324791
epoch: 143 test_true_pfm: -7.578436589924708 sim_pfm: 837.7239040633485
episode: 572 training return: 556.4866261881141
episode: 573 training return: 573.4787703234751
episode: 574 training return: 545.2792714597443
episode: 575 training return: 586.7772656053558
epoch: 144 test_true_pfm: 5.919869541005058 sim_pfm: 875.9210287969147
episode: 576 training return: 538.2892550564924
episode: 577 training return: 656.211605964273
episode: 578 training return: 624.8567031187372
episode: 579 training return: 611.4710700107366
epoch: 145 test_true_pfm: 4.007925364919691 sim_pfm: 884.7575309976148
episode: 580 training return: 592.510722551849
episode: 581 training return: 568.7616593909231
episode: 582 training return: 575.3455854888625
episode: 583 training return: 561.094452883546
epoch: 146 test_true_pfm: 5.968751739957352 sim_pfm: 892.4911056761796
episode: 584 training return: 659.7754795054841
episode: 585 training return: 509.9385504869259
episode: 586 training return: 602.3757433864919
episode: 587 training return: 546.1172628356192
epoch: 147 test_true_pfm: -10.373658836459153 sim_pfm: 868.5789183357061
episode: 588 training return: 480.7011859731724
episode: 589 training return: 488.57471169293177
episode: 590 training return: 524.4022344321625
episode: 591 training return: 609.8181109470822
epoch: 148 test_true_pfm: -18.017961866895167 sim_pfm: 893.3176320744521
episode: 592 training return: 522.8438417936401
episode: 593 training return: 437.3529322251424
episode: 594 training return: 468.8999511641985
episode: 595 training return: 571.204338943467
epoch: 149 test_true_pfm: 2.8249581165939395 sim_pfm: 887.6220576258733
episode: 596 training return: 596.2871472443309
episode: 597 training return: 626.6614021092769
episode: 598 training return: 604.8465227245496
episode: 599 training return: 532.557649342675
epoch: 150 test_true_pfm: -0.425952672605694 sim_pfm: 870.6703147142483
