['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.4288453334569931 test_loss: 0.4687049865722656
epoch: 1 training_loss 0.3715608555078507 test_loss: 0.35220890045166015
epoch: 2 training_loss 0.36043588921427727 test_loss: 0.32098543643951416
epoch: 3 training_loss 0.35914990305900574 test_loss: 0.3435216426849365
epoch: 4 training_loss 0.3499909932911396 test_loss: 0.3310720920562744
epoch: 5 training_loss 0.35579641729593275 test_loss: 0.3351154327392578
epoch: 6 training_loss 0.3572003896534443 test_loss: 0.36242692470550536
epoch: 7 training_loss 0.34253689482808114 test_loss: 0.344912052154541
epoch: 8 training_loss 0.35214430138468744 test_loss: 0.3531543970108032
epoch: 9 training_loss 0.35270687654614447 test_loss: 0.34340362548828124
epoch: 10 training_loss 0.35704936027526857 test_loss: 0.3447761297225952
epoch: 11 training_loss 0.3463126003742218 test_loss: 0.3416886329650879
epoch: 12 training_loss 0.3545701029896736 test_loss: 0.38035080432891843
epoch: 13 training_loss 0.351042944341898 test_loss: 0.3599994421005249
epoch: 14 training_loss 0.3372734636068344 test_loss: 0.3274636507034302
epoch: 15 training_loss 0.3542668367922306 test_loss: 0.3474767208099365
epoch: 16 training_loss 0.33691907569766044 test_loss: 0.3382234573364258
epoch: 17 training_loss 0.3354914580285549 test_loss: 0.365442681312561
epoch: 18 training_loss 0.34004349827766417 test_loss: 0.3299174070358276
epoch: 19 training_loss 0.33694736823439597 test_loss: 0.32930042743682864
epoch: 20 training_loss 0.3388227564096451 test_loss: 0.3520824432373047
epoch: 21 training_loss 0.3445600733160973 test_loss: 0.3534461259841919
epoch: 22 training_loss 0.3475154837965965 test_loss: 0.35168209075927737
epoch: 23 training_loss 0.3448418737947941 test_loss: 0.3301759004592896
epoch: 24 training_loss 0.34552529141306876 test_loss: 0.3440821409225464
epoch: 25 training_loss 0.3458533908426762 test_loss: 0.32686901092529297
epoch: 26 training_loss 0.3472338902950287 test_loss: 0.32589542865753174
epoch: 27 training_loss 0.3402633644640446 test_loss: 0.35062782764434813
epoch: 28 training_loss 0.33420233964920043 test_loss: 0.3211954355239868
epoch: 29 training_loss 0.34795440137386324 test_loss: 0.37654144763946534
epoch: 30 training_loss 0.34877245292067527 test_loss: 0.3098795413970947
epoch: 31 training_loss 0.34430994153022765 test_loss: 0.31351141929626464
epoch: 32 training_loss 0.3349834904074669 test_loss: 0.3283864974975586
epoch: 33 training_loss 0.34632987052202224 test_loss: 0.3278249979019165
epoch: 34 training_loss 0.346472804248333 test_loss: 0.3567173004150391
epoch: 35 training_loss 0.3544060254096985 test_loss: 0.3599457979202271
epoch: 36 training_loss 0.3394982285797596 test_loss: 0.3137736082077026
epoch: 37 training_loss 0.34323638424277303 test_loss: 0.35123112201690676
epoch: 38 training_loss 0.35102140173316004 test_loss: 0.33742127418518064
epoch: 39 training_loss 0.3476221600174904 test_loss: 0.31283342838287354
epoch: 40 training_loss 0.336457554847002 test_loss: 0.33210268020629885
epoch: 41 training_loss 0.3439641819894314 test_loss: 0.3321765661239624
epoch: 42 training_loss 0.33714219644665716 test_loss: 0.3254444360733032
epoch: 43 training_loss 0.3376924256980419 test_loss: 0.33476178646087645
epoch: 44 training_loss 0.34343507468700407 test_loss: 0.32306296825408937
epoch: 45 training_loss 0.3499974228441715 test_loss: 0.3357934713363647
epoch: 46 training_loss 0.34045132786035537 test_loss: 0.34303596019744875
epoch: 47 training_loss 0.34866996601223943 test_loss: 0.370134973526001
epoch: 48 training_loss 0.34044210135936737 test_loss: 0.3213620901107788
epoch: 49 training_loss 0.32656862020492555 test_loss: 0.3525115966796875
epoch: 50 training_loss 0.3343957282602787 test_loss: 0.3645244359970093
epoch: 51 training_loss 0.33408659040927885 test_loss: 0.360729718208313
epoch: 52 training_loss 0.34063863530755045 test_loss: 0.3472947597503662
epoch: 53 training_loss 0.34583014249801636 test_loss: 0.3493018865585327
epoch: 54 training_loss 0.3348670716583729 test_loss: 0.3480975151062012
epoch: 55 training_loss 0.3358689786493778 test_loss: 0.32856292724609376
epoch: 56 training_loss 0.3461167135834694 test_loss: 0.33614885807037354
epoch: 57 training_loss 0.34460230603814124 test_loss: 0.35209012031555176
epoch: 58 training_loss 0.33220088601112363 test_loss: 0.31818721294403074
epoch: 59 training_loss 0.3517693051695824 test_loss: 0.32372112274169923
epoch: 60 training_loss 0.3367768903076649 test_loss: 0.33222341537475586
epoch: 61 training_loss 0.3402113428711891 test_loss: 0.33965554237365725
epoch: 62 training_loss 0.33912657931447027 test_loss: 0.37201657295227053
epoch: 63 training_loss 0.34588633611798286 test_loss: 0.3316547155380249
epoch: 64 training_loss 0.34233902290463447 test_loss: 0.32951557636260986
epoch: 65 training_loss 0.3359816797077656 test_loss: 0.3344804525375366
epoch: 66 training_loss 0.3283489692211151 test_loss: 0.34135310649871825
epoch: 67 training_loss 0.3489943142235279 test_loss: 0.32118592262268064
epoch: 68 training_loss 0.33216539725661276 test_loss: 0.332507848739624
epoch: 69 training_loss 0.3370152130723 test_loss: 0.32993412017822266
epoch: 70 training_loss 0.3485801921784878 test_loss: 0.33759608268737795
epoch: 71 training_loss 0.3438054820895195 test_loss: 0.34862120151519777
epoch: 72 training_loss 0.34892374247312546 test_loss: 0.3399356842041016
epoch: 73 training_loss 0.34043934762477873 test_loss: 0.3298178195953369
epoch: 74 training_loss 0.3436718159914017 test_loss: 0.3173191785812378
epoch: 75 training_loss 0.33984126672148707 test_loss: 0.3430901288986206
epoch: 76 training_loss 0.344549615830183 test_loss: 0.3257057428359985
epoch: 77 training_loss 0.3285223226249218 test_loss: 0.3396944522857666
epoch: 78 training_loss 0.34650590240955353 test_loss: 0.37057783603668215
epoch: 79 training_loss 0.3468909963965416 test_loss: 0.32623264789581297
epoch: 80 training_loss 0.33018210157752037 test_loss: 0.32600975036621094
epoch: 81 training_loss 0.3376678541302681 test_loss: 0.32376396656036377
epoch: 82 training_loss 0.33377272978425027 test_loss: 0.32627828121185304
epoch: 83 training_loss 0.3384286519885063 test_loss: 0.30535733699798584
epoch: 84 training_loss 0.3392780770361423 test_loss: 0.324531626701355
epoch: 85 training_loss 0.3406009802222252 test_loss: 0.3624776124954224
epoch: 86 training_loss 0.343676570802927 test_loss: 0.35114312171936035
epoch: 87 training_loss 0.33697076186537744 test_loss: 0.32130839824676516
epoch: 88 training_loss 0.3336993719637394 test_loss: 0.34370262622833253
epoch: 89 training_loss 0.3328193128108978 test_loss: 0.3311152458190918
epoch: 90 training_loss 0.3398663628101349 test_loss: 0.3450202703475952
epoch: 91 training_loss 0.3324144756793976 test_loss: 0.33469998836517334
epoch: 92 training_loss 0.3380933231115341 test_loss: 0.3314187526702881
epoch: 93 training_loss 0.3270918823778629 test_loss: 0.3150970220565796
epoch: 94 training_loss 0.3434293510019779 test_loss: 0.3221131801605225
epoch: 95 training_loss 0.33481322273612024 test_loss: 0.3273500442504883
epoch: 96 training_loss 0.3387999378144741 test_loss: 0.3270639181137085
epoch: 97 training_loss 0.33404877603054045 test_loss: 0.3507631063461304
epoch: 98 training_loss 0.34812056973576544 test_loss: 0.3415080070495605
epoch: 99 training_loss 0.3491887986660004 test_loss: 0.3667186498641968
epoch: 100 training_loss 0.3279324445128441 test_loss: 0.3163196325302124
epoch: 101 training_loss 0.3350472202897072 test_loss: 0.3421430826187134
epoch: 102 training_loss 0.33602075442671775 test_loss: 0.34158427715301515
epoch: 103 training_loss 0.33474492937326433 test_loss: 0.3380091190338135
epoch: 104 training_loss 0.345798110216856 test_loss: 0.32030458450317384
epoch: 105 training_loss 0.3447535790503025 test_loss: 0.3548213005065918
epoch: 106 training_loss 0.3388432937860489 test_loss: 0.3104612588882446
epoch: 107 training_loss 0.3449764309823513 test_loss: 0.34525630474090574
epoch: 108 training_loss 0.33997824653983116 test_loss: 0.33381946086883546
epoch: 109 training_loss 0.3388739790022373 test_loss: 0.3367337226867676
epoch: 110 training_loss 0.34455033391714096 test_loss: 0.3370147466659546
epoch: 111 training_loss 0.3275727778673172 test_loss: 0.34932761192321776
epoch: 112 training_loss 0.3470983248949051 test_loss: 0.32937026023864746
epoch: 113 training_loss 0.3428079241514206 test_loss: 0.37598876953125
epoch: 114 training_loss 0.3287903033196926 test_loss: 0.3348271608352661
epoch: 115 training_loss 0.33831477880477906 test_loss: 0.3239373922348022
epoch: 116 training_loss 0.33580743834376336 test_loss: 0.3138047933578491
epoch: 117 training_loss 0.34149906679987907 test_loss: 0.33958611488342283
epoch: 118 training_loss 0.336930001527071 test_loss: 0.3311247587203979
epoch: 119 training_loss 0.32242186859250066 test_loss: 0.3348344326019287
epoch: 120 training_loss 0.34022840529680254 test_loss: 0.3525003671646118
epoch: 121 training_loss 0.3389190228283405 test_loss: 0.33998024463653564
epoch: 122 training_loss 0.337515437155962 test_loss: 0.3430457353591919
epoch: 123 training_loss 0.3538765047490597 test_loss: 0.3461229562759399
epoch: 124 training_loss 0.3428156903386116 test_loss: 0.31367390155792235
epoch: 125 training_loss 0.3397175432741642 test_loss: 0.34318015575408933
epoch: 126 training_loss 0.3404572321474552 test_loss: 0.3221099853515625
epoch: 127 training_loss 0.3377058456838131 test_loss: 0.33121275901794434
epoch: 128 training_loss 0.3403588210046291 test_loss: 0.3722975492477417
epoch: 129 training_loss 0.3354101365804672 test_loss: 0.32179696559906007
epoch: 130 training_loss 0.3373566511273384 test_loss: 0.33614065647125246
epoch: 131 training_loss 0.33899835154414176 test_loss: 0.32791140079498293
epoch: 132 training_loss 0.3468269208073616 test_loss: 0.33575143814086916
epoch: 133 training_loss 0.3318026228249073 test_loss: 0.35100932121276857
epoch: 134 training_loss 0.3481099438667297 test_loss: 0.34979116916656494
epoch: 135 training_loss 0.3438294526934624 test_loss: 0.3305664539337158
epoch: 136 training_loss 0.34471974909305575 test_loss: 0.3548171043395996
epoch: 137 training_loss 0.3342616593837738 test_loss: 0.3218285083770752
epoch: 138 training_loss 0.35303345173597334 test_loss: 0.34461121559143065
epoch: 139 training_loss 0.3354087042808533 test_loss: 0.35332293510437013
epoch: 140 training_loss 0.3474853201210499 test_loss: 0.32493436336517334
epoch: 141 training_loss 0.345396858304739 test_loss: 0.32774930000305175
epoch: 142 training_loss 0.3370831836760044 test_loss: 0.34839327335357667
epoch: 143 training_loss 0.3386269664764404 test_loss: 0.32993905544281005
epoch: 144 training_loss 0.332082911580801 test_loss: 0.338280987739563
epoch: 145 training_loss 0.33470630645751953 test_loss: 0.3433009386062622
epoch: 146 training_loss 0.3356806139647961 test_loss: 0.3490039587020874
epoch: 147 training_loss 0.3395942281186581 test_loss: 0.3400370359420776
epoch: 148 training_loss 0.33568095102906226 test_loss: 0.3361647367477417
epoch: 149 training_loss 0.33915064424276353 test_loss: 0.33640470504760744
epoch: 0 training_loss 38.08827877044678 test_loss: 27.3765869140625
epoch: 1 training_loss 22.790315685272216 test_loss: 19.468838500976563
epoch: 2 training_loss 17.720439805984498 test_loss: 16.207560729980468
epoch: 3 training_loss 15.23207582473755 test_loss: 14.795436096191406
epoch: 4 training_loss 13.739555492401124 test_loss: 12.738668823242188
epoch: 5 training_loss 13.080947952270508 test_loss: 12.287329864501952
epoch: 6 training_loss 12.095945568084717 test_loss: 12.307762145996094
epoch: 7 training_loss 11.290379390716552 test_loss: 11.260586547851563
epoch: 8 training_loss 10.673313665390015 test_loss: 10.645074462890625
epoch: 9 training_loss 10.309814491271972 test_loss: 10.50217514038086
epoch: 10 training_loss 9.838580255508424 test_loss: 9.42514877319336
epoch: 11 training_loss 9.40576457977295 test_loss: 9.40986328125
epoch: 12 training_loss 9.348961839675903 test_loss: 9.100116729736328
epoch: 13 training_loss 8.872283744812012 test_loss: 9.103196716308593
epoch: 14 training_loss 8.777749209403991 test_loss: 8.365863800048828
epoch: 15 training_loss 8.262646880149841 test_loss: 7.8893898010253904
epoch: 16 training_loss 7.979197583198547 test_loss: 9.02051010131836
epoch: 17 training_loss 7.9264948225021366 test_loss: 8.784967041015625
epoch: 18 training_loss 7.5766764259338375 test_loss: 7.555393218994141
epoch: 19 training_loss 7.571454949378968 test_loss: 7.463823699951172
epoch: 20 training_loss 7.393591566085815 test_loss: 8.227460479736328
epoch: 21 training_loss 7.191948261260986 test_loss: 6.9123176574707035
epoch: 22 training_loss 7.13254596710205 test_loss: 7.1126708984375
epoch: 23 training_loss 6.987046842575073 test_loss: 6.8125251770019535
epoch: 24 training_loss 7.012092852592469 test_loss: 6.690727996826172
epoch: 25 training_loss 6.716907272338867 test_loss: 6.543424987792969
epoch: 26 training_loss 6.622405166625977 test_loss: 7.0564826965332035
epoch: 27 training_loss 6.428975377082825 test_loss: 6.4027046203613285
epoch: 28 training_loss 6.463698406219482 test_loss: 6.581201171875
epoch: 29 training_loss 6.368899474143982 test_loss: 6.271358871459961
epoch: 30 training_loss 6.186272540092468 test_loss: 6.854337310791015
epoch: 31 training_loss 6.360197739601135 test_loss: 6.299461364746094
epoch: 32 training_loss 5.896336331367492 test_loss: 6.174980163574219
epoch: 33 training_loss 6.095651144981384 test_loss: 6.5835723876953125
epoch: 34 training_loss 6.200168423652649 test_loss: 5.858592987060547
epoch: 35 training_loss 5.841887583732605 test_loss: 6.029773712158203
epoch: 36 training_loss 6.072806553840637 test_loss: 5.997374725341797
epoch: 37 training_loss 5.777745943069458 test_loss: 5.853993606567383
epoch: 38 training_loss 5.790321607589721 test_loss: 5.967774200439453
epoch: 39 training_loss 5.761025276184082 test_loss: 5.636248397827148
epoch: 40 training_loss 5.774695129394531 test_loss: 5.8091480255126955
epoch: 41 training_loss 5.7696052932739255 test_loss: 5.330315399169922
epoch: 42 training_loss 5.508203158378601 test_loss: 5.511161804199219
epoch: 43 training_loss 5.434650492668152 test_loss: 5.818587875366211
epoch: 44 training_loss 5.603140735626221 test_loss: 5.677815628051758
epoch: 45 training_loss 5.284384768009186 test_loss: 5.781857299804687
epoch: 46 training_loss 5.37734893321991 test_loss: 5.260199737548828
epoch: 47 training_loss 5.443321509361267 test_loss: 5.480788803100586
epoch: 48 training_loss 5.307808780670166 test_loss: 4.994787979125976
epoch: 49 training_loss 5.434825000762939 test_loss: 5.430071258544922
epoch: 50 training_loss 5.265575246810913 test_loss: 5.198078155517578
epoch: 51 training_loss 5.326107034683227 test_loss: 5.313665771484375
epoch: 52 training_loss 5.212675850391388 test_loss: 5.1579437255859375
epoch: 53 training_loss 5.2929334688186644 test_loss: 5.1035011291503904
epoch: 54 training_loss 5.180601487159729 test_loss: 5.68114013671875
epoch: 55 training_loss 5.3036072635650635 test_loss: 5.2356201171875
epoch: 56 training_loss 5.161838464736938 test_loss: 5.153974151611328
epoch: 57 training_loss 5.1320707535743715 test_loss: 5.415825653076172
epoch: 58 training_loss 5.136894407272339 test_loss: 4.934011459350586
epoch: 59 training_loss 5.0472151470184325 test_loss: 5.282901763916016
epoch: 60 training_loss 5.101280732154846 test_loss: 4.9730888366699215
epoch: 61 training_loss 5.06651547908783 test_loss: 5.121847152709961
epoch: 62 training_loss 4.985042173862457 test_loss: 4.992431640625
epoch: 63 training_loss 4.974731795787811 test_loss: 4.803234100341797
epoch: 64 training_loss 4.933251411914825 test_loss: 5.390799331665039
epoch: 65 training_loss 4.8987340950965885 test_loss: 5.365496444702148
epoch: 66 training_loss 5.046207249164581 test_loss: 5.216700744628906
epoch: 67 training_loss 5.0460461688041685 test_loss: 5.038459396362304
epoch: 68 training_loss 4.922342946529389 test_loss: 4.936734390258789
epoch: 69 training_loss 4.828386542797088 test_loss: 4.745408248901367
epoch: 70 training_loss 4.891524138450623 test_loss: 4.7816001892089846
epoch: 71 training_loss 4.886415522098542 test_loss: 4.703123474121094
epoch: 72 training_loss 4.793724920749664 test_loss: 4.8348438262939455
epoch: 73 training_loss 4.75328227519989 test_loss: 4.657966232299804
epoch: 74 training_loss 4.863065717220306 test_loss: 4.883959579467773
epoch: 75 training_loss 4.667873976230621 test_loss: 4.638078689575195
epoch: 76 training_loss 4.619964559078216 test_loss: 4.650704193115234
epoch: 77 training_loss 4.717504744529724 test_loss: 5.124299240112305
epoch: 78 training_loss 4.68891893863678 test_loss: 4.483559036254883
epoch: 79 training_loss 4.877355024814606 test_loss: 4.7314105987548825
epoch: 80 training_loss 4.652497329711914 test_loss: 4.600654602050781
epoch: 81 training_loss 4.643040716648102 test_loss: 4.811130523681641
epoch: 82 training_loss 4.6127514958381655 test_loss: 4.531931304931641
epoch: 83 training_loss 4.659491319656372 test_loss: 5.066445541381836
epoch: 84 training_loss 4.800029253959655 test_loss: 4.4714916229248045
epoch: 85 training_loss 4.4811198210716245 test_loss: 4.412682342529297
epoch: 86 training_loss 4.468987607955933 test_loss: 4.6418296813964846
epoch: 87 training_loss 4.433197090625763 test_loss: 4.995552062988281
epoch: 88 training_loss 4.578012640476227 test_loss: 4.531332778930664
epoch: 89 training_loss 4.457688076496124 test_loss: 4.4181266784667965
epoch: 90 training_loss 4.552134666442871 test_loss: 4.617732238769531
epoch: 91 training_loss 4.4459928607940675 test_loss: 4.708320617675781
epoch: 92 training_loss 4.503333101272583 test_loss: 4.689817810058594
epoch: 93 training_loss 4.623436374664307 test_loss: 4.395473861694336
epoch: 94 training_loss 4.424864242076874 test_loss: 4.730125045776367
epoch: 95 training_loss 4.421967437267304 test_loss: 4.402129745483398
epoch: 96 training_loss 4.48799971818924 test_loss: 4.905402374267578
epoch: 97 training_loss 4.435185809135437 test_loss: 4.5737358093261715
epoch: 98 training_loss 4.416907229423523 test_loss: 4.444966888427734
epoch: 99 training_loss 4.308078563213348 test_loss: 4.324656677246094
epoch: 100 training_loss 4.401956434249878 test_loss: 4.393429946899414
epoch: 101 training_loss 4.444361050128936 test_loss: 4.565165710449219
epoch: 102 training_loss 4.295209369659424 test_loss: 4.823719787597656
epoch: 103 training_loss 4.386787149906159 test_loss: 4.456349945068359
epoch: 104 training_loss 4.530970442295074 test_loss: 4.44648323059082
epoch: 105 training_loss 4.303599529266357 test_loss: 4.268012619018554
epoch: 106 training_loss 4.290425524711609 test_loss: 4.389641952514649
epoch: 107 training_loss 4.422731380462647 test_loss: 4.48420524597168
epoch: 108 training_loss 4.379150941371917 test_loss: 4.411429595947266
epoch: 109 training_loss 4.262450575828552 test_loss: 4.202808380126953
epoch: 110 training_loss 4.235848264694214 test_loss: 4.454007339477539
epoch: 111 training_loss 4.284032142162323 test_loss: 4.127402877807617
epoch: 112 training_loss 4.218716390132904 test_loss: 4.376101303100586
epoch: 113 training_loss 4.209464795589447 test_loss: 4.141334533691406
epoch: 114 training_loss 4.365685679912567 test_loss: 4.449542999267578
epoch: 115 training_loss 4.156392185688019 test_loss: 4.223207092285156
epoch: 116 training_loss 4.123413672447205 test_loss: 3.937664031982422
epoch: 117 training_loss 4.17372296333313 test_loss: 4.400132369995117
epoch: 118 training_loss 4.293123075962066 test_loss: 4.308794021606445
epoch: 119 training_loss 4.206562969684601 test_loss: 4.1724700927734375
epoch: 120 training_loss 4.083249127864837 test_loss: 4.059729385375976
epoch: 121 training_loss 4.325427901744843 test_loss: 4.277945709228516
epoch: 122 training_loss 4.158521592617035 test_loss: 4.203678131103516
epoch: 123 training_loss 4.090859367847442 test_loss: 4.315432739257813
epoch: 124 training_loss 4.081565721035004 test_loss: 4.153952789306641
epoch: 125 training_loss 4.063685746192932 test_loss: 4.280638885498047
epoch: 126 training_loss 4.094561331272125 test_loss: 3.9720684051513673
epoch: 127 training_loss 4.259886593818664 test_loss: 4.276794052124023
epoch: 128 training_loss 4.1274662566185 test_loss: 3.9808586120605467
epoch: 129 training_loss 4.142621777057648 test_loss: 4.24468994140625
epoch: 130 training_loss 4.231568827629089 test_loss: 3.8230628967285156
epoch: 131 training_loss 3.969914698600769 test_loss: 4.247797012329102
epoch: 132 training_loss 3.918010516166687 test_loss: 4.00780143737793
epoch: 133 training_loss 4.089801657199859 test_loss: 4.682197189331054
epoch: 134 training_loss 4.125651907920838 test_loss: 4.177536392211914
epoch: 135 training_loss 4.083208577632904 test_loss: 3.8724655151367187
epoch: 136 training_loss 4.020373718738556 test_loss: 4.03892822265625
epoch: 137 training_loss 3.8756856155395507 test_loss: 4.232320022583008
epoch: 138 training_loss 4.118536620140076 test_loss: 4.719441223144531
epoch: 139 training_loss 4.017399549484253 test_loss: 4.169757080078125
epoch: 140 training_loss 3.9195076489448546 test_loss: 3.8539066314697266
epoch: 141 training_loss 3.9550364923477175 test_loss: 4.228433609008789
epoch: 142 training_loss 4.121907482147217 test_loss: 4.010605239868164
epoch: 143 training_loss 4.083350794315338 test_loss: 3.6759342193603515
epoch: 144 training_loss 3.930534157752991 test_loss: 3.863058090209961
epoch: 145 training_loss 4.087567975521088 test_loss: 4.306031036376953
epoch: 146 training_loss 3.9807911682128907 test_loss: 3.951493835449219
epoch: 147 training_loss 3.9706309175491334 test_loss: 4.200849151611328
epoch: 148 training_loss 3.93564563035965 test_loss: 3.7050765991210937
epoch: 149 training_loss 3.935577290058136 test_loss: 4.359724044799805
55.762017257434536
episode: 0 training return: tensor(33.4799, device='cuda:0')
episode: 1 training return: tensor(2.9266, device='cuda:0')
episode: 2 training return: tensor(3.1901, device='cuda:0')
episode: 3 training return: tensor(7.7185, device='cuda:0')
epoch: 1 test_true_pfm: 59.06673973811526 sim_pfm: 29.556565728242276
episode: 4 training return: tensor(30.1733, device='cuda:0')
episode: 5 training return: tensor(4.8394, device='cuda:0')
episode: 6 training return: tensor(32.7065, device='cuda:0')
episode: 7 training return: tensor(1.2044, device='cuda:0')
epoch: 2 test_true_pfm: 91.1941155220136 sim_pfm: 21.90298427558155
episode: 8 training return: tensor(7.7825, device='cuda:0')
episode: 9 training return: tensor(9.0957, device='cuda:0')
episode: 10 training return: tensor(23.9243, device='cuda:0')
episode: 11 training return: tensor(11.3787, device='cuda:0')
epoch: 3 test_true_pfm: 78.33427264402665 sim_pfm: 31.866338918957627
episode: 12 training return: tensor(55.3721, device='cuda:0')
episode: 13 training return: tensor(62.7643, device='cuda:0')
episode: 14 training return: tensor(3.1369, device='cuda:0')
episode: 15 training return: tensor(76.1829, device='cuda:0')
epoch: 4 test_true_pfm: 56.39772419260731 sim_pfm: 51.949445690668654
episode: 16 training return: tensor(81.8586, device='cuda:0')
episode: 17 training return: tensor(30.1413, device='cuda:0')
episode: 18 training return: tensor(9.5041, device='cuda:0')
episode: 19 training return: tensor(13.5199, device='cuda:0')
epoch: 5 test_true_pfm: 97.64592556652636 sim_pfm: 46.54182835066458
episode: 20 training return: tensor(75.0644, device='cuda:0')
episode: 21 training return: tensor(64.2772, device='cuda:0')
episode: 22 training return: tensor(27.6495, device='cuda:0')
episode: 23 training return: tensor(63.7117, device='cuda:0')
epoch: 6 test_true_pfm: 107.5481660547515 sim_pfm: 18.017133011121768
episode: 24 training return: tensor(69.2903, device='cuda:0')
episode: 25 training return: tensor(13.6215, device='cuda:0')
episode: 26 training return: tensor(8.1979, device='cuda:0')
episode: 27 training return: tensor(59.9802, device='cuda:0')
epoch: 7 test_true_pfm: 82.34363759139733 sim_pfm: 26.219232267141344
episode: 28 training return: tensor(16.2805, device='cuda:0')
episode: 29 training return: tensor(14.6405, device='cuda:0')
episode: 30 training return: tensor(5.2772, device='cuda:0')
episode: 31 training return: tensor(14.8428, device='cuda:0')
epoch: 8 test_true_pfm: 83.94023456817949 sim_pfm: 59.46972598149441
episode: 32 training return: tensor(36.2753, device='cuda:0')
episode: 33 training return: tensor(54.3668, device='cuda:0')
episode: 34 training return: tensor(16.0079, device='cuda:0')
episode: 35 training return: tensor(11.5158, device='cuda:0')
epoch: 9 test_true_pfm: 81.78859429458858 sim_pfm: 32.92867986970232
episode: 36 training return: tensor(58.9679, device='cuda:0')
episode: 37 training return: tensor(31.4449, device='cuda:0')
episode: 38 training return: tensor(4.8956, device='cuda:0')
episode: 39 training return: tensor(7.8059, device='cuda:0')
epoch: 10 test_true_pfm: 51.307284590921384 sim_pfm: 8.447729872172932
episode: 40 training return: tensor(4.2042, device='cuda:0')
episode: 41 training return: tensor(11.3835, device='cuda:0')
episode: 42 training return: tensor(14.1432, device='cuda:0')
episode: 43 training return: tensor(2.8604, device='cuda:0')
epoch: 11 test_true_pfm: 51.431978494146165 sim_pfm: 8.889473196241306
episode: 44 training return: tensor(15.3344, device='cuda:0')
episode: 45 training return: tensor(13.4263, device='cuda:0')
episode: 46 training return: tensor(37.9990, device='cuda:0')
episode: 47 training return: tensor(4.8320, device='cuda:0')
epoch: 12 test_true_pfm: 110.15150769651544 sim_pfm: 26.688341670687077
episode: 48 training return: tensor(5.2091, device='cuda:0')
episode: 49 training return: tensor(13.5930, device='cuda:0')
episode: 50 training return: tensor(8.3834, device='cuda:0')
episode: 51 training return: tensor(14.8542, device='cuda:0')
epoch: 13 test_true_pfm: 71.19589586480387 sim_pfm: 41.306477269070456
episode: 52 training return: tensor(27.6084, device='cuda:0')
episode: 53 training return: tensor(51.6889, device='cuda:0')
episode: 54 training return: tensor(14.3964, device='cuda:0')
episode: 55 training return: tensor(8.2120, device='cuda:0')
epoch: 14 test_true_pfm: 83.89554772332005 sim_pfm: 43.92062275151839
episode: 56 training return: tensor(67.1915, device='cuda:0')
episode: 57 training return: tensor(2.3046, device='cuda:0')
episode: 58 training return: tensor(61.3045, device='cuda:0')
episode: 59 training return: tensor(62.9140, device='cuda:0')
epoch: 15 test_true_pfm: 76.68733759632639 sim_pfm: 35.858341229124925
episode: 60 training return: tensor(43.7017, device='cuda:0')
episode: 61 training return: tensor(72.3228, device='cuda:0')
episode: 62 training return: tensor(-0.1876, device='cuda:0')
episode: 63 training return: tensor(70.5537, device='cuda:0')
epoch: 16 test_true_pfm: 91.90610353922976 sim_pfm: 45.83352031190297
episode: 64 training return: tensor(62.5203, device='cuda:0')
episode: 65 training return: tensor(6.6207, device='cuda:0')
episode: 66 training return: tensor(75.0229, device='cuda:0')
episode: 67 training return: tensor(69.7790, device='cuda:0')
epoch: 17 test_true_pfm: 76.8972967441375 sim_pfm: 25.379765714582753
episode: 68 training return: tensor(27.0350, device='cuda:0')
episode: 69 training return: tensor(4.2126, device='cuda:0')
episode: 70 training return: tensor(37.6644, device='cuda:0')
episode: 71 training return: tensor(13.5113, device='cuda:0')
epoch: 18 test_true_pfm: 127.95676906843046 sim_pfm: 57.639000668318474
episode: 72 training return: tensor(1.1270, device='cuda:0')
episode: 73 training return: tensor(13.7820, device='cuda:0')
episode: 74 training return: tensor(74.9019, device='cuda:0')
episode: 75 training return: tensor(48.5595, device='cuda:0')
epoch: 19 test_true_pfm: 75.55282825446592 sim_pfm: 26.416536519292276
episode: 76 training return: tensor(70.6472, device='cuda:0')
episode: 77 training return: tensor(53.9027, device='cuda:0')
episode: 78 training return: tensor(30.1356, device='cuda:0')
episode: 79 training return: tensor(55.2879, device='cuda:0')
epoch: 20 test_true_pfm: 88.5788214732664 sim_pfm: 47.9871033510135
episode: 80 training return: tensor(82.8084, device='cuda:0')
episode: 81 training return: tensor(11.8333, device='cuda:0')
episode: 82 training return: tensor(1.0016, device='cuda:0')
episode: 83 training return: tensor(57.7386, device='cuda:0')
epoch: 21 test_true_pfm: 83.89895104286977 sim_pfm: 33.11937272644136
episode: 84 training return: tensor(82.4250, device='cuda:0')
episode: 85 training return: tensor(6.9751, device='cuda:0')
episode: 86 training return: tensor(52.3422, device='cuda:0')
episode: 87 training return: tensor(10.6901, device='cuda:0')
epoch: 22 test_true_pfm: 57.605169955498255 sim_pfm: 59.32883993140422
episode: 88 training return: tensor(65.1942, device='cuda:0')
episode: 89 training return: tensor(18.9894, device='cuda:0')
episode: 90 training return: tensor(50.7736, device='cuda:0')
episode: 91 training return: tensor(14.5162, device='cuda:0')
epoch: 23 test_true_pfm: 94.68973514596365 sim_pfm: 57.21204867130145
episode: 92 training return: tensor(19.4890, device='cuda:0')
episode: 93 training return: tensor(22.6166, device='cuda:0')
episode: 94 training return: tensor(9.2131, device='cuda:0')
episode: 95 training return: tensor(69.6447, device='cuda:0')
epoch: 24 test_true_pfm: 79.62864716566449 sim_pfm: 54.32949531024788
episode: 96 training return: tensor(15.6117, device='cuda:0')
episode: 97 training return: tensor(50.9414, device='cuda:0')
episode: 98 training return: tensor(26.1441, device='cuda:0')
episode: 99 training return: tensor(35.6655, device='cuda:0')
epoch: 25 test_true_pfm: 83.10145868698211 sim_pfm: 36.822627930995075
episode: 100 training return: tensor(1.5786, device='cuda:0')
episode: 101 training return: tensor(82.7953, device='cuda:0')
episode: 102 training return: tensor(74.2458, device='cuda:0')
episode: 103 training return: tensor(22.7081, device='cuda:0')
epoch: 26 test_true_pfm: 101.06185824597284 sim_pfm: 39.76852693514665
episode: 104 training return: tensor(72.3953, device='cuda:0')
episode: 105 training return: tensor(80.7129, device='cuda:0')
episode: 106 training return: tensor(-0.4626, device='cuda:0')
episode: 107 training return: tensor(38.7757, device='cuda:0')
epoch: 27 test_true_pfm: 114.19269196165646 sim_pfm: 48.07978893952677
episode: 108 training return: tensor(79.3025, device='cuda:0')
episode: 109 training return: tensor(32.5751, device='cuda:0')
episode: 110 training return: tensor(33.0941, device='cuda:0')
episode: 111 training return: tensor(83.5690, device='cuda:0')
epoch: 28 test_true_pfm: 50.695741666367745 sim_pfm: 42.26556293173344
episode: 112 training return: tensor(79.2840, device='cuda:0')
episode: 113 training return: tensor(26.2876, device='cuda:0')
episode: 114 training return: tensor(28.6637, device='cuda:0')
episode: 115 training return: tensor(53.9011, device='cuda:0')
epoch: 29 test_true_pfm: 77.19005221900285 sim_pfm: 55.743007849005515
episode: 116 training return: tensor(6.3365, device='cuda:0')
episode: 117 training return: tensor(35.0991, device='cuda:0')
episode: 118 training return: tensor(3.9566, device='cuda:0')
episode: 119 training return: tensor(3.2676, device='cuda:0')
epoch: 30 test_true_pfm: 47.49152712654246 sim_pfm: 34.6788846455107
episode: 120 training return: tensor(34.2890, device='cuda:0')
episode: 121 training return: tensor(15.4372, device='cuda:0')
episode: 122 training return: tensor(7.5708, device='cuda:0')
episode: 123 training return: tensor(30.4086, device='cuda:0')
epoch: 31 test_true_pfm: 87.54647713044889 sim_pfm: 51.161271978891456
episode: 124 training return: tensor(0.1776, device='cuda:0')
episode: 125 training return: tensor(83.6258, device='cuda:0')
episode: 126 training return: tensor(81.0372, device='cuda:0')
episode: 127 training return: tensor(79.5785, device='cuda:0')
epoch: 32 test_true_pfm: 95.40523786753695 sim_pfm: 32.272250953310866
episode: 128 training return: tensor(6.5391, device='cuda:0')
episode: 129 training return: tensor(80.8146, device='cuda:0')
episode: 130 training return: tensor(72.9806, device='cuda:0')
episode: 131 training return: tensor(80.2387, device='cuda:0')
epoch: 33 test_true_pfm: 92.23907267862809 sim_pfm: 54.996025211602685
episode: 132 training return: tensor(46.9963, device='cuda:0')
episode: 133 training return: tensor(10.1790, device='cuda:0')
episode: 134 training return: tensor(24.0979, device='cuda:0')
episode: 135 training return: tensor(31.8283, device='cuda:0')
epoch: 34 test_true_pfm: 87.86836259693241 sim_pfm: 35.758738831349184
episode: 136 training return: tensor(70.8182, device='cuda:0')
episode: 137 training return: tensor(61.2653, device='cuda:0')
episode: 138 training return: tensor(82.0767, device='cuda:0')
episode: 139 training return: tensor(41.2783, device='cuda:0')
epoch: 35 test_true_pfm: 93.23426854532521 sim_pfm: 41.3749605783436
episode: 140 training return: tensor(61.9669, device='cuda:0')
episode: 141 training return: tensor(31.3170, device='cuda:0')
episode: 142 training return: tensor(12.5537, device='cuda:0')
episode: 143 training return: tensor(8.2648, device='cuda:0')
epoch: 36 test_true_pfm: 89.61790030067428 sim_pfm: 26.269682645786087
episode: 144 training return: tensor(8.7469, device='cuda:0')
episode: 145 training return: tensor(13.2822, device='cuda:0')
episode: 146 training return: tensor(10.9370, device='cuda:0')
episode: 147 training return: tensor(19.2230, device='cuda:0')
epoch: 37 test_true_pfm: 68.00532101760385 sim_pfm: 10.105927277012961
episode: 148 training return: tensor(79.8887, device='cuda:0')
episode: 149 training return: tensor(78.3427, device='cuda:0')
episode: 150 training return: tensor(30.1121, device='cuda:0')
episode: 151 training return: tensor(7.2527, device='cuda:0')
epoch: 38 test_true_pfm: 77.34872098604812 sim_pfm: 14.981513059162534
episode: 152 training return: tensor(12.8689, device='cuda:0')
episode: 153 training return: tensor(44.5803, device='cuda:0')
episode: 154 training return: tensor(46.0316, device='cuda:0')
episode: 155 training return: tensor(13.8449, device='cuda:0')
epoch: 39 test_true_pfm: 85.92105766579353 sim_pfm: 44.03242099962663
episode: 156 training return: tensor(67.1522, device='cuda:0')
episode: 157 training return: tensor(13.1423, device='cuda:0')
episode: 158 training return: tensor(49.2348, device='cuda:0')
episode: 159 training return: tensor(73.5928, device='cuda:0')
epoch: 40 test_true_pfm: 78.75467812210164 sim_pfm: 48.80539896830451
episode: 160 training return: tensor(8.3787, device='cuda:0')
episode: 161 training return: tensor(32.7196, device='cuda:0')
episode: 162 training return: tensor(7.9106, device='cuda:0')
episode: 163 training return: tensor(10.2864, device='cuda:0')
epoch: 41 test_true_pfm: 62.293147876920145 sim_pfm: 31.455246443208306
episode: 164 training return: tensor(8.8769, device='cuda:0')
episode: 165 training return: tensor(7.0024, device='cuda:0')
episode: 166 training return: tensor(29.2762, device='cuda:0')
episode: 167 training return: tensor(72.5909, device='cuda:0')
epoch: 42 test_true_pfm: 56.95238997531978 sim_pfm: 33.59003172357916
episode: 168 training return: tensor(32.0078, device='cuda:0')
episode: 169 training return: tensor(81.0575, device='cuda:0')
episode: 170 training return: tensor(44.7375, device='cuda:0')
episode: 171 training return: tensor(50.1758, device='cuda:0')
epoch: 43 test_true_pfm: 69.61469015194896 sim_pfm: 25.906308951729443
episode: 172 training return: tensor(34.3893, device='cuda:0')
episode: 173 training return: tensor(14.7320, device='cuda:0')
episode: 174 training return: tensor(23.5745, device='cuda:0')
episode: 175 training return: tensor(64.2834, device='cuda:0')
epoch: 44 test_true_pfm: 75.40845099992626 sim_pfm: 34.25366806827951
episode: 176 training return: tensor(77.9540, device='cuda:0')
episode: 177 training return: tensor(12.8554, device='cuda:0')
episode: 178 training return: tensor(75.5724, device='cuda:0')
episode: 179 training return: tensor(36.5202, device='cuda:0')
epoch: 45 test_true_pfm: 70.97663373413873 sim_pfm: 39.18638483827235
episode: 180 training return: tensor(20.7399, device='cuda:0')
episode: 181 training return: tensor(75.8295, device='cuda:0')
episode: 182 training return: tensor(80.3761, device='cuda:0')
episode: 183 training return: tensor(31.9244, device='cuda:0')
epoch: 46 test_true_pfm: 96.33035089099141 sim_pfm: 73.99434150522575
episode: 184 training return: tensor(13.4836, device='cuda:0')
episode: 185 training return: tensor(23.9996, device='cuda:0')
episode: 186 training return: tensor(13.4276, device='cuda:0')
episode: 187 training return: tensor(80.1245, device='cuda:0')
epoch: 47 test_true_pfm: 84.86528521972419 sim_pfm: 77.55315178968594
episode: 188 training return: tensor(70.7560, device='cuda:0')
episode: 189 training return: tensor(61.6170, device='cuda:0')
episode: 190 training return: tensor(49.6531, device='cuda:0')
episode: 191 training return: tensor(74.6237, device='cuda:0')
epoch: 48 test_true_pfm: 102.01453568262134 sim_pfm: 61.560065105819376
episode: 192 training return: tensor(67.6058, device='cuda:0')
episode: 193 training return: tensor(66.5942, device='cuda:0')
episode: 194 training return: tensor(80.5158, device='cuda:0')
episode: 195 training return: tensor(61.9041, device='cuda:0')
epoch: 49 test_true_pfm: 87.96251632573419 sim_pfm: 64.54112835630659
episode: 196 training return: tensor(47.3050, device='cuda:0')
episode: 197 training return: tensor(71.9706, device='cuda:0')
episode: 198 training return: tensor(14.0172, device='cuda:0')
episode: 199 training return: tensor(7.3042, device='cuda:0')
epoch: 50 test_true_pfm: 63.55770110524435 sim_pfm: 46.62978511422989
episode: 200 training return: tensor(59.6656, device='cuda:0')
episode: 201 training return: tensor(47.9470, device='cuda:0')
episode: 202 training return: tensor(7.8433, device='cuda:0')
episode: 203 training return: tensor(78.7538, device='cuda:0')
epoch: 51 test_true_pfm: 91.4364090354788 sim_pfm: 46.844808722729795
episode: 204 training return: tensor(77.2219, device='cuda:0')
episode: 205 training return: tensor(56.4546, device='cuda:0')
episode: 206 training return: tensor(68.4123, device='cuda:0')
episode: 207 training return: tensor(60.7101, device='cuda:0')
epoch: 52 test_true_pfm: 101.61202982517798 sim_pfm: 58.147492445912214
episode: 208 training return: tensor(31.9071, device='cuda:0')
episode: 209 training return: tensor(13.6254, device='cuda:0')
episode: 210 training return: tensor(47.9567, device='cuda:0')
episode: 211 training return: tensor(65.9535, device='cuda:0')
epoch: 53 test_true_pfm: 98.31909626611684 sim_pfm: 66.66120687662041
episode: 212 training return: tensor(62.3630, device='cuda:0')
episode: 213 training return: tensor(73.4412, device='cuda:0')
episode: 214 training return: tensor(22.1898, device='cuda:0')
episode: 215 training return: tensor(63.6208, device='cuda:0')
epoch: 54 test_true_pfm: 89.39881860590481 sim_pfm: 65.63914954386419
episode: 216 training return: tensor(13.1424, device='cuda:0')
episode: 217 training return: tensor(64.8722, device='cuda:0')
episode: 218 training return: tensor(35.7437, device='cuda:0')
episode: 219 training return: tensor(72.4623, device='cuda:0')
epoch: 55 test_true_pfm: 98.3793225226649 sim_pfm: 57.56133483562735
episode: 220 training return: tensor(69.0757, device='cuda:0')
episode: 221 training return: tensor(59.6038, device='cuda:0')
episode: 222 training return: tensor(30.8908, device='cuda:0')
episode: 223 training return: tensor(68.1940, device='cuda:0')
epoch: 56 test_true_pfm: 78.6641801702408 sim_pfm: 38.76675931061036
episode: 224 training return: tensor(75.7218, device='cuda:0')
episode: 225 training return: tensor(75.1376, device='cuda:0')
episode: 226 training return: tensor(32.1990, device='cuda:0')
episode: 227 training return: tensor(57.5914, device='cuda:0')
epoch: 57 test_true_pfm: 82.5821920130525 sim_pfm: 57.08110286766896
episode: 228 training return: tensor(35.7951, device='cuda:0')
episode: 229 training return: tensor(57.8671, device='cuda:0')
episode: 230 training return: tensor(70.3044, device='cuda:0')
episode: 231 training return: tensor(80.8528, device='cuda:0')
epoch: 58 test_true_pfm: 104.7441224943669 sim_pfm: 52.2956769318087
episode: 232 training return: tensor(72.1619, device='cuda:0')
episode: 233 training return: tensor(67.1028, device='cuda:0')
episode: 234 training return: tensor(14.3849, device='cuda:0')
episode: 235 training return: tensor(81.2503, device='cuda:0')
epoch: 59 test_true_pfm: 88.36405123768 sim_pfm: 29.65322609694558
episode: 236 training return: tensor(13.7397, device='cuda:0')
episode: 237 training return: tensor(30.9903, device='cuda:0')
episode: 238 training return: tensor(57.9679, device='cuda:0')
episode: 239 training return: tensor(25.7034, device='cuda:0')
epoch: 60 test_true_pfm: 113.0877118616381 sim_pfm: 32.99990859484533
episode: 240 training return: tensor(46.9402, device='cuda:0')
episode: 241 training return: tensor(11.6101, device='cuda:0')
episode: 242 training return: tensor(46.9579, device='cuda:0')
episode: 243 training return: tensor(73.4349, device='cuda:0')
epoch: 61 test_true_pfm: 90.94608399980673 sim_pfm: 45.28680455692229
episode: 244 training return: tensor(36.0639, device='cuda:0')
episode: 245 training return: tensor(36.8746, device='cuda:0')
episode: 246 training return: tensor(14.3313, device='cuda:0')
episode: 247 training return: tensor(50.3865, device='cuda:0')
epoch: 62 test_true_pfm: 76.5398509153693 sim_pfm: 51.15331647324492
episode: 248 training return: tensor(76.3261, device='cuda:0')
episode: 249 training return: tensor(82.3559, device='cuda:0')
episode: 250 training return: tensor(77.2244, device='cuda:0')
episode: 251 training return: tensor(16.5116, device='cuda:0')
epoch: 63 test_true_pfm: 92.68364145012849 sim_pfm: 44.140115628030614
episode: 252 training return: tensor(12.8977, device='cuda:0')
episode: 253 training return: tensor(11.9898, device='cuda:0')
episode: 254 training return: tensor(61.9352, device='cuda:0')
episode: 255 training return: tensor(52.0248, device='cuda:0')
epoch: 64 test_true_pfm: 106.99322235177515 sim_pfm: 58.0246291301155
episode: 256 training return: tensor(77.5284, device='cuda:0')
episode: 257 training return: tensor(75.2907, device='cuda:0')
episode: 258 training return: tensor(14.4049, device='cuda:0')
episode: 259 training return: tensor(49.4770, device='cuda:0')
epoch: 65 test_true_pfm: 99.96904503587855 sim_pfm: 62.064820308063645
episode: 260 training return: tensor(82.1976, device='cuda:0')
episode: 261 training return: tensor(75.9788, device='cuda:0')
episode: 262 training return: tensor(58.7016, device='cuda:0')
episode: 263 training return: tensor(69.8963, device='cuda:0')
epoch: 66 test_true_pfm: 101.77975470440398 sim_pfm: 54.03850827178685
episode: 264 training return: tensor(78.4172, device='cuda:0')
episode: 265 training return: tensor(36.0543, device='cuda:0')
episode: 266 training return: tensor(69.1953, device='cuda:0')
episode: 267 training return: tensor(80.1265, device='cuda:0')
epoch: 67 test_true_pfm: 109.28961931812611 sim_pfm: 69.18114746342762
episode: 268 training return: tensor(10.2486, device='cuda:0')
episode: 269 training return: tensor(59.7601, device='cuda:0')
episode: 270 training return: tensor(13.0573, device='cuda:0')
episode: 271 training return: tensor(33.5867, device='cuda:0')
epoch: 68 test_true_pfm: 89.57718695419801 sim_pfm: 25.611976087937364
episode: 272 training return: tensor(60.4016, device='cuda:0')
episode: 273 training return: tensor(47.6490, device='cuda:0')
episode: 274 training return: tensor(27.7146, device='cuda:0')
episode: 275 training return: tensor(23.9639, device='cuda:0')
epoch: 69 test_true_pfm: 86.7265150871603 sim_pfm: 34.70428393170587
episode: 276 training return: tensor(12.9371, device='cuda:0')
episode: 277 training return: tensor(14.8088, device='cuda:0')
episode: 278 training return: tensor(71.5039, device='cuda:0')
episode: 279 training return: tensor(68.4740, device='cuda:0')
epoch: 70 test_true_pfm: 59.63551319950081 sim_pfm: 48.125942382367796
episode: 280 training return: tensor(84.1531, device='cuda:0')
episode: 281 training return: tensor(66.9359, device='cuda:0')
episode: 282 training return: tensor(62.0032, device='cuda:0')
episode: 283 training return: tensor(14.2629, device='cuda:0')
epoch: 71 test_true_pfm: 82.76306108811433 sim_pfm: 41.5991548715916
episode: 284 training return: tensor(79.8258, device='cuda:0')
episode: 285 training return: tensor(11.7226, device='cuda:0')
episode: 286 training return: tensor(14.1671, device='cuda:0')
episode: 287 training return: tensor(66.4480, device='cuda:0')
epoch: 72 test_true_pfm: 98.30530952428019 sim_pfm: 49.8393723718822
episode: 288 training return: tensor(53.4780, device='cuda:0')
episode: 289 training return: tensor(53.4302, device='cuda:0')
episode: 290 training return: tensor(43.6112, device='cuda:0')
episode: 291 training return: tensor(73.5430, device='cuda:0')
epoch: 73 test_true_pfm: 122.10315991305376 sim_pfm: 56.03215952938772
episode: 292 training return: tensor(57.3928, device='cuda:0')
episode: 293 training return: tensor(15.4735, device='cuda:0')
episode: 294 training return: tensor(75.7096, device='cuda:0')
episode: 295 training return: tensor(14.2021, device='cuda:0')
epoch: 74 test_true_pfm: 123.21187089379052 sim_pfm: 70.39341583793285
episode: 296 training return: tensor(14.3589, device='cuda:0')
episode: 297 training return: tensor(13.5472, device='cuda:0')
episode: 298 training return: tensor(36.6264, device='cuda:0')
episode: 299 training return: tensor(19.7286, device='cuda:0')
epoch: 75 test_true_pfm: 93.37897051932524 sim_pfm: 66.43505025219056
episode: 300 training return: tensor(80.5667, device='cuda:0')
episode: 301 training return: tensor(28.9978, device='cuda:0')
episode: 302 training return: tensor(23.1684, device='cuda:0')
episode: 303 training return: tensor(11.5631, device='cuda:0')
epoch: 76 test_true_pfm: 90.84026734520205 sim_pfm: 48.82723285410903
episode: 304 training return: tensor(78.5373, device='cuda:0')
episode: 305 training return: tensor(73.7111, device='cuda:0')
episode: 306 training return: tensor(6.7450, device='cuda:0')
episode: 307 training return: tensor(76.2231, device='cuda:0')
epoch: 77 test_true_pfm: 89.74077992501445 sim_pfm: 43.79185024992912
episode: 308 training return: tensor(22.0798, device='cuda:0')
episode: 309 training return: tensor(69.4247, device='cuda:0')
episode: 310 training return: tensor(61.9590, device='cuda:0')
episode: 311 training return: tensor(13.9451, device='cuda:0')
epoch: 78 test_true_pfm: 118.57928316210136 sim_pfm: 56.359849823941474
episode: 312 training return: tensor(11.6915, device='cuda:0')
episode: 313 training return: tensor(60.0308, device='cuda:0')
episode: 314 training return: tensor(83.0155, device='cuda:0')
episode: 315 training return: tensor(7.9591, device='cuda:0')
epoch: 79 test_true_pfm: 98.47936986808028 sim_pfm: 61.4962303582055
episode: 316 training return: tensor(81.4119, device='cuda:0')
episode: 317 training return: tensor(79.2469, device='cuda:0')
episode: 318 training return: tensor(31.9862, device='cuda:0')
episode: 319 training return: tensor(80.2959, device='cuda:0')
epoch: 80 test_true_pfm: 95.35502566669398 sim_pfm: 40.783312490273964
episode: 320 training return: tensor(79.0647, device='cuda:0')
episode: 321 training return: tensor(80.8245, device='cuda:0')
episode: 322 training return: tensor(27.3808, device='cuda:0')
episode: 323 training return: tensor(11.8868, device='cuda:0')
epoch: 81 test_true_pfm: 84.62561135525377 sim_pfm: 37.90918006511638
episode: 324 training return: tensor(74.8625, device='cuda:0')
episode: 325 training return: tensor(72.2461, device='cuda:0')
episode: 326 training return: tensor(13.4125, device='cuda:0')
episode: 327 training return: tensor(35.3130, device='cuda:0')
epoch: 82 test_true_pfm: 81.37558912151326 sim_pfm: 44.6696641241957
episode: 328 training return: tensor(13.1488, device='cuda:0')
episode: 329 training return: tensor(19.5542, device='cuda:0')
episode: 330 training return: tensor(5.6726, device='cuda:0')
episode: 331 training return: tensor(2.4091, device='cuda:0')
epoch: 83 test_true_pfm: 94.54517489202868 sim_pfm: 33.305986941308944
episode: 332 training return: tensor(6.9650, device='cuda:0')
episode: 333 training return: tensor(6.2329, device='cuda:0')
episode: 334 training return: tensor(80.1559, device='cuda:0')
episode: 335 training return: tensor(13.1003, device='cuda:0')
epoch: 84 test_true_pfm: 89.36861252171374 sim_pfm: 45.22666094904416
episode: 336 training return: tensor(14.6127, device='cuda:0')
episode: 337 training return: tensor(79.3870, device='cuda:0')
episode: 338 training return: tensor(11.7576, device='cuda:0')
episode: 339 training return: tensor(14.3193, device='cuda:0')
epoch: 85 test_true_pfm: 75.52723642365426 sim_pfm: 63.54057080835919
episode: 340 training return: tensor(83.6114, device='cuda:0')
episode: 341 training return: tensor(14.0299, device='cuda:0')
episode: 342 training return: tensor(79.9224, device='cuda:0')
episode: 343 training return: tensor(26.0557, device='cuda:0')
epoch: 86 test_true_pfm: 100.54824156089805 sim_pfm: 27.000292556360364
episode: 344 training return: tensor(5.8806, device='cuda:0')
episode: 345 training return: tensor(48.9709, device='cuda:0')
episode: 346 training return: tensor(14.2345, device='cuda:0')
episode: 347 training return: tensor(6.6651, device='cuda:0')
epoch: 87 test_true_pfm: 51.2518156636329 sim_pfm: 30.650133521761745
episode: 348 training return: tensor(2.2568, device='cuda:0')
episode: 349 training return: tensor(0.0373, device='cuda:0')
episode: 350 training return: tensor(10.0006, device='cuda:0')
episode: 351 training return: tensor(48.6263, device='cuda:0')
epoch: 88 test_true_pfm: 81.04955913129079 sim_pfm: 24.288753581122727
episode: 352 training return: tensor(11.9691, device='cuda:0')
episode: 353 training return: tensor(80.5497, device='cuda:0')
episode: 354 training return: tensor(7.3341, device='cuda:0')
episode: 355 training return: tensor(23.7924, device='cuda:0')
epoch: 89 test_true_pfm: 100.80374735052564 sim_pfm: 31.456477704201824
episode: 356 training return: tensor(31.4421, device='cuda:0')
episode: 357 training return: tensor(25.8961, device='cuda:0')
episode: 358 training return: tensor(15.3035, device='cuda:0')
episode: 359 training return: tensor(15.2246, device='cuda:0')
epoch: 90 test_true_pfm: 83.97189122180656 sim_pfm: 38.88344600860728
episode: 360 training return: tensor(23.8295, device='cuda:0')
episode: 361 training return: tensor(12.8003, device='cuda:0')
episode: 362 training return: tensor(73.2078, device='cuda:0')
episode: 363 training return: tensor(71.4025, device='cuda:0')
epoch: 91 test_true_pfm: 98.24121878921211 sim_pfm: 23.624573991668875
episode: 364 training return: tensor(14.6006, device='cuda:0')
episode: 365 training return: tensor(13.6533, device='cuda:0')
episode: 366 training return: tensor(53.8409, device='cuda:0')
episode: 367 training return: tensor(82.3751, device='cuda:0')
epoch: 92 test_true_pfm: 87.66425958863167 sim_pfm: 32.57230622393545
episode: 368 training return: tensor(54.0578, device='cuda:0')
episode: 369 training return: tensor(8.1301, device='cuda:0')
episode: 370 training return: tensor(84.0170, device='cuda:0')
episode: 371 training return: tensor(15.4300, device='cuda:0')
epoch: 93 test_true_pfm: 69.81685479059918 sim_pfm: 31.79463392145117
episode: 372 training return: tensor(82.0904, device='cuda:0')
episode: 373 training return: tensor(16.5269, device='cuda:0')
episode: 374 training return: tensor(6.7876, device='cuda:0')
episode: 375 training return: tensor(3.3982, device='cuda:0')
epoch: 94 test_true_pfm: 74.25006239449856 sim_pfm: 32.614030039974025
episode: 376 training return: tensor(20.1077, device='cuda:0')
episode: 377 training return: tensor(43.2848, device='cuda:0')
episode: 378 training return: tensor(65.1449, device='cuda:0')
episode: 379 training return: tensor(7.3972, device='cuda:0')
epoch: 95 test_true_pfm: 101.08718640279398 sim_pfm: 45.930040470097445
episode: 380 training return: tensor(26.5111, device='cuda:0')
episode: 381 training return: tensor(58.2132, device='cuda:0')
episode: 382 training return: tensor(58.0442, device='cuda:0')
episode: 383 training return: tensor(45.8955, device='cuda:0')
epoch: 96 test_true_pfm: 96.06597340857846 sim_pfm: 44.35329512739554
episode: 384 training return: tensor(15.5758, device='cuda:0')
episode: 385 training return: tensor(79.7410, device='cuda:0')
episode: 386 training return: tensor(55.6623, device='cuda:0')
episode: 387 training return: tensor(11.5170, device='cuda:0')
epoch: 97 test_true_pfm: 79.28586814521151 sim_pfm: 50.58661683124956
episode: 388 training return: tensor(9.4559, device='cuda:0')
episode: 389 training return: tensor(82.2575, device='cuda:0')
episode: 390 training return: tensor(9.5834, device='cuda:0')
episode: 391 training return: tensor(14.9061, device='cuda:0')
epoch: 98 test_true_pfm: 83.91427097684559 sim_pfm: 11.989800837094663
episode: 392 training return: tensor(81.1085, device='cuda:0')
episode: 393 training return: tensor(74.5964, device='cuda:0')
episode: 394 training return: tensor(9.6619, device='cuda:0')
episode: 395 training return: tensor(26.6706, device='cuda:0')
epoch: 99 test_true_pfm: 67.57895309251144 sim_pfm: 25.55369813631405
episode: 396 training return: tensor(6.6133, device='cuda:0')
episode: 397 training return: tensor(21.3461, device='cuda:0')
episode: 398 training return: tensor(35.5929, device='cuda:0')
episode: 399 training return: tensor(11.6993, device='cuda:0')
epoch: 100 test_true_pfm: 82.17832062516293 sim_pfm: 25.011881376919337
episode: 400 training return: tensor(12.4277, device='cuda:0')
episode: 401 training return: tensor(15.5103, device='cuda:0')
episode: 402 training return: tensor(25.7117, device='cuda:0')
episode: 403 training return: tensor(13.7099, device='cuda:0')
epoch: 101 test_true_pfm: 46.63170247801616 sim_pfm: 34.57719012669404
episode: 404 training return: tensor(26.0283, device='cuda:0')
episode: 405 training return: tensor(15.1844, device='cuda:0')
episode: 406 training return: tensor(15.2201, device='cuda:0')
episode: 407 training return: tensor(14.6177, device='cuda:0')
epoch: 102 test_true_pfm: 81.03499135766705 sim_pfm: 19.950834449665855
episode: 408 training return: tensor(70.7982, device='cuda:0')
episode: 409 training return: tensor(7.4638, device='cuda:0')
episode: 410 training return: tensor(10.5763, device='cuda:0')
episode: 411 training return: tensor(57.7431, device='cuda:0')
epoch: 103 test_true_pfm: 56.81837364991011 sim_pfm: 50.44087862735614
episode: 412 training return: tensor(79.0251, device='cuda:0')
episode: 413 training return: tensor(81.8780, device='cuda:0')
episode: 414 training return: tensor(14.3938, device='cuda:0')
episode: 415 training return: tensor(11.7814, device='cuda:0')
epoch: 104 test_true_pfm: 87.66203662787788 sim_pfm: 28.51167518211296
episode: 416 training return: tensor(14.4260, device='cuda:0')
episode: 417 training return: tensor(9.9963, device='cuda:0')
episode: 418 training return: tensor(85.4076, device='cuda:0')
episode: 419 training return: tensor(81.2345, device='cuda:0')
epoch: 105 test_true_pfm: 105.5248143230943 sim_pfm: 35.94387378187967
episode: 420 training return: tensor(9.7928, device='cuda:0')
episode: 421 training return: tensor(8.9415, device='cuda:0')
episode: 422 training return: tensor(40.1293, device='cuda:0')
episode: 423 training return: tensor(13.4281, device='cuda:0')
epoch: 106 test_true_pfm: 90.76343776010869 sim_pfm: 51.846514355100226
episode: 424 training return: tensor(58.5814, device='cuda:0')
episode: 425 training return: tensor(12.1180, device='cuda:0')
episode: 426 training return: tensor(80.3963, device='cuda:0')
episode: 427 training return: tensor(11.9520, device='cuda:0')
epoch: 107 test_true_pfm: 62.66648348029796 sim_pfm: 42.71999011292937
episode: 428 training return: tensor(63.6721, device='cuda:0')
episode: 429 training return: tensor(32.9496, device='cuda:0')
episode: 430 training return: tensor(71.0374, device='cuda:0')
episode: 431 training return: tensor(53.4722, device='cuda:0')
epoch: 108 test_true_pfm: 116.33340817101923 sim_pfm: 43.22097280695452
episode: 432 training return: tensor(15.7015, device='cuda:0')
episode: 433 training return: tensor(63.9084, device='cuda:0')
episode: 434 training return: tensor(83.1780, device='cuda:0')
episode: 435 training return: tensor(9.8759, device='cuda:0')
epoch: 109 test_true_pfm: 95.52528002769574 sim_pfm: 36.873273313982644
episode: 436 training return: tensor(8.2875, device='cuda:0')
episode: 437 training return: tensor(15.3704, device='cuda:0')
episode: 438 training return: tensor(60.3065, device='cuda:0')
episode: 439 training return: tensor(14.3874, device='cuda:0')
epoch: 110 test_true_pfm: 87.74529845442824 sim_pfm: 22.931264485872816
episode: 440 training return: tensor(80.1223, device='cuda:0')
episode: 441 training return: tensor(82.7890, device='cuda:0')
episode: 442 training return: tensor(15.8473, device='cuda:0')
episode: 443 training return: tensor(8.3592, device='cuda:0')
epoch: 111 test_true_pfm: 90.61185355413059 sim_pfm: 16.489298260398208
episode: 444 training return: tensor(11.6584, device='cuda:0')
episode: 445 training return: tensor(78.3933, device='cuda:0')
episode: 446 training return: tensor(7.6563, device='cuda:0')
episode: 447 training return: tensor(6.3136, device='cuda:0')
epoch: 112 test_true_pfm: 95.27530507588463 sim_pfm: 38.02738288187538
episode: 448 training return: tensor(87.6715, device='cuda:0')
episode: 449 training return: tensor(82.9907, device='cuda:0')
episode: 450 training return: tensor(9.4918, device='cuda:0')
episode: 451 training return: tensor(12.5592, device='cuda:0')
epoch: 113 test_true_pfm: 65.57073824803878 sim_pfm: 79.94718972548726
episode: 452 training return: tensor(80.7521, device='cuda:0')
episode: 453 training return: tensor(14.0357, device='cuda:0')
episode: 454 training return: tensor(75.7979, device='cuda:0')
episode: 455 training return: tensor(78.1231, device='cuda:0')
epoch: 114 test_true_pfm: 66.8026329839182 sim_pfm: 51.44781165044988
episode: 456 training return: tensor(8.3804, device='cuda:0')
episode: 457 training return: tensor(13.3597, device='cuda:0')
episode: 458 training return: tensor(78.8563, device='cuda:0')
episode: 459 training return: tensor(13.6386, device='cuda:0')
epoch: 115 test_true_pfm: 87.49437621330578 sim_pfm: 51.3251750367228
episode: 460 training return: tensor(12.6376, device='cuda:0')
episode: 461 training return: tensor(13.6216, device='cuda:0')
episode: 462 training return: tensor(58.8082, device='cuda:0')
episode: 463 training return: tensor(10.2621, device='cuda:0')
epoch: 116 test_true_pfm: 84.33192331428457 sim_pfm: 49.73215009403648
episode: 464 training return: tensor(14.9184, device='cuda:0')
episode: 465 training return: tensor(46.1091, device='cuda:0')
episode: 466 training return: tensor(74.2393, device='cuda:0')
episode: 467 training return: tensor(26.3928, device='cuda:0')
epoch: 117 test_true_pfm: 104.82256864642156 sim_pfm: 65.56217636059736
episode: 468 training return: tensor(9.8674, device='cuda:0')
episode: 469 training return: tensor(10.3487, device='cuda:0')
episode: 470 training return: tensor(22.7864, device='cuda:0')
episode: 471 training return: tensor(70.2151, device='cuda:0')
epoch: 118 test_true_pfm: 43.941563901555284 sim_pfm: 50.707200185838154
episode: 472 training return: tensor(8.0137, device='cuda:0')
episode: 473 training return: tensor(5.2266, device='cuda:0')
episode: 474 training return: tensor(11.4170, device='cuda:0')
episode: 475 training return: tensor(68.2523, device='cuda:0')
epoch: 119 test_true_pfm: 76.53550669429076 sim_pfm: 54.69059304086841
episode: 476 training return: tensor(14.8631, device='cuda:0')
episode: 477 training return: tensor(11.6224, device='cuda:0')
episode: 478 training return: tensor(22.7834, device='cuda:0')
episode: 479 training return: tensor(13.0548, device='cuda:0')
epoch: 120 test_true_pfm: 65.89191608179071 sim_pfm: 65.32591882278211
episode: 480 training return: tensor(67.7277, device='cuda:0')
episode: 481 training return: tensor(6.6738, device='cuda:0')
episode: 482 training return: tensor(81.6531, device='cuda:0')
episode: 483 training return: tensor(13.1623, device='cuda:0')
epoch: 121 test_true_pfm: 79.88892782822725 sim_pfm: 46.90487723809783
episode: 484 training return: tensor(63.2147, device='cuda:0')
episode: 485 training return: tensor(76.6308, device='cuda:0')
episode: 486 training return: tensor(32.0930, device='cuda:0')
episode: 487 training return: tensor(83.3096, device='cuda:0')
epoch: 122 test_true_pfm: 83.5732646317921 sim_pfm: 43.51827613021014
episode: 488 training return: tensor(13.5824, device='cuda:0')
episode: 489 training return: tensor(80.7591, device='cuda:0')
episode: 490 training return: tensor(7.3496, device='cuda:0')
episode: 491 training return: tensor(9.8859, device='cuda:0')
epoch: 123 test_true_pfm: 48.07578628677379 sim_pfm: 53.62625465861056
episode: 492 training return: tensor(6.0234, device='cuda:0')
episode: 493 training return: tensor(73.9622, device='cuda:0')
episode: 494 training return: tensor(10.0585, device='cuda:0')
episode: 495 training return: tensor(27.8308, device='cuda:0')
epoch: 124 test_true_pfm: 75.15487670089479 sim_pfm: 49.3771645253466
episode: 496 training return: tensor(76.4713, device='cuda:0')
episode: 497 training return: tensor(77.3160, device='cuda:0')
episode: 498 training return: tensor(6.8183, device='cuda:0')
episode: 499 training return: tensor(12.3901, device='cuda:0')
epoch: 125 test_true_pfm: 69.12072219415664 sim_pfm: 24.845788397855358
episode: 500 training return: tensor(27.6249, device='cuda:0')
episode: 501 training return: tensor(19.2995, device='cuda:0')
episode: 502 training return: tensor(17.5580, device='cuda:0')
episode: 503 training return: tensor(11.1745, device='cuda:0')
epoch: 126 test_true_pfm: 66.80905574285967 sim_pfm: 55.162162654456914
episode: 504 training return: tensor(83.0620, device='cuda:0')
episode: 505 training return: tensor(7.5108, device='cuda:0')
episode: 506 training return: tensor(14.3310, device='cuda:0')
episode: 507 training return: tensor(11.0744, device='cuda:0')
epoch: 127 test_true_pfm: 64.06495245333367 sim_pfm: 27.879592188261448
episode: 508 training return: tensor(75.1241, device='cuda:0')
episode: 509 training return: tensor(10.1699, device='cuda:0')
episode: 510 training return: tensor(25.9699, device='cuda:0')
episode: 511 training return: tensor(42.0992, device='cuda:0')
epoch: 128 test_true_pfm: 68.23860359395493 sim_pfm: 52.47052104643662
episode: 512 training return: tensor(25.3683, device='cuda:0')
episode: 513 training return: tensor(20.0491, device='cuda:0')
episode: 514 training return: tensor(11.9135, device='cuda:0')
episode: 515 training return: tensor(14.0395, device='cuda:0')
epoch: 129 test_true_pfm: 95.1170434207153 sim_pfm: 34.09307514250395
episode: 516 training return: tensor(13.9873, device='cuda:0')
episode: 517 training return: tensor(14.0162, device='cuda:0')
episode: 518 training return: tensor(13.8082, device='cuda:0')
episode: 519 training return: tensor(4.3932, device='cuda:0')
epoch: 130 test_true_pfm: 63.26608140995042 sim_pfm: 35.71463504809653
episode: 520 training return: tensor(7.6233, device='cuda:0')
episode: 521 training return: tensor(8.1087, device='cuda:0')
episode: 522 training return: tensor(82.0546, device='cuda:0')
episode: 523 training return: tensor(22.8512, device='cuda:0')
epoch: 131 test_true_pfm: 83.14619674611279 sim_pfm: 34.315199019620195
episode: 524 training return: tensor(13.9761, device='cuda:0')
episode: 525 training return: tensor(18.5221, device='cuda:0')
episode: 526 training return: tensor(26.6753, device='cuda:0')
episode: 527 training return: tensor(33.5736, device='cuda:0')
epoch: 132 test_true_pfm: 75.14791561766309 sim_pfm: 37.98878052373766
episode: 528 training return: tensor(60.1576, device='cuda:0')
episode: 529 training return: tensor(14.6191, device='cuda:0')
episode: 530 training return: tensor(14.6878, device='cuda:0')
episode: 531 training return: tensor(14.5454, device='cuda:0')
epoch: 133 test_true_pfm: 61.82331613556433 sim_pfm: 44.650866022985426
episode: 532 training return: tensor(79.4526, device='cuda:0')
episode: 533 training return: tensor(6.5522, device='cuda:0')
episode: 534 training return: tensor(7.1054, device='cuda:0')
episode: 535 training return: tensor(74.6619, device='cuda:0')
epoch: 134 test_true_pfm: 70.65636567526576 sim_pfm: 38.482534928183306
episode: 536 training return: tensor(74.4343, device='cuda:0')
episode: 537 training return: tensor(80.2278, device='cuda:0')
episode: 538 training return: tensor(9.2050, device='cuda:0')
episode: 539 training return: tensor(81.7683, device='cuda:0')
epoch: 135 test_true_pfm: 97.0658728500065 sim_pfm: 43.73681037560455
episode: 540 training return: tensor(81.5341, device='cuda:0')
episode: 541 training return: tensor(70.5949, device='cuda:0')
episode: 542 training return: tensor(14.1179, device='cuda:0')
episode: 543 training return: tensor(55.7230, device='cuda:0')
epoch: 136 test_true_pfm: 61.269973787977946 sim_pfm: 25.585809210472508
episode: 544 training return: tensor(15.4584, device='cuda:0')
episode: 545 training return: tensor(13.2757, device='cuda:0')
episode: 546 training return: tensor(81.7088, device='cuda:0')
episode: 547 training return: tensor(78.7321, device='cuda:0')
epoch: 137 test_true_pfm: 92.19255769428146 sim_pfm: 38.82625700713252
episode: 548 training return: tensor(9.2450, device='cuda:0')
episode: 549 training return: tensor(62.5105, device='cuda:0')
episode: 550 training return: tensor(22.3443, device='cuda:0')
episode: 551 training return: tensor(13.6284, device='cuda:0')
epoch: 138 test_true_pfm: 84.98111166408196 sim_pfm: 55.12251993729733
episode: 552 training return: tensor(67.6426, device='cuda:0')
episode: 553 training return: tensor(13.0914, device='cuda:0')
episode: 554 training return: tensor(6.8504, device='cuda:0')
episode: 555 training return: tensor(15.4793, device='cuda:0')
epoch: 139 test_true_pfm: 53.083434940013184 sim_pfm: 36.70135121793719
episode: 556 training return: tensor(26.0531, device='cuda:0')
episode: 557 training return: tensor(15.0498, device='cuda:0')
episode: 558 training return: tensor(14.5797, device='cuda:0')
episode: 559 training return: tensor(30.0767, device='cuda:0')
epoch: 140 test_true_pfm: 92.88482504644146 sim_pfm: 49.0289395318483
episode: 560 training return: tensor(15.6263, device='cuda:0')
episode: 561 training return: tensor(36.2807, device='cuda:0')
episode: 562 training return: tensor(9.9328, device='cuda:0')
episode: 563 training return: tensor(12.7746, device='cuda:0')
epoch: 141 test_true_pfm: 79.69332741390626 sim_pfm: 33.883778270054606
episode: 564 training return: tensor(85.6691, device='cuda:0')
episode: 565 training return: tensor(15.7601, device='cuda:0')
episode: 566 training return: tensor(11.0751, device='cuda:0')
episode: 567 training return: tensor(14.5468, device='cuda:0')
epoch: 142 test_true_pfm: 69.33679128321955 sim_pfm: 25.363351064466407
episode: 568 training return: tensor(10.6123, device='cuda:0')
episode: 569 training return: tensor(14.9113, device='cuda:0')
episode: 570 training return: tensor(9.4341, device='cuda:0')
episode: 571 training return: tensor(73.0534, device='cuda:0')
epoch: 143 test_true_pfm: 80.95397566634155 sim_pfm: 27.654024792573182
episode: 572 training return: tensor(35.4827, device='cuda:0')
episode: 573 training return: tensor(15.4796, device='cuda:0')
episode: 574 training return: tensor(13.1712, device='cuda:0')
episode: 575 training return: tensor(10.7514, device='cuda:0')
epoch: 144 test_true_pfm: 56.3440117457464 sim_pfm: 47.06560149692814
episode: 576 training return: tensor(13.8208, device='cuda:0')
episode: 577 training return: tensor(14.5522, device='cuda:0')
episode: 578 training return: tensor(10.5341, device='cuda:0')
episode: 579 training return: tensor(7.6351, device='cuda:0')
epoch: 145 test_true_pfm: 76.76834270493295 sim_pfm: 32.41370905041695
episode: 580 training return: tensor(28.8779, device='cuda:0')
episode: 581 training return: tensor(76.4730, device='cuda:0')
episode: 582 training return: tensor(9.0062, device='cuda:0')
episode: 583 training return: tensor(70.3328, device='cuda:0')
epoch: 146 test_true_pfm: 81.32516333012568 sim_pfm: 69.13687178877299
episode: 584 training return: tensor(84.4589, device='cuda:0')
episode: 585 training return: tensor(24.9849, device='cuda:0')
episode: 586 training return: tensor(15.7418, device='cuda:0')
episode: 587 training return: tensor(7.5575, device='cuda:0')
epoch: 147 test_true_pfm: 79.99699306173235 sim_pfm: 42.705204824038084
episode: 588 training return: tensor(15.4929, device='cuda:0')
episode: 589 training return: tensor(6.6333, device='cuda:0')
episode: 590 training return: tensor(78.4354, device='cuda:0')
episode: 591 training return: tensor(8.0741, device='cuda:0')
epoch: 148 test_true_pfm: 45.82994532131245 sim_pfm: 48.38314306617249
episode: 592 training return: tensor(11.0635, device='cuda:0')
episode: 593 training return: tensor(81.6580, device='cuda:0')
episode: 594 training return: tensor(35.0897, device='cuda:0')
episode: 595 training return: tensor(12.0571, device='cuda:0')
epoch: 149 test_true_pfm: 67.74342898938275 sim_pfm: 53.637994053994774
episode: 596 training return: tensor(7.7713, device='cuda:0')
episode: 597 training return: tensor(15.4193, device='cuda:0')
episode: 598 training return: tensor(18.6864, device='cuda:0')
episode: 599 training return: tensor(82.0033, device='cuda:0')
epoch: 150 test_true_pfm: 70.682308190642 sim_pfm: 17.568029291636776
