['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.4252843448519707 test_loss: 0.36836421489715576
epoch: 1 training_loss 0.3658664011955261 test_loss: 0.3625674247741699
epoch: 2 training_loss 0.357976678609848 test_loss: 0.3637349605560303
epoch: 3 training_loss 0.3597300827503204 test_loss: 0.3492875576019287
epoch: 4 training_loss 0.3509281276166439 test_loss: 0.34473655223846433
epoch: 5 training_loss 0.34581473886966707 test_loss: 0.3688500165939331
epoch: 6 training_loss 0.3542099019885063 test_loss: 0.33472959995269774
epoch: 7 training_loss 0.3608826322853565 test_loss: 0.3533617973327637
epoch: 8 training_loss 0.34866417542099953 test_loss: 0.32776708602905275
epoch: 9 training_loss 0.35394405156373976 test_loss: 0.33439462184906005
epoch: 10 training_loss 0.33862977504730224 test_loss: 0.36277930736541747
epoch: 11 training_loss 0.35166654601693154 test_loss: 0.37359521389007566
epoch: 12 training_loss 0.342646696716547 test_loss: 0.3414407730102539
epoch: 13 training_loss 0.34997363820672034 test_loss: 0.3553671598434448
epoch: 14 training_loss 0.3464056624472141 test_loss: 0.36167895793914795
epoch: 15 training_loss 0.3527289283275604 test_loss: 0.33207197189331056
epoch: 16 training_loss 0.3381552918255329 test_loss: 0.33165907859802246
epoch: 17 training_loss 0.3449214971065521 test_loss: 0.35291547775268556
epoch: 18 training_loss 0.3599622245132923 test_loss: 0.361862325668335
epoch: 19 training_loss 0.34393673941493036 test_loss: 0.35285804271697996
epoch: 20 training_loss 0.34946780413389206 test_loss: 0.3283733129501343
epoch: 21 training_loss 0.3327475787699223 test_loss: 0.34541571140289307
epoch: 22 training_loss 0.34254868239164354 test_loss: 0.3602471828460693
epoch: 23 training_loss 0.3565276254713535 test_loss: 0.3449291229248047
epoch: 24 training_loss 0.32701320633292197 test_loss: 0.3406283617019653
epoch: 25 training_loss 0.3441534121334553 test_loss: 0.32626297473907473
epoch: 26 training_loss 0.34782919034361837 test_loss: 0.3299166440963745
epoch: 27 training_loss 0.3353027760982513 test_loss: 0.33266637325286863
epoch: 28 training_loss 0.34074456319212915 test_loss: 0.3459814786911011
epoch: 29 training_loss 0.34455925360322 test_loss: 0.3270870685577393
epoch: 30 training_loss 0.34691189020872115 test_loss: 0.33428709506988524
epoch: 31 training_loss 0.33760022729635236 test_loss: 0.3355281114578247
epoch: 32 training_loss 0.3397992114722729 test_loss: 0.33026633262634275
epoch: 33 training_loss 0.34511883080005645 test_loss: 0.31743035316467283
epoch: 34 training_loss 0.3376967312395573 test_loss: 0.3307061195373535
epoch: 35 training_loss 0.3416006322205067 test_loss: 0.36530139446258547
epoch: 36 training_loss 0.3421894608438015 test_loss: 0.32275819778442383
epoch: 37 training_loss 0.35159359067678453 test_loss: 0.3410004138946533
epoch: 38 training_loss 0.346271915435791 test_loss: 0.32812345027923584
epoch: 39 training_loss 0.3355370356142521 test_loss: 0.38079400062561036
epoch: 40 training_loss 0.343748332709074 test_loss: 0.36422927379608155
epoch: 41 training_loss 0.33486936032772063 test_loss: 0.33549511432647705
epoch: 42 training_loss 0.3448150691390037 test_loss: 0.3570509910583496
epoch: 43 training_loss 0.35056216090917586 test_loss: 0.3479938983917236
epoch: 44 training_loss 0.3488661909103394 test_loss: 0.35279335975646975
epoch: 45 training_loss 0.34064831241965293 test_loss: 0.35034162998199464
epoch: 46 training_loss 0.3450841073691845 test_loss: 0.327909517288208
epoch: 47 training_loss 0.3395467236638069 test_loss: 0.3357557773590088
epoch: 48 training_loss 0.34569488346576693 test_loss: 0.34713218212127683
epoch: 49 training_loss 0.34014188721776006 test_loss: 0.34145348072052
epoch: 50 training_loss 0.3455184781551361 test_loss: 0.34028873443603513
epoch: 51 training_loss 0.33194532737135884 test_loss: 0.3204691171646118
epoch: 52 training_loss 0.34144456043839455 test_loss: 0.3520695447921753
epoch: 53 training_loss 0.33019395291805265 test_loss: 0.3264918804168701
epoch: 54 training_loss 0.33389523133635524 test_loss: 0.3565932035446167
epoch: 55 training_loss 0.3428782878816128 test_loss: 0.34013543128967283
epoch: 56 training_loss 0.33992027446627615 test_loss: 0.3400708198547363
epoch: 57 training_loss 0.34912236616015435 test_loss: 0.31914045810699465
epoch: 58 training_loss 0.3369482883810997 test_loss: 0.3328801393508911
epoch: 59 training_loss 0.34660502299666407 test_loss: 0.35531415939331057
epoch: 60 training_loss 0.3456053678691387 test_loss: 0.349635910987854
epoch: 61 training_loss 0.33809885069727896 test_loss: 0.3447383642196655
epoch: 62 training_loss 0.33144376322627067 test_loss: 0.3350908041000366
epoch: 63 training_loss 0.34361683920025826 test_loss: 0.3600956439971924
epoch: 64 training_loss 0.3508143579959869 test_loss: 0.3604097843170166
epoch: 65 training_loss 0.3381592103838921 test_loss: 0.31705336570739745
epoch: 66 training_loss 0.33742183849215507 test_loss: 0.34178688526153567
epoch: 67 training_loss 0.3542679090797901 test_loss: 0.34686779975891113
epoch: 68 training_loss 0.3289695882797241 test_loss: 0.33563766479492185
epoch: 69 training_loss 0.3373495922982693 test_loss: 0.34260756969451905
epoch: 70 training_loss 0.33514650821685793 test_loss: 0.34536521434783934
epoch: 71 training_loss 0.337475089430809 test_loss: 0.3525285482406616
epoch: 72 training_loss 0.34265648558735845 test_loss: 0.31910195350646975
epoch: 73 training_loss 0.3355570510029793 test_loss: 0.35279502868652346
epoch: 74 training_loss 0.3457463659346104 test_loss: 0.3329662322998047
epoch: 75 training_loss 0.33346747800707816 test_loss: 0.3402512550354004
epoch: 76 training_loss 0.326153404712677 test_loss: 0.36875033378601074
epoch: 77 training_loss 0.34424247026443483 test_loss: 0.3479337453842163
epoch: 78 training_loss 0.332822322845459 test_loss: 0.3392373561859131
epoch: 79 training_loss 0.33080876782536506 test_loss: 0.37853245735168456
epoch: 80 training_loss 0.339748881906271 test_loss: 0.33866860866546633
epoch: 81 training_loss 0.34186867728829384 test_loss: 0.32676033973693847
epoch: 82 training_loss 0.33360626578330993 test_loss: 0.3463325023651123
epoch: 83 training_loss 0.34597934171557426 test_loss: 0.3705958366394043
epoch: 84 training_loss 0.3312102971971035 test_loss: 0.35797164440155027
epoch: 85 training_loss 0.3387706536054611 test_loss: 0.33538968563079835
epoch: 86 training_loss 0.3423323954641819 test_loss: 0.3411060333251953
epoch: 87 training_loss 0.3339860986173153 test_loss: 0.34016242027282717
epoch: 88 training_loss 0.3494576424360275 test_loss: 0.3355447292327881
epoch: 89 training_loss 0.33860215231776236 test_loss: 0.362563943862915
epoch: 90 training_loss 0.33609200954437257 test_loss: 0.3424073696136475
epoch: 91 training_loss 0.3313605825603008 test_loss: 0.37409396171569825
epoch: 92 training_loss 0.3277127739787102 test_loss: 0.3594141721725464
epoch: 93 training_loss 0.3455887249112129 test_loss: 0.33645775318145754
epoch: 94 training_loss 0.3281604717671871 test_loss: 0.34064695835113523
epoch: 95 training_loss 0.35141102835536003 test_loss: 0.3423896074295044
epoch: 96 training_loss 0.34157208815217016 test_loss: 0.3682973623275757
epoch: 97 training_loss 0.33814176321029665 test_loss: 0.33384218215942385
epoch: 98 training_loss 0.33971322476863863 test_loss: 0.34791295528411864
epoch: 99 training_loss 0.3321013367176056 test_loss: 0.34622290134429934
epoch: 100 training_loss 0.338727006316185 test_loss: 0.3330742359161377
epoch: 101 training_loss 0.3399770796298981 test_loss: 0.3461554527282715
epoch: 102 training_loss 0.3309091526269913 test_loss: 0.34660322666168214
epoch: 103 training_loss 0.3379117274284363 test_loss: 0.3369997501373291
epoch: 104 training_loss 0.33697028398513795 test_loss: 0.34524078369140626
epoch: 105 training_loss 0.3340773859620094 test_loss: 0.32040555477142335
epoch: 106 training_loss 0.34762688234448436 test_loss: 0.3257885456085205
epoch: 107 training_loss 0.33670873939991 test_loss: 0.33495142459869387
epoch: 108 training_loss 0.35429865181446074 test_loss: 0.32855184078216554
epoch: 109 training_loss 0.3383732271194458 test_loss: 0.32225985527038575
epoch: 110 training_loss 0.3332979699969292 test_loss: 0.33978471755981443
epoch: 111 training_loss 0.324732031673193 test_loss: 0.3196969985961914
epoch: 112 training_loss 0.34350074917078016 test_loss: 0.32355945110321044
epoch: 113 training_loss 0.3423358207941055 test_loss: 0.3413814306259155
epoch: 114 training_loss 0.3443423455953598 test_loss: 0.32641377449035647
epoch: 115 training_loss 0.34312918975949286 test_loss: 0.34145665168762207
epoch: 116 training_loss 0.3379118739068508 test_loss: 0.324877667427063
epoch: 117 training_loss 0.3372469726204872 test_loss: 0.34709579944610597
epoch: 118 training_loss 0.3507393641769886 test_loss: 0.34433004856109617
epoch: 119 training_loss 0.33358641594648364 test_loss: 0.3527805805206299
epoch: 120 training_loss 0.3409985402226448 test_loss: 0.32734107971191406
epoch: 121 training_loss 0.3440557016432285 test_loss: 0.3372344970703125
epoch: 122 training_loss 0.347921771556139 test_loss: 0.32224581241607664
epoch: 123 training_loss 0.33487277120351794 test_loss: 0.33823413848876954
epoch: 124 training_loss 0.338821564912796 test_loss: 0.3260146141052246
epoch: 125 training_loss 0.3361430318653584 test_loss: 0.3152944564819336
epoch: 126 training_loss 0.33953517988324167 test_loss: 0.345608115196228
epoch: 127 training_loss 0.3393045583367348 test_loss: 0.3421013593673706
epoch: 128 training_loss 0.3370456847548485 test_loss: 0.3376603603363037
epoch: 129 training_loss 0.33456678241491317 test_loss: 0.34030325412750245
epoch: 130 training_loss 0.3379008454084396 test_loss: 0.33109748363494873
epoch: 131 training_loss 0.3340116490423679 test_loss: 0.3369927883148193
epoch: 132 training_loss 0.34675565242767337 test_loss: 0.33755042552948
epoch: 133 training_loss 0.33641340225934985 test_loss: 0.33873763084411623
epoch: 134 training_loss 0.33059197425842285 test_loss: 0.3677999019622803
epoch: 135 training_loss 0.33458754912018773 test_loss: 0.3484314203262329
epoch: 136 training_loss 0.34153381898999213 test_loss: 0.33279659748077395
epoch: 137 training_loss 0.32962339743971825 test_loss: 0.34673757553100587
epoch: 138 training_loss 0.33283741131424904 test_loss: 0.355458664894104
epoch: 139 training_loss 0.33733764946460726 test_loss: 0.34999818801879884
epoch: 140 training_loss 0.33960827678442 test_loss: 0.32918219566345214
epoch: 141 training_loss 0.33843168035149573 test_loss: 0.311949348449707
epoch: 142 training_loss 0.33712659940123557 test_loss: 0.33767750263214114
epoch: 143 training_loss 0.34323383510112765 test_loss: 0.3468796968460083
epoch: 144 training_loss 0.3308602198958397 test_loss: 0.33130569458007814
epoch: 145 training_loss 0.3413284119963646 test_loss: 0.3446797847747803
epoch: 146 training_loss 0.3538183544576168 test_loss: 0.3247687339782715
epoch: 147 training_loss 0.33720522046089174 test_loss: 0.3460038185119629
epoch: 148 training_loss 0.3439983317255974 test_loss: 0.33151755332946775
epoch: 149 training_loss 0.3326835232973099 test_loss: 0.30686163902282715
epoch: 0 training_loss 38.607148017883304 test_loss: 26.895626831054688
epoch: 1 training_loss 23.136494846343993 test_loss: 19.045648193359376
epoch: 2 training_loss 17.944893732070923 test_loss: 16.090428161621094
epoch: 3 training_loss 15.195912828445435 test_loss: 14.273088073730468
epoch: 4 training_loss 13.75150951385498 test_loss: 13.146926879882812
epoch: 5 training_loss 12.792929649353027 test_loss: 12.396382904052734
epoch: 6 training_loss 11.94395977973938 test_loss: 11.660572814941407
epoch: 7 training_loss 11.506210584640503 test_loss: 10.779932403564453
epoch: 8 training_loss 10.877917966842652 test_loss: 10.792285919189453
epoch: 9 training_loss 10.34519338607788 test_loss: 10.006149291992188
epoch: 10 training_loss 9.949164910316467 test_loss: 9.283831024169922
epoch: 11 training_loss 9.7323113489151 test_loss: 9.242100524902344
epoch: 12 training_loss 9.288422498703003 test_loss: 8.916943359375
epoch: 13 training_loss 8.876506366729735 test_loss: 9.224069213867187
epoch: 14 training_loss 8.68135627269745 test_loss: 8.067874908447266
epoch: 15 training_loss 8.436460161209107 test_loss: 8.364041900634765
epoch: 16 training_loss 8.27345576286316 test_loss: 8.503367614746093
epoch: 17 training_loss 8.100930271148682 test_loss: 7.5252197265625
epoch: 18 training_loss 7.843561868667603 test_loss: 7.708840179443359
epoch: 19 training_loss 7.8496662044525145 test_loss: 8.006563568115235
epoch: 20 training_loss 7.706306266784668 test_loss: 7.403602600097656
epoch: 21 training_loss 7.634668889045716 test_loss: 7.30797119140625
epoch: 22 training_loss 7.2930124425888065 test_loss: 7.216192626953125
epoch: 23 training_loss 7.308718719482422 test_loss: 6.945173645019532
epoch: 24 training_loss 7.281709432601929 test_loss: 7.6145164489746096
epoch: 25 training_loss 6.986901912689209 test_loss: 6.782991027832031
epoch: 26 training_loss 6.842308683395386 test_loss: 6.631378936767578
epoch: 27 training_loss 6.888683080673218 test_loss: 6.982637023925781
epoch: 28 training_loss 6.855829825401306 test_loss: 6.670450592041016
epoch: 29 training_loss 6.787717189788818 test_loss: 6.767118835449219
epoch: 30 training_loss 6.661279664039612 test_loss: 6.341933059692383
epoch: 31 training_loss 6.721055693626404 test_loss: 6.426744079589843
epoch: 32 training_loss 6.451156005859375 test_loss: 6.535002899169922
epoch: 33 training_loss 6.402991070747375 test_loss: 6.533438110351563
epoch: 34 training_loss 6.326403069496155 test_loss: 6.2977252960205075
epoch: 35 training_loss 6.278849024772644 test_loss: 6.195801162719727
epoch: 36 training_loss 6.379378051757812 test_loss: 6.363280487060547
epoch: 37 training_loss 6.186793823242187 test_loss: 5.8517414093017575
epoch: 38 training_loss 6.074105491638184 test_loss: 6.636253356933594
epoch: 39 training_loss 6.158274993896485 test_loss: 5.653256225585937
epoch: 40 training_loss 5.956610894203186 test_loss: 7.2197418212890625
epoch: 41 training_loss 6.059329705238342 test_loss: 5.847588729858399
epoch: 42 training_loss 5.8110138463974 test_loss: 6.148403549194336
epoch: 43 training_loss 5.856286935806274 test_loss: 6.0871326446533205
epoch: 44 training_loss 5.88857795715332 test_loss: 5.759950256347656
epoch: 45 training_loss 5.80152774810791 test_loss: 5.588446044921875
epoch: 46 training_loss 5.5979185390472415 test_loss: 5.597760391235352
epoch: 47 training_loss 5.58266028881073 test_loss: 5.452510833740234
epoch: 48 training_loss 5.458082895278931 test_loss: 5.5116722106933596
epoch: 49 training_loss 5.494292187690735 test_loss: 5.730478286743164
epoch: 50 training_loss 5.522828741073608 test_loss: 5.089915084838867
epoch: 51 training_loss 5.325989456176758 test_loss: 5.2206871032714846
epoch: 52 training_loss 5.28428765296936 test_loss: 5.349409866333008
epoch: 53 training_loss 5.235442771911621 test_loss: 5.182517242431641
epoch: 54 training_loss 5.21242978811264 test_loss: 5.258306884765625
epoch: 55 training_loss 5.261433138847351 test_loss: 5.509527206420898
epoch: 56 training_loss 5.2198007297515865 test_loss: 5.038543701171875
epoch: 57 training_loss 5.229786610603332 test_loss: 4.95750732421875
epoch: 58 training_loss 5.010671253204346 test_loss: 5.022152328491211
epoch: 59 training_loss 4.970290949344635 test_loss: 5.254024505615234
epoch: 60 training_loss 5.083497931957245 test_loss: 4.734841156005859
epoch: 61 training_loss 4.903882575035095 test_loss: 5.0367889404296875
epoch: 62 training_loss 4.916903626918793 test_loss: 4.844855880737304
epoch: 63 training_loss 4.748461298942566 test_loss: 4.629258346557617
epoch: 64 training_loss 4.6858665657043455 test_loss: 4.856295013427735
epoch: 65 training_loss 4.869593024253845 test_loss: 4.881405258178711
epoch: 66 training_loss 4.6877227306365965 test_loss: 4.653428649902343
epoch: 67 training_loss 4.613934898376465 test_loss: 4.592413711547851
epoch: 68 training_loss 4.691535942554474 test_loss: 4.743508148193359
epoch: 69 training_loss 4.521990728378296 test_loss: 4.482831954956055
epoch: 70 training_loss 4.58240387916565 test_loss: 4.465808868408203
epoch: 71 training_loss 4.482280969619751 test_loss: 4.7456615447998045
epoch: 72 training_loss 4.496759448051453 test_loss: 4.28752555847168
epoch: 73 training_loss 4.4492739486694335 test_loss: 4.431479644775391
epoch: 74 training_loss 4.350675675868988 test_loss: 4.216716766357422
epoch: 75 training_loss 4.310108742713928 test_loss: 4.218094253540039
epoch: 76 training_loss 4.425994515419006 test_loss: 4.132083511352539
epoch: 77 training_loss 4.34545147895813 test_loss: 4.507569885253906
epoch: 78 training_loss 4.507605891227723 test_loss: 4.393276977539062
epoch: 79 training_loss 4.2874700284004215 test_loss: 4.332938385009766
epoch: 80 training_loss 4.330706822872162 test_loss: 3.9971351623535156
epoch: 81 training_loss 4.255483818054199 test_loss: 4.063792419433594
epoch: 82 training_loss 4.302892436981201 test_loss: 4.317245864868164
epoch: 83 training_loss 4.37758668422699 test_loss: 4.314862442016602
epoch: 84 training_loss 4.187139227390289 test_loss: 4.132760620117187
epoch: 85 training_loss 4.1685769295692445 test_loss: 4.261432266235351
epoch: 86 training_loss 4.17622748374939 test_loss: 4.6374671936035154
epoch: 87 training_loss 4.156569571495056 test_loss: 4.239605331420899
epoch: 88 training_loss 4.153704986572266 test_loss: 4.234287261962891
epoch: 89 training_loss 4.058943405151367 test_loss: 4.124850463867188
epoch: 90 training_loss 4.078584280014038 test_loss: 3.9395366668701173
epoch: 91 training_loss 4.104933233261108 test_loss: 3.945404815673828
epoch: 92 training_loss 4.184003400802612 test_loss: 4.076739120483398
epoch: 93 training_loss 4.1650254034996035 test_loss: 4.2903602600097654
epoch: 94 training_loss 4.093315448760986 test_loss: 4.6996101379394535
epoch: 95 training_loss 4.046844549179077 test_loss: 3.8201393127441405
epoch: 96 training_loss 4.168863747119904 test_loss: 4.032389068603516
epoch: 97 training_loss 3.9167744541168212 test_loss: 4.191689300537109
epoch: 98 training_loss 4.073733012676239 test_loss: 4.098049163818359
epoch: 99 training_loss 4.029362094402313 test_loss: 3.990131378173828
epoch: 100 training_loss 3.9390301513671875 test_loss: 4.000100708007812
epoch: 101 training_loss 3.928868176937103 test_loss: 3.991617202758789
epoch: 102 training_loss 3.8670136761665344 test_loss: 3.86242561340332
epoch: 103 training_loss 3.992271385192871 test_loss: 3.7401912689208983
epoch: 104 training_loss 4.011842277050018 test_loss: 3.9016456604003906
epoch: 105 training_loss 3.884391305446625 test_loss: 4.113210678100586
epoch: 106 training_loss 3.929801092147827 test_loss: 4.032782745361328
epoch: 107 training_loss 4.005863320827484 test_loss: 4.034066772460937
epoch: 108 training_loss 3.91131196975708 test_loss: 3.749933624267578
epoch: 109 training_loss 3.953895106315613 test_loss: 3.7514663696289063
epoch: 110 training_loss 3.8625215220451357 test_loss: 3.8136470794677733
epoch: 111 training_loss 3.914349615573883 test_loss: 3.6142658233642577
epoch: 112 training_loss 3.8856848812103273 test_loss: 3.909725570678711
epoch: 113 training_loss 3.7424050831794737 test_loss: 4.497226715087891
epoch: 114 training_loss 3.7661646795272827 test_loss: 3.719778823852539
epoch: 115 training_loss 3.824994761943817 test_loss: 3.817835235595703
epoch: 116 training_loss 3.7965541791915896 test_loss: 3.7429603576660155
epoch: 117 training_loss 3.949046175479889 test_loss: 3.7325252532958983
epoch: 118 training_loss 3.889244010448456 test_loss: 3.717559051513672
epoch: 119 training_loss 3.7462757658958434 test_loss: 3.7059131622314454
epoch: 120 training_loss 3.7868637585639955 test_loss: 3.909706878662109
epoch: 121 training_loss 3.7277024364471436 test_loss: 4.036980819702149
epoch: 122 training_loss 3.6971519112586977 test_loss: 3.609088134765625
epoch: 123 training_loss 3.729746069908142 test_loss: 3.615121841430664
epoch: 124 training_loss 3.674042444229126 test_loss: 3.5333171844482423
epoch: 125 training_loss 3.6780234742164613 test_loss: 3.53863639831543
epoch: 126 training_loss 3.5617309403419495 test_loss: 3.619879150390625
epoch: 127 training_loss 3.6913312816619874 test_loss: 3.499653625488281
epoch: 128 training_loss 3.6735617661476136 test_loss: 3.5855266571044924
epoch: 129 training_loss 3.696379566192627 test_loss: 3.6767601013183593
epoch: 130 training_loss 3.6481211614608764 test_loss: 4.310959243774414
epoch: 131 training_loss 3.7219018864631654 test_loss: 3.8375545501708985
epoch: 132 training_loss 3.767951023578644 test_loss: 3.7002941131591798
epoch: 133 training_loss 3.486016936302185 test_loss: 3.6300357818603515
epoch: 134 training_loss 3.5922987413406373 test_loss: 3.7276050567626955
epoch: 135 training_loss 3.5618017172813414 test_loss: 3.519232177734375
epoch: 136 training_loss 3.7386935567855835 test_loss: 3.5804763793945313
epoch: 137 training_loss 3.6213695764541627 test_loss: 3.6509654998779295
epoch: 138 training_loss 3.6700484490394594 test_loss: 3.603382110595703
epoch: 139 training_loss 3.5174719762802122 test_loss: 3.464778518676758
epoch: 140 training_loss 3.583700020313263 test_loss: 3.665749740600586
epoch: 141 training_loss 3.552012951374054 test_loss: 3.495546340942383
epoch: 142 training_loss 3.524077181816101 test_loss: 3.3732219696044923
epoch: 143 training_loss 3.495715446472168 test_loss: 3.433202362060547
epoch: 144 training_loss 3.591309790611267 test_loss: 3.4241119384765626
epoch: 145 training_loss 3.5300902700424195 test_loss: 3.8181583404541017
epoch: 146 training_loss 3.602526698112488 test_loss: 3.604317855834961
epoch: 147 training_loss 3.54540785074234 test_loss: 3.3608680725097657
epoch: 148 training_loss 3.472708384990692 test_loss: 3.5310733795166014
epoch: 149 training_loss 3.739555928707123 test_loss: 3.5677330017089846
62.433686946360865
episode: 0 training return: tensor(15.9247, device='cuda:0')
episode: 1 training return: tensor(28.9905, device='cuda:0')
episode: 2 training return: tensor(19.3507, device='cuda:0')
episode: 3 training return: tensor(15.5770, device='cuda:0')
epoch: 1 test_true_pfm: 95.7902884490018 sim_pfm: 35.05171358918888
episode: 4 training return: tensor(16.7351, device='cuda:0')
episode: 5 training return: tensor(35.8109, device='cuda:0')
episode: 6 training return: tensor(16.6376, device='cuda:0')
episode: 7 training return: tensor(42.8009, device='cuda:0')
epoch: 2 test_true_pfm: 87.38002697202145 sim_pfm: 47.52985176774091
episode: 8 training return: tensor(35.1852, device='cuda:0')
episode: 9 training return: tensor(15.5658, device='cuda:0')
episode: 10 training return: tensor(36.8060, device='cuda:0')
episode: 11 training return: tensor(54.4504, device='cuda:0')
epoch: 3 test_true_pfm: 63.879851396765275 sim_pfm: 25.72970853410079
episode: 12 training return: tensor(66.0638, device='cuda:0')
episode: 13 training return: tensor(48.1957, device='cuda:0')
episode: 14 training return: tensor(19.4946, device='cuda:0')
episode: 15 training return: tensor(48.6081, device='cuda:0')
epoch: 4 test_true_pfm: 88.7055875831193 sim_pfm: 54.56071995641687
episode: 16 training return: tensor(79.5389, device='cuda:0')
episode: 17 training return: tensor(44.9366, device='cuda:0')
episode: 18 training return: tensor(51.3112, device='cuda:0')
episode: 19 training return: tensor(55.4635, device='cuda:0')
epoch: 5 test_true_pfm: 64.94379360924196 sim_pfm: 49.99927832567482
episode: 20 training return: tensor(14.7423, device='cuda:0')
episode: 21 training return: tensor(82.0130, device='cuda:0')
episode: 22 training return: tensor(61.9858, device='cuda:0')
episode: 23 training return: tensor(16.3332, device='cuda:0')
epoch: 6 test_true_pfm: 78.64292763087731 sim_pfm: 31.079930775013054
episode: 24 training return: tensor(16.6540, device='cuda:0')
episode: 25 training return: tensor(59.3963, device='cuda:0')
episode: 26 training return: tensor(76.5124, device='cuda:0')
episode: 27 training return: tensor(65.0681, device='cuda:0')
epoch: 7 test_true_pfm: 87.50145959301938 sim_pfm: 26.165133916027845
episode: 28 training return: tensor(55.2369, device='cuda:0')
episode: 29 training return: tensor(20.1559, device='cuda:0')
episode: 30 training return: tensor(53.9344, device='cuda:0')
episode: 31 training return: tensor(55.4507, device='cuda:0')
epoch: 8 test_true_pfm: 88.18001266380762 sim_pfm: 36.43195848671603
episode: 32 training return: tensor(59.2677, device='cuda:0')
episode: 33 training return: tensor(75.7134, device='cuda:0')
episode: 34 training return: tensor(17.0478, device='cuda:0')
episode: 35 training return: tensor(86.0892, device='cuda:0')
epoch: 9 test_true_pfm: 105.86741085922259 sim_pfm: 31.39833879370708
episode: 36 training return: tensor(89.7177, device='cuda:0')
episode: 37 training return: tensor(85.0884, device='cuda:0')
episode: 38 training return: tensor(83.4420, device='cuda:0')
episode: 39 training return: tensor(79.6631, device='cuda:0')
epoch: 10 test_true_pfm: 104.46317583002683 sim_pfm: 58.126432233373635
episode: 40 training return: tensor(83.5077, device='cuda:0')
episode: 41 training return: tensor(69.3517, device='cuda:0')
episode: 42 training return: tensor(59.3188, device='cuda:0')
episode: 43 training return: tensor(51.9825, device='cuda:0')
epoch: 11 test_true_pfm: 79.48344032541718 sim_pfm: 59.01466132903006
episode: 44 training return: tensor(22.0663, device='cuda:0')
episode: 45 training return: tensor(17.0309, device='cuda:0')
episode: 46 training return: tensor(20.9535, device='cuda:0')
episode: 47 training return: tensor(81.0085, device='cuda:0')
epoch: 12 test_true_pfm: 87.91278498771543 sim_pfm: 65.8657975454582
episode: 48 training return: tensor(22.8492, device='cuda:0')
episode: 49 training return: tensor(56.9648, device='cuda:0')
episode: 50 training return: tensor(12.2084, device='cuda:0')
episode: 51 training return: tensor(56.9241, device='cuda:0')
epoch: 13 test_true_pfm: 88.2219438614411 sim_pfm: 51.0153531035583
episode: 52 training return: tensor(29.7617, device='cuda:0')
episode: 53 training return: tensor(18.2607, device='cuda:0')
episode: 54 training return: tensor(52.1129, device='cuda:0')
episode: 55 training return: tensor(87.1685, device='cuda:0')
epoch: 14 test_true_pfm: 65.5636688678042 sim_pfm: 34.93429384719347
episode: 56 training return: tensor(12.4461, device='cuda:0')
episode: 57 training return: tensor(77.3809, device='cuda:0')
episode: 58 training return: tensor(16.4781, device='cuda:0')
episode: 59 training return: tensor(83.0281, device='cuda:0')
epoch: 15 test_true_pfm: 109.9703137676197 sim_pfm: 63.686856731807346
episode: 60 training return: tensor(55.2039, device='cuda:0')
episode: 61 training return: tensor(13.6824, device='cuda:0')
episode: 62 training return: tensor(60.1838, device='cuda:0')
episode: 63 training return: tensor(19.3398, device='cuda:0')
epoch: 16 test_true_pfm: 83.97017680826953 sim_pfm: 53.69971557332319
episode: 64 training return: tensor(51.9197, device='cuda:0')
episode: 65 training return: tensor(56.3627, device='cuda:0')
episode: 66 training return: tensor(56.8615, device='cuda:0')
episode: 67 training return: tensor(16.6086, device='cuda:0')
epoch: 17 test_true_pfm: 94.7510429709555 sim_pfm: 61.41550314365304
episode: 68 training return: tensor(78.5405, device='cuda:0')
episode: 69 training return: tensor(81.3437, device='cuda:0')
episode: 70 training return: tensor(81.8600, device='cuda:0')
episode: 71 training return: tensor(47.0072, device='cuda:0')
epoch: 18 test_true_pfm: 87.2567905428743 sim_pfm: 31.529443334037204
episode: 72 training return: tensor(78.1507, device='cuda:0')
episode: 73 training return: tensor(13.2995, device='cuda:0')
episode: 74 training return: tensor(86.5962, device='cuda:0')
episode: 75 training return: tensor(86.8630, device='cuda:0')
epoch: 19 test_true_pfm: 89.85630242543793 sim_pfm: 34.153216064424484
episode: 76 training return: tensor(38.7159, device='cuda:0')
episode: 77 training return: tensor(88.5701, device='cuda:0')
episode: 78 training return: tensor(19.2894, device='cuda:0')
episode: 79 training return: tensor(88.7930, device='cuda:0')
epoch: 20 test_true_pfm: 107.19551102895142 sim_pfm: 57.310375209437915
episode: 80 training return: tensor(89.5056, device='cuda:0')
episode: 81 training return: tensor(78.6249, device='cuda:0')
episode: 82 training return: tensor(90.3240, device='cuda:0')
episode: 83 training return: tensor(58.2279, device='cuda:0')
epoch: 21 test_true_pfm: 110.52220779673966 sim_pfm: 57.07456318846671
episode: 84 training return: tensor(83.4893, device='cuda:0')
episode: 85 training return: tensor(89.6994, device='cuda:0')
episode: 86 training return: tensor(85.0636, device='cuda:0')
episode: 87 training return: tensor(21.9456, device='cuda:0')
epoch: 22 test_true_pfm: 111.02289043752698 sim_pfm: 63.331272713432554
episode: 88 training return: tensor(20.1755, device='cuda:0')
episode: 89 training return: tensor(73.3230, device='cuda:0')
episode: 90 training return: tensor(31.1169, device='cuda:0')
episode: 91 training return: tensor(84.1510, device='cuda:0')
epoch: 23 test_true_pfm: 107.44320074409147 sim_pfm: 90.21926402477547
episode: 92 training return: tensor(92.9709, device='cuda:0')
episode: 93 training return: tensor(92.4306, device='cuda:0')
episode: 94 training return: tensor(79.1613, device='cuda:0')
episode: 95 training return: tensor(84.9144, device='cuda:0')
epoch: 24 test_true_pfm: 115.63970190959515 sim_pfm: 60.75373770071892
episode: 96 training return: tensor(11.6117, device='cuda:0')
episode: 97 training return: tensor(85.2760, device='cuda:0')
episode: 98 training return: tensor(86.8898, device='cuda:0')
episode: 99 training return: tensor(96.3696, device='cuda:0')
epoch: 25 test_true_pfm: 130.0801585750856 sim_pfm: 74.7551231757272
episode: 100 training return: tensor(78.7542, device='cuda:0')
episode: 101 training return: tensor(80.6731, device='cuda:0')
episode: 102 training return: tensor(89.3659, device='cuda:0')
episode: 103 training return: tensor(89.5615, device='cuda:0')
epoch: 26 test_true_pfm: 107.0654228033571 sim_pfm: 69.49596656310023
episode: 104 training return: tensor(80.7480, device='cuda:0')
episode: 105 training return: tensor(73.2983, device='cuda:0')
episode: 106 training return: tensor(77.2395, device='cuda:0')
episode: 107 training return: tensor(76.1431, device='cuda:0')
epoch: 27 test_true_pfm: 105.31196351402616 sim_pfm: 45.525125397124796
episode: 108 training return: tensor(60.9496, device='cuda:0')
episode: 109 training return: tensor(93.8717, device='cuda:0')
episode: 110 training return: tensor(80.5950, device='cuda:0')
episode: 111 training return: tensor(86.7844, device='cuda:0')
epoch: 28 test_true_pfm: 89.06676723915973 sim_pfm: 66.25307122928207
episode: 112 training return: tensor(78.3758, device='cuda:0')
episode: 113 training return: tensor(76.8614, device='cuda:0')
episode: 114 training return: tensor(92.3659, device='cuda:0')
episode: 115 training return: tensor(22.8033, device='cuda:0')
epoch: 29 test_true_pfm: 125.32287507239982 sim_pfm: 73.600657090015
episode: 116 training return: tensor(93.2621, device='cuda:0')
episode: 117 training return: tensor(40.0265, device='cuda:0')
episode: 118 training return: tensor(90.5959, device='cuda:0')
episode: 119 training return: tensor(88.1841, device='cuda:0')
epoch: 30 test_true_pfm: 92.04880567366331 sim_pfm: 53.58883580849506
episode: 120 training return: tensor(86.0831, device='cuda:0')
episode: 121 training return: tensor(81.7791, device='cuda:0')
episode: 122 training return: tensor(69.3185, device='cuda:0')
episode: 123 training return: tensor(88.1020, device='cuda:0')
epoch: 31 test_true_pfm: 98.05509805290157 sim_pfm: 87.1806506697263
episode: 124 training return: tensor(71.3608, device='cuda:0')
episode: 125 training return: tensor(88.2053, device='cuda:0')
episode: 126 training return: tensor(90.4868, device='cuda:0')
episode: 127 training return: tensor(80.1123, device='cuda:0')
epoch: 32 test_true_pfm: 91.16643107791175 sim_pfm: 82.82607495121775
episode: 128 training return: tensor(83.1943, device='cuda:0')
episode: 129 training return: tensor(18.0299, device='cuda:0')
episode: 130 training return: tensor(21.3278, device='cuda:0')
episode: 131 training return: tensor(87.4834, device='cuda:0')
epoch: 33 test_true_pfm: 119.05872427521963 sim_pfm: 80.21894795124535
episode: 132 training return: tensor(69.2677, device='cuda:0')
episode: 133 training return: tensor(32.0713, device='cuda:0')
episode: 134 training return: tensor(83.9458, device='cuda:0')
episode: 135 training return: tensor(81.8984, device='cuda:0')
epoch: 34 test_true_pfm: 94.73861433800292 sim_pfm: 54.715598928439434
episode: 136 training return: tensor(82.7429, device='cuda:0')
episode: 137 training return: tensor(70.1611, device='cuda:0')
episode: 138 training return: tensor(14.7490, device='cuda:0')
episode: 139 training return: tensor(85.3494, device='cuda:0')
epoch: 35 test_true_pfm: 108.11102069493674 sim_pfm: 81.37112457210314
episode: 140 training return: tensor(4.4762, device='cuda:0')
episode: 141 training return: tensor(9.3165, device='cuda:0')
episode: 142 training return: tensor(88.3033, device='cuda:0')
episode: 143 training return: tensor(79.4695, device='cuda:0')
epoch: 36 test_true_pfm: 110.8319661525945 sim_pfm: 70.78366890146863
episode: 144 training return: tensor(87.0987, device='cuda:0')
episode: 145 training return: tensor(66.7456, device='cuda:0')
episode: 146 training return: tensor(86.5687, device='cuda:0')
episode: 147 training return: tensor(91.7235, device='cuda:0')
epoch: 37 test_true_pfm: 114.88812012686512 sim_pfm: 63.000691870279844
episode: 148 training return: tensor(75.4535, device='cuda:0')
episode: 149 training return: tensor(88.4770, device='cuda:0')
episode: 150 training return: tensor(10.8792, device='cuda:0')
episode: 151 training return: tensor(90.8260, device='cuda:0')
epoch: 38 test_true_pfm: 102.62812343169401 sim_pfm: 69.65001567802392
episode: 152 training return: tensor(54.5988, device='cuda:0')
episode: 153 training return: tensor(81.3906, device='cuda:0')
episode: 154 training return: tensor(71.8363, device='cuda:0')
episode: 155 training return: tensor(34.0204, device='cuda:0')
epoch: 39 test_true_pfm: 88.89911228613815 sim_pfm: 38.12451054201229
episode: 156 training return: tensor(67.0809, device='cuda:0')
episode: 157 training return: tensor(77.9328, device='cuda:0')
episode: 158 training return: tensor(35.0308, device='cuda:0')
episode: 159 training return: tensor(6.0596, device='cuda:0')
epoch: 40 test_true_pfm: 125.17402451320424 sim_pfm: 77.73902541860589
episode: 160 training return: tensor(19.6178, device='cuda:0')
episode: 161 training return: tensor(69.8709, device='cuda:0')
episode: 162 training return: tensor(84.7068, device='cuda:0')
episode: 163 training return: tensor(34.8565, device='cuda:0')
epoch: 41 test_true_pfm: 125.82450909120917 sim_pfm: 46.16666753363097
episode: 164 training return: tensor(92.7967, device='cuda:0')
episode: 165 training return: tensor(76.3690, device='cuda:0')
episode: 166 training return: tensor(24.0053, device='cuda:0')
episode: 167 training return: tensor(78.1677, device='cuda:0')
epoch: 42 test_true_pfm: 122.81352254716435 sim_pfm: 69.44214255241096
episode: 168 training return: tensor(92.1644, device='cuda:0')
episode: 169 training return: tensor(81.5331, device='cuda:0')
episode: 170 training return: tensor(92.2431, device='cuda:0')
episode: 171 training return: tensor(12.5425, device='cuda:0')
epoch: 43 test_true_pfm: 82.12127992401291 sim_pfm: 47.55082073616795
episode: 172 training return: tensor(45.2127, device='cuda:0')
episode: 173 training return: tensor(76.0992, device='cuda:0')
episode: 174 training return: tensor(87.6963, device='cuda:0')
episode: 175 training return: tensor(9.7527, device='cuda:0')
epoch: 44 test_true_pfm: 103.88308731913632 sim_pfm: 87.13398330159835
episode: 176 training return: tensor(91.1391, device='cuda:0')
episode: 177 training return: tensor(88.6030, device='cuda:0')
episode: 178 training return: tensor(8.4028, device='cuda:0')
episode: 179 training return: tensor(90.8905, device='cuda:0')
epoch: 45 test_true_pfm: 123.17833878868828 sim_pfm: 60.24906683236477
episode: 180 training return: tensor(85.4028, device='cuda:0')
episode: 181 training return: tensor(36.8819, device='cuda:0')
episode: 182 training return: tensor(78.4322, device='cuda:0')
episode: 183 training return: tensor(82.8561, device='cuda:0')
epoch: 46 test_true_pfm: 74.60737072502822 sim_pfm: 58.10911907959962
episode: 184 training return: tensor(83.9709, device='cuda:0')
episode: 185 training return: tensor(85.0687, device='cuda:0')
episode: 186 training return: tensor(88.7405, device='cuda:0')
episode: 187 training return: tensor(86.6512, device='cuda:0')
epoch: 47 test_true_pfm: 121.46276000863436 sim_pfm: 81.7851580109098
episode: 188 training return: tensor(67.0541, device='cuda:0')
episode: 189 training return: tensor(86.6756, device='cuda:0')
episode: 190 training return: tensor(90.9865, device='cuda:0')
episode: 191 training return: tensor(85.4555, device='cuda:0')
epoch: 48 test_true_pfm: 61.68104481940723 sim_pfm: 48.155751882173355
episode: 192 training return: tensor(36.9571, device='cuda:0')
episode: 193 training return: tensor(21.4717, device='cuda:0')
episode: 194 training return: tensor(89.4588, device='cuda:0')
episode: 195 training return: tensor(87.5499, device='cuda:0')
epoch: 49 test_true_pfm: 124.84080707722819 sim_pfm: 78.20422966857441
episode: 196 training return: tensor(90.0395, device='cuda:0')
episode: 197 training return: tensor(13.4632, device='cuda:0')
episode: 198 training return: tensor(77.3638, device='cuda:0')
episode: 199 training return: tensor(21.5294, device='cuda:0')
epoch: 50 test_true_pfm: 121.79667648783133 sim_pfm: 67.99395634000538
episode: 200 training return: tensor(81.8700, device='cuda:0')
episode: 201 training return: tensor(89.8426, device='cuda:0')
episode: 202 training return: tensor(86.3365, device='cuda:0')
episode: 203 training return: tensor(83.2431, device='cuda:0')
epoch: 51 test_true_pfm: 95.4260849750893 sim_pfm: 47.62067160430597
episode: 204 training return: tensor(18.3671, device='cuda:0')
episode: 205 training return: tensor(39.2447, device='cuda:0')
episode: 206 training return: tensor(37.3233, device='cuda:0')
episode: 207 training return: tensor(89.3525, device='cuda:0')
epoch: 52 test_true_pfm: 113.08110976904698 sim_pfm: 88.38606211089063
episode: 208 training return: tensor(67.1920, device='cuda:0')
episode: 209 training return: tensor(12.9474, device='cuda:0')
episode: 210 training return: tensor(87.7290, device='cuda:0')
episode: 211 training return: tensor(67.9948, device='cuda:0')
epoch: 53 test_true_pfm: 114.61971219700322 sim_pfm: 71.93339575738064
episode: 212 training return: tensor(82.6623, device='cuda:0')
episode: 213 training return: tensor(82.7637, device='cuda:0')
episode: 214 training return: tensor(84.4676, device='cuda:0')
episode: 215 training return: tensor(41.7751, device='cuda:0')
epoch: 54 test_true_pfm: 123.28695359940816 sim_pfm: 77.59745451055933
episode: 216 training return: tensor(75.3482, device='cuda:0')
episode: 217 training return: tensor(42.5949, device='cuda:0')
episode: 218 training return: tensor(93.7311, device='cuda:0')
episode: 219 training return: tensor(86.3430, device='cuda:0')
epoch: 55 test_true_pfm: 112.87613552088251 sim_pfm: 87.79804469360388
episode: 220 training return: tensor(84.2682, device='cuda:0')
episode: 221 training return: tensor(82.4260, device='cuda:0')
episode: 222 training return: tensor(93.5942, device='cuda:0')
episode: 223 training return: tensor(90.2628, device='cuda:0')
epoch: 56 test_true_pfm: 96.43387097359127 sim_pfm: 82.23269454996917
episode: 224 training return: tensor(89.6836, device='cuda:0')
episode: 225 training return: tensor(76.8664, device='cuda:0')
episode: 226 training return: tensor(84.3768, device='cuda:0')
episode: 227 training return: tensor(90.4371, device='cuda:0')
epoch: 57 test_true_pfm: 99.46672664464151 sim_pfm: 73.02493183282786
episode: 228 training return: tensor(81.2999, device='cuda:0')
episode: 229 training return: tensor(11.4087, device='cuda:0')
episode: 230 training return: tensor(75.1492, device='cuda:0')
episode: 231 training return: tensor(98.6535, device='cuda:0')
epoch: 58 test_true_pfm: 81.40490539089042 sim_pfm: 85.45619766738965
episode: 232 training return: tensor(65.0435, device='cuda:0')
episode: 233 training return: tensor(39.0456, device='cuda:0')
episode: 234 training return: tensor(78.8176, device='cuda:0')
episode: 235 training return: tensor(91.4480, device='cuda:0')
epoch: 59 test_true_pfm: 113.17013206620882 sim_pfm: 43.3229913607589
episode: 236 training return: tensor(76.1963, device='cuda:0')
episode: 237 training return: tensor(87.0679, device='cuda:0')
episode: 238 training return: tensor(89.0065, device='cuda:0')
episode: 239 training return: tensor(81.2814, device='cuda:0')
epoch: 60 test_true_pfm: 129.0021499317562 sim_pfm: 85.6252364459273
episode: 240 training return: tensor(86.7205, device='cuda:0')
episode: 241 training return: tensor(88.5170, device='cuda:0')
episode: 242 training return: tensor(86.0822, device='cuda:0')
episode: 243 training return: tensor(91.0253, device='cuda:0')
epoch: 61 test_true_pfm: 110.3605784599655 sim_pfm: 61.490633096173404
episode: 244 training return: tensor(15.0774, device='cuda:0')
episode: 245 training return: tensor(88.5383, device='cuda:0')
episode: 246 training return: tensor(87.6760, device='cuda:0')
episode: 247 training return: tensor(82.1364, device='cuda:0')
epoch: 62 test_true_pfm: 120.33251307967994 sim_pfm: 79.55836652324652
episode: 248 training return: tensor(16.2496, device='cuda:0')
episode: 249 training return: tensor(69.3725, device='cuda:0')
episode: 250 training return: tensor(87.8662, device='cuda:0')
episode: 251 training return: tensor(87.7198, device='cuda:0')
epoch: 63 test_true_pfm: 115.97735921308683 sim_pfm: 74.43722474579117
episode: 252 training return: tensor(83.7465, device='cuda:0')
episode: 253 training return: tensor(93.0887, device='cuda:0')
episode: 254 training return: tensor(11.0933, device='cuda:0')
episode: 255 training return: tensor(93.2575, device='cuda:0')
epoch: 64 test_true_pfm: 125.99350564196796 sim_pfm: 82.70804928733851
episode: 256 training return: tensor(75.5578, device='cuda:0')
episode: 257 training return: tensor(11.7937, device='cuda:0')
episode: 258 training return: tensor(93.1608, device='cuda:0')
episode: 259 training return: tensor(88.7162, device='cuda:0')
epoch: 65 test_true_pfm: 106.44158243963125 sim_pfm: 72.06184748939123
episode: 260 training return: tensor(90.2421, device='cuda:0')
episode: 261 training return: tensor(90.5010, device='cuda:0')
episode: 262 training return: tensor(86.3924, device='cuda:0')
episode: 263 training return: tensor(75.6969, device='cuda:0')
epoch: 66 test_true_pfm: 123.51771580208938 sim_pfm: 58.06073617398506
episode: 264 training return: tensor(86.2400, device='cuda:0')
episode: 265 training return: tensor(71.9849, device='cuda:0')
episode: 266 training return: tensor(43.6584, device='cuda:0')
episode: 267 training return: tensor(18.1764, device='cuda:0')
epoch: 67 test_true_pfm: 123.95225986051989 sim_pfm: 45.07004626126145
episode: 268 training return: tensor(87.1062, device='cuda:0')
episode: 269 training return: tensor(88.2061, device='cuda:0')
episode: 270 training return: tensor(85.5404, device='cuda:0')
episode: 271 training return: tensor(82.5083, device='cuda:0')
epoch: 68 test_true_pfm: 120.50105457723745 sim_pfm: 77.2017207116296
episode: 272 training return: tensor(87.9141, device='cuda:0')
episode: 273 training return: tensor(86.1283, device='cuda:0')
episode: 274 training return: tensor(19.6185, device='cuda:0')
episode: 275 training return: tensor(16.4085, device='cuda:0')
epoch: 69 test_true_pfm: 123.12587936563118 sim_pfm: 89.45984552570735
episode: 276 training return: tensor(87.8625, device='cuda:0')
episode: 277 training return: tensor(79.4341, device='cuda:0')
episode: 278 training return: tensor(21.8797, device='cuda:0')
episode: 279 training return: tensor(89.2612, device='cuda:0')
epoch: 70 test_true_pfm: 123.28602895746398 sim_pfm: 86.16326508210622
episode: 280 training return: tensor(81.1514, device='cuda:0')
episode: 281 training return: tensor(84.2613, device='cuda:0')
episode: 282 training return: tensor(85.7559, device='cuda:0')
episode: 283 training return: tensor(85.8259, device='cuda:0')
epoch: 71 test_true_pfm: 98.57985418148397 sim_pfm: 44.78752519323025
episode: 284 training return: tensor(92.8004, device='cuda:0')
episode: 285 training return: tensor(79.6222, device='cuda:0')
episode: 286 training return: tensor(94.1635, device='cuda:0')
episode: 287 training return: tensor(92.1333, device='cuda:0')
epoch: 72 test_true_pfm: 126.09054785596531 sim_pfm: 90.38422332488699
episode: 288 training return: tensor(83.2199, device='cuda:0')
episode: 289 training return: tensor(94.7158, device='cuda:0')
episode: 290 training return: tensor(80.7018, device='cuda:0')
episode: 291 training return: tensor(83.5942, device='cuda:0')
epoch: 73 test_true_pfm: 128.32159618711856 sim_pfm: 72.85260514910333
episode: 292 training return: tensor(79.1884, device='cuda:0')
episode: 293 training return: tensor(89.3564, device='cuda:0')
episode: 294 training return: tensor(94.8039, device='cuda:0')
episode: 295 training return: tensor(96.9761, device='cuda:0')
epoch: 74 test_true_pfm: 124.45130378544454 sim_pfm: 45.898152669036065
episode: 296 training return: tensor(89.8167, device='cuda:0')
episode: 297 training return: tensor(96.1238, device='cuda:0')
episode: 298 training return: tensor(13.6423, device='cuda:0')
episode: 299 training return: tensor(75.3563, device='cuda:0')
epoch: 75 test_true_pfm: 126.49870098653378 sim_pfm: 83.41524818155449
episode: 300 training return: tensor(81.3448, device='cuda:0')
episode: 301 training return: tensor(15.8520, device='cuda:0')
episode: 302 training return: tensor(60.5940, device='cuda:0')
episode: 303 training return: tensor(90.9551, device='cuda:0')
epoch: 76 test_true_pfm: 121.41530658764842 sim_pfm: 69.30860897414387
episode: 304 training return: tensor(92.2575, device='cuda:0')
episode: 305 training return: tensor(69.4703, device='cuda:0')
episode: 306 training return: tensor(88.4584, device='cuda:0')
episode: 307 training return: tensor(98.5541, device='cuda:0')
epoch: 77 test_true_pfm: 106.78161532173326 sim_pfm: 73.30547199274879
episode: 308 training return: tensor(89.8026, device='cuda:0')
episode: 309 training return: tensor(86.0049, device='cuda:0')
episode: 310 training return: tensor(94.4126, device='cuda:0')
episode: 311 training return: tensor(80.9452, device='cuda:0')
epoch: 78 test_true_pfm: 127.87718053249584 sim_pfm: 90.0498898631602
episode: 312 training return: tensor(92.8645, device='cuda:0')
episode: 313 training return: tensor(54.8833, device='cuda:0')
episode: 314 training return: tensor(-5.7774, device='cuda:0')
episode: 315 training return: tensor(90.2110, device='cuda:0')
epoch: 79 test_true_pfm: 128.54472174492327 sim_pfm: 76.78987723102327
episode: 316 training return: tensor(88.4811, device='cuda:0')
episode: 317 training return: tensor(82.9433, device='cuda:0')
episode: 318 training return: tensor(87.5292, device='cuda:0')
episode: 319 training return: tensor(9.4988, device='cuda:0')
epoch: 80 test_true_pfm: 116.11203466661523 sim_pfm: 83.74939594949247
episode: 320 training return: tensor(-3.6809, device='cuda:0')
episode: 321 training return: tensor(78.7465, device='cuda:0')
episode: 322 training return: tensor(91.8417, device='cuda:0')
episode: 323 training return: tensor(89.5556, device='cuda:0')
epoch: 81 test_true_pfm: 126.95152863953342 sim_pfm: 57.55969047864782
episode: 324 training return: tensor(-7.2646, device='cuda:0')
episode: 325 training return: tensor(93.1814, device='cuda:0')
episode: 326 training return: tensor(85.7028, device='cuda:0')
episode: 327 training return: tensor(74.3569, device='cuda:0')
epoch: 82 test_true_pfm: 114.58396831789261 sim_pfm: 71.60494018733735
episode: 328 training return: tensor(81.1566, device='cuda:0')
episode: 329 training return: tensor(70.2709, device='cuda:0')
episode: 330 training return: tensor(75.5173, device='cuda:0')
episode: 331 training return: tensor(79.1908, device='cuda:0')
epoch: 83 test_true_pfm: 115.70013609934782 sim_pfm: 89.86974903357331
episode: 332 training return: tensor(78.3428, device='cuda:0')
episode: 333 training return: tensor(90.9244, device='cuda:0')
episode: 334 training return: tensor(88.3904, device='cuda:0')
episode: 335 training return: tensor(73.8193, device='cuda:0')
epoch: 84 test_true_pfm: 115.25880294731662 sim_pfm: 88.13723917357274
episode: 336 training return: tensor(82.0678, device='cuda:0')
episode: 337 training return: tensor(87.3974, device='cuda:0')
episode: 338 training return: tensor(87.7005, device='cuda:0')
episode: 339 training return: tensor(59.6988, device='cuda:0')
epoch: 85 test_true_pfm: 127.49309219305312 sim_pfm: 72.57775193725828
episode: 340 training return: tensor(69.4809, device='cuda:0')
episode: 341 training return: tensor(80.7701, device='cuda:0')
episode: 342 training return: tensor(90.8375, device='cuda:0')
episode: 343 training return: tensor(85.5914, device='cuda:0')
epoch: 86 test_true_pfm: 114.72071116031728 sim_pfm: 58.88364056918071
episode: 344 training return: tensor(17.9162, device='cuda:0')
episode: 345 training return: tensor(78.7089, device='cuda:0')
episode: 346 training return: tensor(90.9471, device='cuda:0')
episode: 347 training return: tensor(87.6538, device='cuda:0')
epoch: 87 test_true_pfm: 113.66022099440538 sim_pfm: 44.058915584097846
episode: 348 training return: tensor(96.6818, device='cuda:0')
episode: 349 training return: tensor(90.3271, device='cuda:0')
episode: 350 training return: tensor(90.7155, device='cuda:0')
episode: 351 training return: tensor(23.3667, device='cuda:0')
epoch: 88 test_true_pfm: 100.67113853919163 sim_pfm: 40.31311354043428
episode: 352 training return: tensor(19.0586, device='cuda:0')
episode: 353 training return: tensor(14.5667, device='cuda:0')
episode: 354 training return: tensor(81.7881, device='cuda:0')
episode: 355 training return: tensor(94.6617, device='cuda:0')
epoch: 89 test_true_pfm: 118.18283680748004 sim_pfm: 79.63158865705482
episode: 356 training return: tensor(80.9936, device='cuda:0')
episode: 357 training return: tensor(90.9592, device='cuda:0')
episode: 358 training return: tensor(89.0304, device='cuda:0')
episode: 359 training return: tensor(57.9737, device='cuda:0')
epoch: 90 test_true_pfm: 124.20047443941708 sim_pfm: 73.22613573143026
episode: 360 training return: tensor(81.0694, device='cuda:0')
episode: 361 training return: tensor(95.4869, device='cuda:0')
episode: 362 training return: tensor(89.2225, device='cuda:0')
episode: 363 training return: tensor(85.2892, device='cuda:0')
epoch: 91 test_true_pfm: 127.22935714406435 sim_pfm: 68.74543331740423
episode: 364 training return: tensor(93.0066, device='cuda:0')
episode: 365 training return: tensor(11.9578, device='cuda:0')
episode: 366 training return: tensor(76.1589, device='cuda:0')
episode: 367 training return: tensor(18.9516, device='cuda:0')
epoch: 92 test_true_pfm: 125.84071624118478 sim_pfm: 88.64446127486299
episode: 368 training return: tensor(90.8620, device='cuda:0')
episode: 369 training return: tensor(65.3663, device='cuda:0')
episode: 370 training return: tensor(86.4877, device='cuda:0')
episode: 371 training return: tensor(76.2613, device='cuda:0')
epoch: 93 test_true_pfm: 108.17864269120427 sim_pfm: 48.413070676691135
episode: 372 training return: tensor(42.6606, device='cuda:0')
episode: 373 training return: tensor(32.1490, device='cuda:0')
episode: 374 training return: tensor(20.1048, device='cuda:0')
episode: 375 training return: tensor(84.1768, device='cuda:0')
epoch: 94 test_true_pfm: 109.68829028859525 sim_pfm: 84.78234911222243
episode: 376 training return: tensor(72.1164, device='cuda:0')
episode: 377 training return: tensor(84.6448, device='cuda:0')
episode: 378 training return: tensor(69.0836, device='cuda:0')
episode: 379 training return: tensor(84.6298, device='cuda:0')
epoch: 95 test_true_pfm: 122.35259482794478 sim_pfm: 71.15972815232234
episode: 380 training return: tensor(87.4975, device='cuda:0')
episode: 381 training return: tensor(81.4590, device='cuda:0')
episode: 382 training return: tensor(80.9459, device='cuda:0')
episode: 383 training return: tensor(82.9233, device='cuda:0')
epoch: 96 test_true_pfm: 123.0336839155165 sim_pfm: 60.986709441628776
episode: 384 training return: tensor(90.1365, device='cuda:0')
episode: 385 training return: tensor(86.8542, device='cuda:0')
episode: 386 training return: tensor(86.6596, device='cuda:0')
episode: 387 training return: tensor(79.7943, device='cuda:0')
epoch: 97 test_true_pfm: 125.60961090425505 sim_pfm: 83.32914795682882
episode: 388 training return: tensor(5.0706, device='cuda:0')
episode: 389 training return: tensor(78.1528, device='cuda:0')
episode: 390 training return: tensor(18.8173, device='cuda:0')
episode: 391 training return: tensor(90.1869, device='cuda:0')
epoch: 98 test_true_pfm: 80.03889474356515 sim_pfm: 42.12484258931945
episode: 392 training return: tensor(85.1473, device='cuda:0')
episode: 393 training return: tensor(96.7001, device='cuda:0')
episode: 394 training return: tensor(75.2099, device='cuda:0')
episode: 395 training return: tensor(93.8907, device='cuda:0')
epoch: 99 test_true_pfm: 82.13804538245788 sim_pfm: 46.62949171796208
episode: 396 training return: tensor(5.5973, device='cuda:0')
episode: 397 training return: tensor(82.9839, device='cuda:0')
episode: 398 training return: tensor(92.0270, device='cuda:0')
episode: 399 training return: tensor(87.8742, device='cuda:0')
epoch: 100 test_true_pfm: 117.62456635170474 sim_pfm: 83.32222464522347
episode: 400 training return: tensor(6.9546, device='cuda:0')
episode: 401 training return: tensor(7.7145, device='cuda:0')
episode: 402 training return: tensor(94.1565, device='cuda:0')
episode: 403 training return: tensor(92.4482, device='cuda:0')
epoch: 101 test_true_pfm: 88.8265944157406 sim_pfm: 77.02371066796476
episode: 404 training return: tensor(38.5240, device='cuda:0')
episode: 405 training return: tensor(5.8621, device='cuda:0')
episode: 406 training return: tensor(87.6712, device='cuda:0')
episode: 407 training return: tensor(75.8260, device='cuda:0')
epoch: 102 test_true_pfm: 117.4939006855996 sim_pfm: 87.10338472130825
episode: 408 training return: tensor(94.6518, device='cuda:0')
episode: 409 training return: tensor(83.7307, device='cuda:0')
episode: 410 training return: tensor(87.1657, device='cuda:0')
episode: 411 training return: tensor(99.0639, device='cuda:0')
epoch: 103 test_true_pfm: 120.90977830544708 sim_pfm: 62.20759663376957
episode: 412 training return: tensor(97.9398, device='cuda:0')
episode: 413 training return: tensor(88.6487, device='cuda:0')
episode: 414 training return: tensor(88.7129, device='cuda:0')
episode: 415 training return: tensor(14.0603, device='cuda:0')
epoch: 104 test_true_pfm: 118.70795333428035 sim_pfm: 71.47473389775841
episode: 416 training return: tensor(71.4688, device='cuda:0')
episode: 417 training return: tensor(18.6126, device='cuda:0')
episode: 418 training return: tensor(18.2573, device='cuda:0')
episode: 419 training return: tensor(97.6873, device='cuda:0')
epoch: 105 test_true_pfm: 106.83204368130569 sim_pfm: 90.2848814979312
episode: 420 training return: tensor(78.8568, device='cuda:0')
episode: 421 training return: tensor(91.6260, device='cuda:0')
episode: 422 training return: tensor(88.1111, device='cuda:0')
episode: 423 training return: tensor(-0.0780, device='cuda:0')
epoch: 106 test_true_pfm: 125.51231461111738 sim_pfm: 90.06119057844626
episode: 424 training return: tensor(29.8498, device='cuda:0')
episode: 425 training return: tensor(86.4597, device='cuda:0')
episode: 426 training return: tensor(88.1642, device='cuda:0')
episode: 427 training return: tensor(19.5232, device='cuda:0')
epoch: 107 test_true_pfm: 128.49460176259586 sim_pfm: 61.10610255392967
episode: 428 training return: tensor(67.1837, device='cuda:0')
episode: 429 training return: tensor(68.7123, device='cuda:0')
episode: 430 training return: tensor(20.1476, device='cuda:0')
episode: 431 training return: tensor(18.3466, device='cuda:0')
epoch: 108 test_true_pfm: 111.69392950030635 sim_pfm: 86.75224744775915
episode: 432 training return: tensor(97.1199, device='cuda:0')
episode: 433 training return: tensor(76.0952, device='cuda:0')
episode: 434 training return: tensor(93.3292, device='cuda:0')
episode: 435 training return: tensor(89.3235, device='cuda:0')
epoch: 109 test_true_pfm: 104.42922904482286 sim_pfm: 74.92879309314885
episode: 436 training return: tensor(85.3313, device='cuda:0')
episode: 437 training return: tensor(82.8388, device='cuda:0')
episode: 438 training return: tensor(1.6375, device='cuda:0')
episode: 439 training return: tensor(88.1755, device='cuda:0')
epoch: 110 test_true_pfm: 123.78982801160535 sim_pfm: 63.67652788432315
episode: 440 training return: tensor(85.5239, device='cuda:0')
episode: 441 training return: tensor(78.1344, device='cuda:0')
episode: 442 training return: tensor(90.4788, device='cuda:0')
episode: 443 training return: tensor(84.9580, device='cuda:0')
epoch: 111 test_true_pfm: 99.96993056277464 sim_pfm: 73.94960461747833
episode: 444 training return: tensor(88.3196, device='cuda:0')
episode: 445 training return: tensor(89.5317, device='cuda:0')
episode: 446 training return: tensor(-3.7173, device='cuda:0')
episode: 447 training return: tensor(89.8671, device='cuda:0')
epoch: 112 test_true_pfm: 77.42845607857839 sim_pfm: 39.61729985640268
episode: 448 training return: tensor(15.4739, device='cuda:0')
episode: 449 training return: tensor(90.4980, device='cuda:0')
episode: 450 training return: tensor(78.1680, device='cuda:0')
episode: 451 training return: tensor(92.5669, device='cuda:0')
epoch: 113 test_true_pfm: 125.18203033114337 sim_pfm: 93.84808532690514
episode: 452 training return: tensor(22.6339, device='cuda:0')
episode: 453 training return: tensor(18.1567, device='cuda:0')
episode: 454 training return: tensor(69.2502, device='cuda:0')
episode: 455 training return: tensor(89.6906, device='cuda:0')
epoch: 114 test_true_pfm: 127.27354541272128 sim_pfm: 88.95730820969911
episode: 456 training return: tensor(88.4057, device='cuda:0')
episode: 457 training return: tensor(78.1507, device='cuda:0')
episode: 458 training return: tensor(89.2100, device='cuda:0')
episode: 459 training return: tensor(62.9792, device='cuda:0')
epoch: 115 test_true_pfm: 129.08830299499803 sim_pfm: 75.76968892231234
episode: 460 training return: tensor(87.5401, device='cuda:0')
episode: 461 training return: tensor(94.3509, device='cuda:0')
episode: 462 training return: tensor(96.2914, device='cuda:0')
episode: 463 training return: tensor(21.0264, device='cuda:0')
epoch: 116 test_true_pfm: 88.46120500012526 sim_pfm: 37.07032753644744
episode: 464 training return: tensor(13.9916, device='cuda:0')
episode: 465 training return: tensor(86.6619, device='cuda:0')
episode: 466 training return: tensor(6.1516, device='cuda:0')
episode: 467 training return: tensor(2.2508, device='cuda:0')
epoch: 117 test_true_pfm: 126.0977750525366 sim_pfm: 87.99098491198383
episode: 468 training return: tensor(89.0803, device='cuda:0')
episode: 469 training return: tensor(87.0872, device='cuda:0')
episode: 470 training return: tensor(89.9810, device='cuda:0')
episode: 471 training return: tensor(89.8882, device='cuda:0')
epoch: 118 test_true_pfm: 113.3328416560458 sim_pfm: 69.91319895864581
episode: 472 training return: tensor(85.5719, device='cuda:0')
episode: 473 training return: tensor(83.9222, device='cuda:0')
episode: 474 training return: tensor(90.1198, device='cuda:0')
episode: 475 training return: tensor(19.4912, device='cuda:0')
epoch: 119 test_true_pfm: 126.50315062952825 sim_pfm: 88.1673912281287
episode: 476 training return: tensor(0.7741, device='cuda:0')
episode: 477 training return: tensor(91.7222, device='cuda:0')
episode: 478 training return: tensor(86.5898, device='cuda:0')
episode: 479 training return: tensor(78.6651, device='cuda:0')
epoch: 120 test_true_pfm: 125.06054940262914 sim_pfm: 87.41205863179638
episode: 480 training return: tensor(89.2831, device='cuda:0')
episode: 481 training return: tensor(84.1329, device='cuda:0')
episode: 482 training return: tensor(88.8084, device='cuda:0')
episode: 483 training return: tensor(13.6981, device='cuda:0')
epoch: 121 test_true_pfm: 122.084409667348 sim_pfm: 87.51664129844866
episode: 484 training return: tensor(88.3446, device='cuda:0')
episode: 485 training return: tensor(87.8549, device='cuda:0')
episode: 486 training return: tensor(89.8146, device='cuda:0')
episode: 487 training return: tensor(87.8160, device='cuda:0')
epoch: 122 test_true_pfm: 123.12310847737442 sim_pfm: 77.78581759486697
episode: 488 training return: tensor(92.1210, device='cuda:0')
episode: 489 training return: tensor(85.6155, device='cuda:0')
episode: 490 training return: tensor(88.8357, device='cuda:0')
episode: 491 training return: tensor(73.3408, device='cuda:0')
epoch: 123 test_true_pfm: 92.37931769766297 sim_pfm: 62.69376356964349
episode: 492 training return: tensor(57.4883, device='cuda:0')
episode: 493 training return: tensor(88.9583, device='cuda:0')
episode: 494 training return: tensor(88.2312, device='cuda:0')
episode: 495 training return: tensor(93.2363, device='cuda:0')
epoch: 124 test_true_pfm: 112.11087960102671 sim_pfm: 89.40624885978177
episode: 496 training return: tensor(87.8269, device='cuda:0')
episode: 497 training return: tensor(96.4303, device='cuda:0')
episode: 498 training return: tensor(18.7429, device='cuda:0')
episode: 499 training return: tensor(20.4490, device='cuda:0')
epoch: 125 test_true_pfm: 115.24041535495576 sim_pfm: 88.30768319752534
episode: 500 training return: tensor(13.3330, device='cuda:0')
episode: 501 training return: tensor(87.4359, device='cuda:0')
episode: 502 training return: tensor(4.2556, device='cuda:0')
episode: 503 training return: tensor(81.2734, device='cuda:0')
epoch: 126 test_true_pfm: 71.79153490211527 sim_pfm: 28.768246239819565
episode: 504 training return: tensor(88.5115, device='cuda:0')
episode: 505 training return: tensor(14.9239, device='cuda:0')
episode: 506 training return: tensor(7.9732, device='cuda:0')
episode: 507 training return: tensor(21.2272, device='cuda:0')
epoch: 127 test_true_pfm: 111.9312731034388 sim_pfm: 27.667862281762062
episode: 508 training return: tensor(72.2032, device='cuda:0')
episode: 509 training return: tensor(64.5888, device='cuda:0')
episode: 510 training return: tensor(90.5828, device='cuda:0')
episode: 511 training return: tensor(18.3880, device='cuda:0')
epoch: 128 test_true_pfm: 127.27624767798031 sim_pfm: 73.3816718522692
episode: 512 training return: tensor(84.5937, device='cuda:0')
episode: 513 training return: tensor(90.8266, device='cuda:0')
episode: 514 training return: tensor(90.2453, device='cuda:0')
episode: 515 training return: tensor(45.4481, device='cuda:0')
epoch: 129 test_true_pfm: 81.39032629563219 sim_pfm: 45.21199462881195
episode: 516 training return: tensor(91.4634, device='cuda:0')
episode: 517 training return: tensor(89.5929, device='cuda:0')
episode: 518 training return: tensor(80.8558, device='cuda:0')
episode: 519 training return: tensor(89.1505, device='cuda:0')
epoch: 130 test_true_pfm: 108.83752408749399 sim_pfm: 59.23934300171677
episode: 520 training return: tensor(79.9115, device='cuda:0')
episode: 521 training return: tensor(91.9283, device='cuda:0')
episode: 522 training return: tensor(87.2649, device='cuda:0')
episode: 523 training return: tensor(91.4572, device='cuda:0')
epoch: 131 test_true_pfm: 92.2915810231984 sim_pfm: 64.16859868133906
episode: 524 training return: tensor(9.5376, device='cuda:0')
episode: 525 training return: tensor(18.8401, device='cuda:0')
episode: 526 training return: tensor(88.8462, device='cuda:0')
episode: 527 training return: tensor(23.6279, device='cuda:0')
epoch: 132 test_true_pfm: 100.68356214955972 sim_pfm: 63.348799448390494
episode: 528 training return: tensor(88.1969, device='cuda:0')
episode: 529 training return: tensor(18.4175, device='cuda:0')
episode: 530 training return: tensor(87.9149, device='cuda:0')
episode: 531 training return: tensor(84.8467, device='cuda:0')
epoch: 133 test_true_pfm: 116.07985924214229 sim_pfm: 47.26039998907945
episode: 532 training return: tensor(87.4030, device='cuda:0')
episode: 533 training return: tensor(76.0357, device='cuda:0')
episode: 534 training return: tensor(81.0105, device='cuda:0')
episode: 535 training return: tensor(89.4681, device='cuda:0')
epoch: 134 test_true_pfm: 76.79645930856182 sim_pfm: 47.93488742049667
episode: 536 training return: tensor(19.4459, device='cuda:0')
episode: 537 training return: tensor(89.7769, device='cuda:0')
episode: 538 training return: tensor(14.8637, device='cuda:0')
episode: 539 training return: tensor(87.5429, device='cuda:0')
epoch: 135 test_true_pfm: 111.10209650715055 sim_pfm: 88.17596618311363
episode: 540 training return: tensor(3.8205, device='cuda:0')
episode: 541 training return: tensor(88.5122, device='cuda:0')
episode: 542 training return: tensor(88.6239, device='cuda:0')
episode: 543 training return: tensor(84.5524, device='cuda:0')
epoch: 136 test_true_pfm: 71.64774662194868 sim_pfm: 66.71066136383452
episode: 544 training return: tensor(92.7482, device='cuda:0')
episode: 545 training return: tensor(0.6871, device='cuda:0')
episode: 546 training return: tensor(31.5397, device='cuda:0')
episode: 547 training return: tensor(0.0299, device='cuda:0')
epoch: 137 test_true_pfm: 94.27315875847064 sim_pfm: 57.294345964246894
episode: 548 training return: tensor(20.8416, device='cuda:0')
episode: 549 training return: tensor(20.3251, device='cuda:0')
episode: 550 training return: tensor(94.4480, device='cuda:0')
episode: 551 training return: tensor(89.5321, device='cuda:0')
epoch: 138 test_true_pfm: 126.79421173026522 sim_pfm: 87.71396992190276
episode: 552 training return: tensor(77.5411, device='cuda:0')
episode: 553 training return: tensor(19.4775, device='cuda:0')
episode: 554 training return: tensor(88.5226, device='cuda:0')
episode: 555 training return: tensor(75.2987, device='cuda:0')
epoch: 139 test_true_pfm: 115.8901012863884 sim_pfm: 86.75886715730886
episode: 556 training return: tensor(10.8339, device='cuda:0')
episode: 557 training return: tensor(90.3463, device='cuda:0')
episode: 558 training return: tensor(90.2108, device='cuda:0')
episode: 559 training return: tensor(85.7880, device='cuda:0')
epoch: 140 test_true_pfm: 98.81204065495157 sim_pfm: 60.28026078186231
episode: 560 training return: tensor(90.3083, device='cuda:0')
episode: 561 training return: tensor(72.1319, device='cuda:0')
episode: 562 training return: tensor(87.6125, device='cuda:0')
episode: 563 training return: tensor(70.4584, device='cuda:0')
epoch: 141 test_true_pfm: 127.55434665793564 sim_pfm: 82.77118895705208
episode: 564 training return: tensor(16.5456, device='cuda:0')
episode: 565 training return: tensor(80.2439, device='cuda:0')
episode: 566 training return: tensor(75.4515, device='cuda:0')
episode: 567 training return: tensor(90.1027, device='cuda:0')
epoch: 142 test_true_pfm: 113.5383159150413 sim_pfm: 82.59711795041221
episode: 568 training return: tensor(92.3713, device='cuda:0')
episode: 569 training return: tensor(2.4131, device='cuda:0')
episode: 570 training return: tensor(86.4601, device='cuda:0')
episode: 571 training return: tensor(89.9521, device='cuda:0')
epoch: 143 test_true_pfm: 128.53874988494744 sim_pfm: 87.472708388709
episode: 572 training return: tensor(12.9970, device='cuda:0')
episode: 573 training return: tensor(19.9727, device='cuda:0')
episode: 574 training return: tensor(15.1084, device='cuda:0')
episode: 575 training return: tensor(87.8656, device='cuda:0')
epoch: 144 test_true_pfm: 118.48950323957195 sim_pfm: 73.5556726609997
episode: 576 training return: tensor(64.0864, device='cuda:0')
episode: 577 training return: tensor(88.7479, device='cuda:0')
episode: 578 training return: tensor(91.8881, device='cuda:0')
episode: 579 training return: tensor(93.2367, device='cuda:0')
epoch: 145 test_true_pfm: 114.83713753084385 sim_pfm: 75.10741922653979
episode: 580 training return: tensor(90.0334, device='cuda:0')
episode: 581 training return: tensor(86.0272, device='cuda:0')
episode: 582 training return: tensor(20.1862, device='cuda:0')
episode: 583 training return: tensor(97.9124, device='cuda:0')
epoch: 146 test_true_pfm: 125.33845299008269 sim_pfm: 69.70898276785738
episode: 584 training return: tensor(55.6247, device='cuda:0')
episode: 585 training return: tensor(73.9829, device='cuda:0')
episode: 586 training return: tensor(80.4138, device='cuda:0')
episode: 587 training return: tensor(47.6867, device='cuda:0')
epoch: 147 test_true_pfm: 129.62917671901945 sim_pfm: 76.3223873020499
episode: 588 training return: tensor(23.2554, device='cuda:0')
episode: 589 training return: tensor(78.1529, device='cuda:0')
episode: 590 training return: tensor(87.2373, device='cuda:0')
episode: 591 training return: tensor(89.0931, device='cuda:0')
epoch: 148 test_true_pfm: 128.26438223048427 sim_pfm: 87.16248481447693
episode: 592 training return: tensor(71.1241, device='cuda:0')
episode: 593 training return: tensor(94.8763, device='cuda:0')
episode: 594 training return: tensor(80.3940, device='cuda:0')
episode: 595 training return: tensor(93.5260, device='cuda:0')
epoch: 149 test_true_pfm: 125.47058266504571 sim_pfm: 87.45981471372652
episode: 596 training return: tensor(89.2002, device='cuda:0')
episode: 597 training return: tensor(92.0828, device='cuda:0')
episode: 598 training return: tensor(91.0640, device='cuda:0')
episode: 599 training return: tensor(84.6213, device='cuda:0')
epoch: 150 test_true_pfm: 128.08215201625416 sim_pfm: 86.56697782985866
