['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.4179554791748524 test_loss: 0.39531745910644533
epoch: 1 training_loss 0.35632179796695707 test_loss: 0.345088267326355
epoch: 2 training_loss 0.358438650816679 test_loss: 0.36484575271606445
epoch: 3 training_loss 0.36120554149150846 test_loss: 0.3567216157913208
epoch: 4 training_loss 0.34757976651191713 test_loss: 0.366086745262146
epoch: 5 training_loss 0.35962797090411186 test_loss: 0.3344056844711304
epoch: 6 training_loss 0.3462876108288765 test_loss: 0.38024766445159913
epoch: 7 training_loss 0.3526015667617321 test_loss: 0.3704641103744507
epoch: 8 training_loss 0.3617442859709263 test_loss: 0.344757080078125
epoch: 9 training_loss 0.3477437664568424 test_loss: 0.3489216804504395
epoch: 10 training_loss 0.34473950758576394 test_loss: 0.3526738405227661
epoch: 11 training_loss 0.35127571314573286 test_loss: 0.3514174222946167
epoch: 12 training_loss 0.3502225488424301 test_loss: 0.35206403732299807
epoch: 13 training_loss 0.35705125108361246 test_loss: 0.3328528642654419
epoch: 14 training_loss 0.34107376947999 test_loss: 0.3441797733306885
epoch: 15 training_loss 0.33172201231122017 test_loss: 0.3647376537322998
epoch: 16 training_loss 0.33683883979916573 test_loss: 0.34818761348724364
epoch: 17 training_loss 0.3450373883545399 test_loss: 0.34720840454101565
epoch: 18 training_loss 0.34425928831100466 test_loss: 0.3393567562103271
epoch: 19 training_loss 0.3389462992548943 test_loss: 0.37427217960357667
epoch: 20 training_loss 0.3332843270897865 test_loss: 0.3273235559463501
epoch: 21 training_loss 0.3488488633930683 test_loss: 0.36479260921478274
epoch: 22 training_loss 0.34712659642100335 test_loss: 0.34388036727905275
epoch: 23 training_loss 0.34795867338776587 test_loss: 0.3401538372039795
epoch: 24 training_loss 0.34771184280514716 test_loss: 0.35719192028045654
epoch: 25 training_loss 0.34791106298565866 test_loss: 0.32908880710601807
epoch: 26 training_loss 0.33711057096719743 test_loss: 0.35684938430786134
epoch: 27 training_loss 0.33955725878477094 test_loss: 0.32151503562927247
epoch: 28 training_loss 0.34428181827068327 test_loss: 0.3456265926361084
epoch: 29 training_loss 0.34749537989497187 test_loss: 0.32007763385772703
epoch: 30 training_loss 0.35433500960469244 test_loss: 0.3879114627838135
epoch: 31 training_loss 0.3486485396325588 test_loss: 0.3225877046585083
epoch: 32 training_loss 0.34799477517604827 test_loss: 0.3304833173751831
epoch: 33 training_loss 0.3492366972565651 test_loss: 0.36076452732086184
epoch: 34 training_loss 0.34967116847634316 test_loss: 0.325355076789856
epoch: 35 training_loss 0.3487636642158031 test_loss: 0.35268464088439944
epoch: 36 training_loss 0.34465897306799886 test_loss: 0.33974180221557615
epoch: 37 training_loss 0.3399438801407814 test_loss: 0.32657852172851565
epoch: 38 training_loss 0.34854124069213865 test_loss: 0.33551051616668703
epoch: 39 training_loss 0.3441036993265152 test_loss: 0.34992222785949706
epoch: 40 training_loss 0.34244754567742347 test_loss: 0.3296440839767456
epoch: 41 training_loss 0.34243491277098653 test_loss: 0.321270227432251
epoch: 42 training_loss 0.34741423413157463 test_loss: 0.35447258949279786
epoch: 43 training_loss 0.34057681143283847 test_loss: 0.3304255962371826
epoch: 44 training_loss 0.34117003992199896 test_loss: 0.353121018409729
epoch: 45 training_loss 0.34389241501688955 test_loss: 0.3282217502593994
epoch: 46 training_loss 0.34175670698285104 test_loss: 0.33571226596832277
epoch: 47 training_loss 0.3408311741054058 test_loss: 0.33459718227386476
epoch: 48 training_loss 0.334857232272625 test_loss: 0.34632794857025145
epoch: 49 training_loss 0.3317502281069756 test_loss: 0.3407782554626465
epoch: 50 training_loss 0.33881844237446784 test_loss: 0.32792823314666747
epoch: 51 training_loss 0.34527165815234184 test_loss: 0.33826208114624023
epoch: 52 training_loss 0.3491602206230164 test_loss: 0.3616062879562378
epoch: 53 training_loss 0.33783395409584044 test_loss: 0.3606647253036499
epoch: 54 training_loss 0.34777265444397926 test_loss: 0.3416962385177612
epoch: 55 training_loss 0.331949237883091 test_loss: 0.3436077117919922
epoch: 56 training_loss 0.34717915996909143 test_loss: 0.32395663261413576
epoch: 57 training_loss 0.3383374512195587 test_loss: 0.33843963146209716
epoch: 58 training_loss 0.33511272087693217 test_loss: 0.331708025932312
epoch: 59 training_loss 0.3377034910023212 test_loss: 0.3509207010269165
epoch: 60 training_loss 0.3496811906993389 test_loss: 0.33062598705291746
epoch: 61 training_loss 0.34047475814819333 test_loss: 0.362357497215271
epoch: 62 training_loss 0.3433841113746166 test_loss: 0.31892368793487547
epoch: 63 training_loss 0.3351293496787548 test_loss: 0.3573823690414429
epoch: 64 training_loss 0.347116676568985 test_loss: 0.34852802753448486
epoch: 65 training_loss 0.3401776303350925 test_loss: 0.3558738470077515
epoch: 66 training_loss 0.336798782646656 test_loss: 0.3486882448196411
epoch: 67 training_loss 0.3507115000486374 test_loss: 0.3579072952270508
epoch: 68 training_loss 0.3366353565454483 test_loss: 0.3292426347732544
epoch: 69 training_loss 0.3414522087574005 test_loss: 0.3396268606185913
epoch: 70 training_loss 0.3299316868185997 test_loss: 0.3225162267684937
epoch: 71 training_loss 0.3289070773124695 test_loss: 0.3349276304244995
epoch: 72 training_loss 0.33949620261788366 test_loss: 0.3110891103744507
epoch: 73 training_loss 0.3307307140529156 test_loss: 0.3347936153411865
epoch: 74 training_loss 0.34437515154480935 test_loss: 0.3360504150390625
epoch: 75 training_loss 0.33294600412249564 test_loss: 0.3452477931976318
epoch: 76 training_loss 0.3574028745293617 test_loss: 0.35307037830352783
epoch: 77 training_loss 0.3438472853600979 test_loss: 0.3333609104156494
epoch: 78 training_loss 0.33917861074209216 test_loss: 0.3084273338317871
epoch: 79 training_loss 0.3459674070775509 test_loss: 0.3467445135116577
epoch: 80 training_loss 0.3399631790816784 test_loss: 0.33183655738830564
epoch: 81 training_loss 0.33959558308124543 test_loss: 0.35150320529937745
epoch: 82 training_loss 0.3368447172641754 test_loss: 0.34257757663726807
epoch: 83 training_loss 0.34408180579543113 test_loss: 0.3287086486816406
epoch: 84 training_loss 0.3413975803554058 test_loss: 0.3561618089675903
epoch: 85 training_loss 0.32957968443632124 test_loss: 0.3584944009780884
epoch: 86 training_loss 0.3500939881801605 test_loss: 0.3369161605834961
epoch: 87 training_loss 0.3482754172384739 test_loss: 0.36587798595428467
epoch: 88 training_loss 0.3379891626536846 test_loss: 0.3501262903213501
epoch: 89 training_loss 0.3346865773200989 test_loss: 0.35883073806762694
epoch: 90 training_loss 0.32930693209171297 test_loss: 0.319644832611084
epoch: 91 training_loss 0.3439052280783653 test_loss: 0.3230064153671265
epoch: 92 training_loss 0.33491814121603963 test_loss: 0.355254602432251
epoch: 93 training_loss 0.3358298486471176 test_loss: 0.3560349941253662
epoch: 94 training_loss 0.33606240674853327 test_loss: 0.34081830978393557
epoch: 95 training_loss 0.34506170094013217 test_loss: 0.3388146162033081
epoch: 96 training_loss 0.34028165459632875 test_loss: 0.35191617012023924
epoch: 97 training_loss 0.34049029752612114 test_loss: 0.3492424011230469
epoch: 98 training_loss 0.34530199095606806 test_loss: 0.35034899711608886
epoch: 99 training_loss 0.34293438658118247 test_loss: 0.319242525100708
epoch: 100 training_loss 0.3319040906429291 test_loss: 0.3553212881088257
epoch: 101 training_loss 0.3277464956045151 test_loss: 0.33748617172241213
epoch: 102 training_loss 0.3437728467583656 test_loss: 0.3470998525619507
epoch: 103 training_loss 0.3392977610230446 test_loss: 0.33573362827301023
epoch: 104 training_loss 0.3426861104369163 test_loss: 0.3539836645126343
epoch: 105 training_loss 0.3384419658780098 test_loss: 0.33988816738128663
epoch: 106 training_loss 0.34533860728144644 test_loss: 0.3623995304107666
epoch: 107 training_loss 0.34120792865753174 test_loss: 0.30769524574279783
epoch: 108 training_loss 0.33197858929634094 test_loss: 0.3303900957107544
epoch: 109 training_loss 0.3332311473786831 test_loss: 0.3274958848953247
epoch: 110 training_loss 0.33883139342069624 test_loss: 0.3490215063095093
epoch: 111 training_loss 0.3477823433279991 test_loss: 0.3379767656326294
epoch: 112 training_loss 0.340949754267931 test_loss: 0.3494703769683838
epoch: 113 training_loss 0.3446311692893505 test_loss: 0.3325359582901001
epoch: 114 training_loss 0.34178814172744754 test_loss: 0.3311347007751465
epoch: 115 training_loss 0.33474533051252364 test_loss: 0.3265904188156128
epoch: 116 training_loss 0.3376509176194668 test_loss: 0.3657732725143433
epoch: 117 training_loss 0.3484605810046196 test_loss: 0.32068941593170164
epoch: 118 training_loss 0.3420074769854546 test_loss: 0.3287289381027222
epoch: 119 training_loss 0.340223993062973 test_loss: 0.3400683641433716
epoch: 120 training_loss 0.3406595638394356 test_loss: 0.35882470607757566
epoch: 121 training_loss 0.3323657910525799 test_loss: 0.33482894897460935
epoch: 122 training_loss 0.33872224628925324 test_loss: 0.3318163394927979
epoch: 123 training_loss 0.3439951865375042 test_loss: 0.3498192310333252
epoch: 124 training_loss 0.3438338416814804 test_loss: 0.3272474050521851
epoch: 125 training_loss 0.33126148745417594 test_loss: 0.3856191635131836
epoch: 126 training_loss 0.3470916847884655 test_loss: 0.36303446292877195
epoch: 127 training_loss 0.33533655673265456 test_loss: 0.30618581771850584
epoch: 128 training_loss 0.34362426966428755 test_loss: 0.3232255220413208
epoch: 129 training_loss 0.3431223946809769 test_loss: 0.32590737342834475
epoch: 130 training_loss 0.34442911997437475 test_loss: 0.33360719680786133
epoch: 131 training_loss 0.3433317944407463 test_loss: 0.352449369430542
epoch: 132 training_loss 0.3382597504556179 test_loss: 0.3449385166168213
epoch: 133 training_loss 0.34093675285577774 test_loss: 0.3287464380264282
epoch: 134 training_loss 0.345057060867548 test_loss: 0.3432114839553833
epoch: 135 training_loss 0.34701447546482084 test_loss: 0.34367055892944337
epoch: 136 training_loss 0.3484730756282806 test_loss: 0.38347103595733645
epoch: 137 training_loss 0.335994531661272 test_loss: 0.33571667671203614
epoch: 138 training_loss 0.3420007872581482 test_loss: 0.3094093561172485
epoch: 139 training_loss 0.3358949844539165 test_loss: 0.3257213354110718
epoch: 140 training_loss 0.34339264541864395 test_loss: 0.33140685558319094
epoch: 141 training_loss 0.3497731857001781 test_loss: 0.32438366413116454
epoch: 142 training_loss 0.33420842126011846 test_loss: 0.33206562995910643
epoch: 143 training_loss 0.34407227352261543 test_loss: 0.32561564445495605
epoch: 144 training_loss 0.33723538264632225 test_loss: 0.34663259983062744
epoch: 145 training_loss 0.33971657454967497 test_loss: 0.33150300979614256
epoch: 146 training_loss 0.3347575278580189 test_loss: 0.3449759006500244
epoch: 147 training_loss 0.3387452618777752 test_loss: 0.33048832416534424
epoch: 148 training_loss 0.3322338517010212 test_loss: 0.34636540412902833
epoch: 149 training_loss 0.33557042479515076 test_loss: 0.3339117765426636
epoch: 0 training_loss 37.92976566314697 test_loss: 27.501522827148438
epoch: 1 training_loss 22.979511661529543 test_loss: 20.41119842529297
epoch: 2 training_loss 18.31808903694153 test_loss: 16.955831909179686
epoch: 3 training_loss 15.854498281478882 test_loss: 15.33282470703125
epoch: 4 training_loss 14.459192533493042 test_loss: 13.349758911132813
epoch: 5 training_loss 13.023728008270263 test_loss: 12.787922668457032
epoch: 6 training_loss 12.516365985870362 test_loss: 12.53804702758789
epoch: 7 training_loss 11.657318506240845 test_loss: 11.925954437255859
epoch: 8 training_loss 11.133137083053589 test_loss: 11.139389038085938
epoch: 9 training_loss 10.79639157295227 test_loss: 10.685445404052734
epoch: 10 training_loss 10.422944793701172 test_loss: 10.556027221679688
epoch: 11 training_loss 10.069821701049804 test_loss: 9.867195129394531
epoch: 12 training_loss 9.833708553314208 test_loss: 9.173135375976562
epoch: 13 training_loss 9.510948810577393 test_loss: 9.561917877197265
epoch: 14 training_loss 9.308019995689392 test_loss: 8.531282806396485
epoch: 15 training_loss 8.95081431865692 test_loss: 8.546012115478515
epoch: 16 training_loss 8.752495541572571 test_loss: 8.558847808837891
epoch: 17 training_loss 8.63565848827362 test_loss: 8.549649810791015
epoch: 18 training_loss 8.294107737541198 test_loss: 8.230906677246093
epoch: 19 training_loss 8.285108127593993 test_loss: 8.243440246582031
epoch: 20 training_loss 8.138158864974976 test_loss: 7.868132781982422
epoch: 21 training_loss 7.917744140625 test_loss: 8.033647918701172
epoch: 22 training_loss 7.736214809417724 test_loss: 7.569730377197265
epoch: 23 training_loss 7.570531907081604 test_loss: 7.552196502685547
epoch: 24 training_loss 7.520911726951599 test_loss: 7.193592071533203
epoch: 25 training_loss 7.384000830650329 test_loss: 7.411123657226563
epoch: 26 training_loss 7.271244683265686 test_loss: 7.063676452636718
epoch: 27 training_loss 7.144171357154846 test_loss: 6.8973533630371096
epoch: 28 training_loss 7.001857876777649 test_loss: 6.890474700927735
epoch: 29 training_loss 6.81292757987976 test_loss: 6.7025093078613285
epoch: 30 training_loss 6.614652872085571 test_loss: 6.501840972900391
epoch: 31 training_loss 6.610082187652588 test_loss: 6.824533081054687
epoch: 32 training_loss 6.509790859222412 test_loss: 6.87515869140625
epoch: 33 training_loss 6.221223940849304 test_loss: 6.3724830627441404
epoch: 34 training_loss 6.225291743278503 test_loss: 6.205321502685547
epoch: 35 training_loss 6.082159328460693 test_loss: 5.955117034912109
epoch: 36 training_loss 6.05917763710022 test_loss: 5.896692276000977
epoch: 37 training_loss 6.083280138969421 test_loss: 6.084774780273437
epoch: 38 training_loss 5.957246661186218 test_loss: 5.859320831298828
epoch: 39 training_loss 5.9495779371261595 test_loss: 6.522678375244141
epoch: 40 training_loss 6.019295873641968 test_loss: 5.87464485168457
epoch: 41 training_loss 5.830013933181763 test_loss: 5.829360580444336
epoch: 42 training_loss 5.68889087677002 test_loss: 5.796245574951172
epoch: 43 training_loss 5.643208560943603 test_loss: 5.756511306762695
epoch: 44 training_loss 5.71432918548584 test_loss: 5.690057754516602
epoch: 45 training_loss 5.613525996208191 test_loss: 5.6577198028564455
epoch: 46 training_loss 5.549776258468628 test_loss: 5.514701080322266
epoch: 47 training_loss 5.478307671546936 test_loss: 5.343599319458008
epoch: 48 training_loss 5.295514116287231 test_loss: 5.445382690429687
epoch: 49 training_loss 5.429510836601257 test_loss: 5.379556274414062
epoch: 50 training_loss 5.156300868988037 test_loss: 5.212706756591797
epoch: 51 training_loss 5.126888117790222 test_loss: 5.271503829956055
epoch: 52 training_loss 5.209841289520264 test_loss: 5.249044799804688
epoch: 53 training_loss 5.0446346521377565 test_loss: 4.987522125244141
epoch: 54 training_loss 4.958143947124481 test_loss: 5.004484939575195
epoch: 55 training_loss 4.886893806457519 test_loss: 5.475481796264648
epoch: 56 training_loss 5.04426342010498 test_loss: 5.341773986816406
epoch: 57 training_loss 4.874858694076538 test_loss: 4.88543815612793
epoch: 58 training_loss 4.906691153049469 test_loss: 4.575583267211914
epoch: 59 training_loss 4.793303463459015 test_loss: 4.85633659362793
epoch: 60 training_loss 4.84259850025177 test_loss: 4.976401138305664
epoch: 61 training_loss 4.965335831642151 test_loss: 4.981619262695313
epoch: 62 training_loss 4.797069637775421 test_loss: 4.8185874938964846
epoch: 63 training_loss 4.754256489276886 test_loss: 5.1887260437011715
epoch: 64 training_loss 4.720438871383667 test_loss: 5.2436267852783205
epoch: 65 training_loss 4.706562607288361 test_loss: 4.716387939453125
epoch: 66 training_loss 4.523992431163788 test_loss: 4.491958236694336
epoch: 67 training_loss 4.672938306331634 test_loss: 4.751815414428711
epoch: 68 training_loss 4.7930012536048885 test_loss: 4.785612487792969
epoch: 69 training_loss 4.558514335155487 test_loss: 4.34770393371582
epoch: 70 training_loss 4.6149384093284604 test_loss: 4.814999389648437
epoch: 71 training_loss 4.547862033843995 test_loss: 4.38763542175293
epoch: 72 training_loss 4.490274305343628 test_loss: 4.411119842529297
epoch: 73 training_loss 4.41574273109436 test_loss: 4.561363983154297
epoch: 74 training_loss 4.5827745342254635 test_loss: 4.3376506805419925
epoch: 75 training_loss 4.403002915382385 test_loss: 4.597086334228516
epoch: 76 training_loss 4.443374652862548 test_loss: 4.659233093261719
epoch: 77 training_loss 4.421994140148163 test_loss: 4.434476470947265
epoch: 78 training_loss 4.410992541313171 test_loss: 4.220873260498047
epoch: 79 training_loss 4.2732075548171995 test_loss: 4.258465957641602
epoch: 80 training_loss 4.352364583015442 test_loss: 4.088544082641602
epoch: 81 training_loss 4.366295685768128 test_loss: 4.500923919677734
epoch: 82 training_loss 4.299954657554626 test_loss: 4.384490203857422
epoch: 83 training_loss 4.246318891048431 test_loss: 4.2977745056152346
epoch: 84 training_loss 4.3899991726875305 test_loss: 4.519182205200195
epoch: 85 training_loss 4.277469851970673 test_loss: 4.290862274169922
epoch: 86 training_loss 4.1319786334037785 test_loss: 4.241145324707031
epoch: 87 training_loss 4.39144095659256 test_loss: 4.340365219116211
epoch: 88 training_loss 4.112595899105072 test_loss: 4.155033111572266
epoch: 89 training_loss 4.245190105438232 test_loss: 4.172112655639649
epoch: 90 training_loss 4.1597483706474305 test_loss: 4.134933853149414
epoch: 91 training_loss 4.184250802993774 test_loss: 4.521740341186524
epoch: 92 training_loss 4.089595303535462 test_loss: 4.06513671875
epoch: 93 training_loss 4.069625408649444 test_loss: 4.259075164794922
epoch: 94 training_loss 4.142416603565216 test_loss: 4.260769653320312
epoch: 95 training_loss 4.047689683437348 test_loss: 4.230464172363281
epoch: 96 training_loss 4.321150238513947 test_loss: 4.1859687805175785
epoch: 97 training_loss 4.06573944568634 test_loss: 4.131603622436524
epoch: 98 training_loss 4.109284157752991 test_loss: 4.0077659606933596
epoch: 99 training_loss 3.980027129650116 test_loss: 4.048661422729492
epoch: 100 training_loss 3.9664838790893553 test_loss: 3.914590835571289
epoch: 101 training_loss 4.3735262751579285 test_loss: 4.406067657470703
epoch: 102 training_loss 3.8871258091926575 test_loss: 3.9863162994384767
epoch: 103 training_loss 3.956521735191345 test_loss: 3.9816123962402346
epoch: 104 training_loss 3.977360117435455 test_loss: 3.921855926513672
epoch: 105 training_loss 3.8638853931427004 test_loss: 3.949142837524414
epoch: 106 training_loss 3.991991581916809 test_loss: 3.770320510864258
epoch: 107 training_loss 4.074935512542725 test_loss: 3.8176002502441406
epoch: 108 training_loss 3.8327482199668883 test_loss: 3.9851741790771484
epoch: 109 training_loss 3.9332241797447205 test_loss: 3.853913116455078
epoch: 110 training_loss 4.006248049736023 test_loss: 4.8599906921386715
epoch: 111 training_loss 3.9688832902908326 test_loss: 3.8851165771484375
epoch: 112 training_loss 3.883883686065674 test_loss: 4.551829528808594
epoch: 113 training_loss 3.98716822385788 test_loss: 4.048805236816406
epoch: 114 training_loss 3.808984360694885 test_loss: 3.7912708282470704
epoch: 115 training_loss 3.7876299834251403 test_loss: 3.956402587890625
epoch: 116 training_loss 3.937542004585266 test_loss: 3.9029136657714845
epoch: 117 training_loss 3.846151955127716 test_loss: 3.5433521270751953
epoch: 118 training_loss 3.813666579723358 test_loss: 3.974025344848633
epoch: 119 training_loss 3.763344740867615 test_loss: 3.9794033050537108
epoch: 120 training_loss 3.8359551501274107 test_loss: 3.5493000030517576
epoch: 121 training_loss 3.951519980430603 test_loss: 3.758335494995117
epoch: 122 training_loss 3.775532262325287 test_loss: 3.786937713623047
epoch: 123 training_loss 3.80559974193573 test_loss: 3.5318626403808593
epoch: 124 training_loss 3.81586243391037 test_loss: 3.698212432861328
epoch: 125 training_loss 3.6652603554725647 test_loss: 3.830032730102539
epoch: 126 training_loss 3.76061110496521 test_loss: 3.7378353118896483
epoch: 127 training_loss 3.690367805957794 test_loss: 3.9279830932617186
epoch: 128 training_loss 3.779550364017487 test_loss: 3.936764144897461
epoch: 129 training_loss 3.719321131706238 test_loss: 3.9057601928710937
epoch: 130 training_loss 3.679938564300537 test_loss: 3.8359294891357423
epoch: 131 training_loss 3.706268286705017 test_loss: 3.799458312988281
epoch: 132 training_loss 3.6803900051116942 test_loss: 3.59039306640625
epoch: 133 training_loss 3.6697139620780943 test_loss: 3.663946533203125
epoch: 134 training_loss 3.672658152580261 test_loss: 3.579182815551758
epoch: 135 training_loss 3.879569997787476 test_loss: 3.7852081298828124
epoch: 136 training_loss 3.7420798349380493 test_loss: 3.732711410522461
epoch: 137 training_loss 3.585556151866913 test_loss: 3.657151794433594
epoch: 138 training_loss 3.6513446307182313 test_loss: 3.662047576904297
epoch: 139 training_loss 3.6005744099617005 test_loss: 3.5422183990478517
epoch: 140 training_loss 3.6171215653419493 test_loss: 3.939104461669922
epoch: 141 training_loss 3.6713887739181517 test_loss: 3.7584197998046873
epoch: 142 training_loss 3.5284275150299074 test_loss: 3.5290058135986326
epoch: 143 training_loss 3.4025928378105164 test_loss: 3.6704883575439453
epoch: 144 training_loss 3.667237777709961 test_loss: 3.4063671112060545
epoch: 145 training_loss 3.4893315052986145 test_loss: 3.6724197387695314
epoch: 146 training_loss 3.4723807334899903 test_loss: 3.4800189971923827
epoch: 147 training_loss 3.6620858240127565 test_loss: 3.504054641723633
epoch: 148 training_loss 3.4897651529312133 test_loss: 3.6646774291992186
epoch: 149 training_loss 3.4789367508888245 test_loss: 3.586153030395508
63.91491139347888
episode: 0 training return: tensor(64.6434, device='cuda:0')
episode: 1 training return: tensor(43.5394, device='cuda:0')
episode: 2 training return: tensor(43.0486, device='cuda:0')
episode: 3 training return: tensor(62.8427, device='cuda:0')
epoch: 1 test_true_pfm: 102.68283047880486 sim_pfm: 51.61048273997731
episode: 4 training return: tensor(10.1325, device='cuda:0')
episode: 5 training return: tensor(50.1547, device='cuda:0')
episode: 6 training return: tensor(71.4845, device='cuda:0')
episode: 7 training return: tensor(50.0561, device='cuda:0')
epoch: 2 test_true_pfm: 102.01841321816211 sim_pfm: 65.21504513942054
episode: 8 training return: tensor(44.2184, device='cuda:0')
episode: 9 training return: tensor(11.6349, device='cuda:0')
episode: 10 training return: tensor(8.6993, device='cuda:0')
episode: 11 training return: tensor(11.7276, device='cuda:0')
epoch: 3 test_true_pfm: 100.775840311407 sim_pfm: 50.95398541954346
episode: 12 training return: tensor(40.7138, device='cuda:0')
episode: 13 training return: tensor(75.8721, device='cuda:0')
episode: 14 training return: tensor(79.8239, device='cuda:0')
episode: 15 training return: tensor(58.7373, device='cuda:0')
epoch: 4 test_true_pfm: 78.56758456691551 sim_pfm: 51.1489642227185
episode: 16 training return: tensor(52.3288, device='cuda:0')
episode: 17 training return: tensor(72.3213, device='cuda:0')
episode: 18 training return: tensor(13.0287, device='cuda:0')
episode: 19 training return: tensor(14.2584, device='cuda:0')
epoch: 5 test_true_pfm: 81.60481453794925 sim_pfm: 33.735483448154994
episode: 20 training return: tensor(72.7946, device='cuda:0')
episode: 21 training return: tensor(81.7852, device='cuda:0')
episode: 22 training return: tensor(38.3218, device='cuda:0')
episode: 23 training return: tensor(36.1696, device='cuda:0')
epoch: 6 test_true_pfm: 103.95949313797011 sim_pfm: 55.26538197104819
episode: 24 training return: tensor(80.7521, device='cuda:0')
episode: 25 training return: tensor(10.0986, device='cuda:0')
episode: 26 training return: tensor(81.7464, device='cuda:0')
episode: 27 training return: tensor(83.1095, device='cuda:0')
epoch: 7 test_true_pfm: 86.21343017490013 sim_pfm: 70.13079995453009
episode: 28 training return: tensor(42.7743, device='cuda:0')
episode: 29 training return: tensor(84.0725, device='cuda:0')
episode: 30 training return: tensor(45.6943, device='cuda:0')
episode: 31 training return: tensor(38.6966, device='cuda:0')
epoch: 8 test_true_pfm: 103.79683751052755 sim_pfm: 55.859347719227664
episode: 32 training return: tensor(41.0293, device='cuda:0')
episode: 33 training return: tensor(79.9247, device='cuda:0')
episode: 34 training return: tensor(85.6759, device='cuda:0')
episode: 35 training return: tensor(46.0701, device='cuda:0')
epoch: 9 test_true_pfm: 119.5324039673342 sim_pfm: 66.41183535809978
episode: 36 training return: tensor(73.0242, device='cuda:0')
episode: 37 training return: tensor(78.0346, device='cuda:0')
episode: 38 training return: tensor(80.9143, device='cuda:0')
episode: 39 training return: tensor(83.8661, device='cuda:0')
epoch: 10 test_true_pfm: 117.65117759255813 sim_pfm: 65.55684904842056
episode: 40 training return: tensor(85.3457, device='cuda:0')
episode: 41 training return: tensor(79.2096, device='cuda:0')
episode: 42 training return: tensor(84.0836, device='cuda:0')
episode: 43 training return: tensor(85.0696, device='cuda:0')
epoch: 11 test_true_pfm: 125.00566569368179 sim_pfm: 80.79627306624897
episode: 44 training return: tensor(12.3622, device='cuda:0')
episode: 45 training return: tensor(44.5347, device='cuda:0')
episode: 46 training return: tensor(42.9013, device='cuda:0')
episode: 47 training return: tensor(82.6230, device='cuda:0')
epoch: 12 test_true_pfm: 115.1297129275913 sim_pfm: 73.6886162493145
episode: 48 training return: tensor(88.3900, device='cuda:0')
episode: 49 training return: tensor(9.2438, device='cuda:0')
episode: 50 training return: tensor(82.0578, device='cuda:0')
episode: 51 training return: tensor(79.1260, device='cuda:0')
epoch: 13 test_true_pfm: 129.68553633612441 sim_pfm: 55.12804848723463
episode: 52 training return: tensor(77.7277, device='cuda:0')
episode: 53 training return: tensor(84.3332, device='cuda:0')
episode: 54 training return: tensor(77.5637, device='cuda:0')
episode: 55 training return: tensor(87.1086, device='cuda:0')
epoch: 14 test_true_pfm: 92.07058092066947 sim_pfm: 49.415604501281635
episode: 56 training return: tensor(46.4515, device='cuda:0')
episode: 57 training return: tensor(83.6029, device='cuda:0')
episode: 58 training return: tensor(75.1664, device='cuda:0')
episode: 59 training return: tensor(25.4801, device='cuda:0')
epoch: 15 test_true_pfm: 80.82232132039029 sim_pfm: 58.444643565651496
episode: 60 training return: tensor(81.4579, device='cuda:0')
episode: 61 training return: tensor(40.9919, device='cuda:0')
episode: 62 training return: tensor(78.5679, device='cuda:0')
episode: 63 training return: tensor(14.6871, device='cuda:0')
epoch: 16 test_true_pfm: 111.35310830951312 sim_pfm: 37.09155872482806
episode: 64 training return: tensor(44.6172, device='cuda:0')
episode: 65 training return: tensor(46.4205, device='cuda:0')
episode: 66 training return: tensor(55.1003, device='cuda:0')
episode: 67 training return: tensor(44.5942, device='cuda:0')
epoch: 17 test_true_pfm: 112.20402474050177 sim_pfm: 72.17830819950322
episode: 68 training return: tensor(78.7348, device='cuda:0')
episode: 69 training return: tensor(65.6292, device='cuda:0')
episode: 70 training return: tensor(82.0625, device='cuda:0')
episode: 71 training return: tensor(80.5398, device='cuda:0')
epoch: 18 test_true_pfm: 110.39731584434685 sim_pfm: 34.17662276969058
episode: 72 training return: tensor(12.0361, device='cuda:0')
episode: 73 training return: tensor(79.9274, device='cuda:0')
episode: 74 training return: tensor(46.8146, device='cuda:0')
episode: 75 training return: tensor(86.4828, device='cuda:0')
epoch: 19 test_true_pfm: 109.80410769457292 sim_pfm: 74.4885782327503
episode: 76 training return: tensor(77.4569, device='cuda:0')
episode: 77 training return: tensor(66.1307, device='cuda:0')
episode: 78 training return: tensor(87.4573, device='cuda:0')
episode: 79 training return: tensor(81.9592, device='cuda:0')
epoch: 20 test_true_pfm: 115.93221131718255 sim_pfm: 70.9486372076266
episode: 80 training return: tensor(80.7807, device='cuda:0')
episode: 81 training return: tensor(51.3231, device='cuda:0')
episode: 82 training return: tensor(76.2146, device='cuda:0')
episode: 83 training return: tensor(71.3658, device='cuda:0')
epoch: 21 test_true_pfm: 95.92457866034675 sim_pfm: 63.58699003090733
episode: 84 training return: tensor(13.2846, device='cuda:0')
episode: 85 training return: tensor(78.8479, device='cuda:0')
episode: 86 training return: tensor(82.1381, device='cuda:0')
episode: 87 training return: tensor(69.3541, device='cuda:0')
epoch: 22 test_true_pfm: 123.55116440823447 sim_pfm: 58.49824985089945
episode: 88 training return: tensor(89.6691, device='cuda:0')
episode: 89 training return: tensor(85.5562, device='cuda:0')
episode: 90 training return: tensor(61.5430, device='cuda:0')
episode: 91 training return: tensor(82.1690, device='cuda:0')
epoch: 23 test_true_pfm: 95.22697950402983 sim_pfm: 53.661335023638095
episode: 92 training return: tensor(86.0293, device='cuda:0')
episode: 93 training return: tensor(87.4415, device='cuda:0')
episode: 94 training return: tensor(44.2337, device='cuda:0')
episode: 95 training return: tensor(44.5508, device='cuda:0')
epoch: 24 test_true_pfm: 112.14390416386682 sim_pfm: 58.021999979199606
episode: 96 training return: tensor(69.7204, device='cuda:0')
episode: 97 training return: tensor(84.9103, device='cuda:0')
episode: 98 training return: tensor(11.6569, device='cuda:0')
episode: 99 training return: tensor(15.1431, device='cuda:0')
epoch: 25 test_true_pfm: 58.81707013252126 sim_pfm: 34.277576511766526
episode: 100 training return: tensor(11.5268, device='cuda:0')
episode: 101 training return: tensor(13.3177, device='cuda:0')
episode: 102 training return: tensor(82.7982, device='cuda:0')
episode: 103 training return: tensor(62.6520, device='cuda:0')
epoch: 26 test_true_pfm: 65.445535517439 sim_pfm: 14.809363826585468
episode: 104 training return: tensor(38.5625, device='cuda:0')
episode: 105 training return: tensor(8.3192, device='cuda:0')
episode: 106 training return: tensor(47.3974, device='cuda:0')
episode: 107 training return: tensor(44.3922, device='cuda:0')
epoch: 27 test_true_pfm: 108.72201626884831 sim_pfm: 29.45908881817013
episode: 108 training return: tensor(85.1932, device='cuda:0')
episode: 109 training return: tensor(38.6949, device='cuda:0')
episode: 110 training return: tensor(10.6411, device='cuda:0')
episode: 111 training return: tensor(37.0969, device='cuda:0')
epoch: 28 test_true_pfm: 66.78373301018027 sim_pfm: 42.68908282686025
episode: 112 training return: tensor(13.2910, device='cuda:0')
episode: 113 training return: tensor(45.6775, device='cuda:0')
episode: 114 training return: tensor(83.2636, device='cuda:0')
episode: 115 training return: tensor(11.5558, device='cuda:0')
epoch: 29 test_true_pfm: 84.62838158047433 sim_pfm: 45.29451103575993
episode: 116 training return: tensor(47.1632, device='cuda:0')
episode: 117 training return: tensor(41.8232, device='cuda:0')
episode: 118 training return: tensor(42.3446, device='cuda:0')
episode: 119 training return: tensor(69.8584, device='cuda:0')
epoch: 30 test_true_pfm: 108.57999157729944 sim_pfm: 64.40808637546142
episode: 120 training return: tensor(29.2870, device='cuda:0')
episode: 121 training return: tensor(39.9218, device='cuda:0')
episode: 122 training return: tensor(72.6287, device='cuda:0')
episode: 123 training return: tensor(40.8955, device='cuda:0')
epoch: 31 test_true_pfm: 99.99838763794295 sim_pfm: 64.19180982502294
episode: 124 training return: tensor(66.3308, device='cuda:0')
episode: 125 training return: tensor(32.0544, device='cuda:0')
episode: 126 training return: tensor(31.9655, device='cuda:0')
episode: 127 training return: tensor(43.1012, device='cuda:0')
epoch: 32 test_true_pfm: 133.88476462922165 sim_pfm: 78.09515188115184
episode: 128 training return: tensor(47.4446, device='cuda:0')
episode: 129 training return: tensor(82.2163, device='cuda:0')
episode: 130 training return: tensor(46.3759, device='cuda:0')
episode: 131 training return: tensor(66.0964, device='cuda:0')
epoch: 33 test_true_pfm: 94.76874109512391 sim_pfm: 61.61084662397625
episode: 132 training return: tensor(82.5813, device='cuda:0')
episode: 133 training return: tensor(54.9589, device='cuda:0')
episode: 134 training return: tensor(79.8071, device='cuda:0')
episode: 135 training return: tensor(44.1461, device='cuda:0')
epoch: 34 test_true_pfm: 116.44078898280421 sim_pfm: 82.82096018411684
episode: 136 training return: tensor(82.8229, device='cuda:0')
episode: 137 training return: tensor(49.0210, device='cuda:0')
episode: 138 training return: tensor(51.0496, device='cuda:0')
episode: 139 training return: tensor(47.5046, device='cuda:0')
epoch: 35 test_true_pfm: 87.0303661629732 sim_pfm: 73.84939397238777
episode: 140 training return: tensor(13.2385, device='cuda:0')
episode: 141 training return: tensor(45.6457, device='cuda:0')
episode: 142 training return: tensor(59.9160, device='cuda:0')
episode: 143 training return: tensor(87.8503, device='cuda:0')
epoch: 36 test_true_pfm: 94.17353947426788 sim_pfm: 72.42517655707198
episode: 144 training return: tensor(87.9207, device='cuda:0')
episode: 145 training return: tensor(13.0163, device='cuda:0')
episode: 146 training return: tensor(73.6871, device='cuda:0')
episode: 147 training return: tensor(86.0478, device='cuda:0')
epoch: 37 test_true_pfm: 126.56618270729044 sim_pfm: 57.93167661885964
episode: 148 training return: tensor(79.3771, device='cuda:0')
episode: 149 training return: tensor(48.7023, device='cuda:0')
episode: 150 training return: tensor(45.4986, device='cuda:0')
episode: 151 training return: tensor(82.8441, device='cuda:0')
epoch: 38 test_true_pfm: 103.37080007834948 sim_pfm: 41.00599113260978
episode: 152 training return: tensor(69.7910, device='cuda:0')
episode: 153 training return: tensor(78.7840, device='cuda:0')
episode: 154 training return: tensor(81.3186, device='cuda:0')
episode: 155 training return: tensor(46.9543, device='cuda:0')
epoch: 39 test_true_pfm: 120.49642994699052 sim_pfm: 71.57151010253583
episode: 156 training return: tensor(13.1931, device='cuda:0')
episode: 157 training return: tensor(57.5224, device='cuda:0')
episode: 158 training return: tensor(42.9929, device='cuda:0')
episode: 159 training return: tensor(75.1524, device='cuda:0')
epoch: 40 test_true_pfm: 114.8566781266313 sim_pfm: 75.46136777937063
episode: 160 training return: tensor(16.6374, device='cuda:0')
episode: 161 training return: tensor(80.7416, device='cuda:0')
episode: 162 training return: tensor(59.8482, device='cuda:0')
episode: 163 training return: tensor(80.0040, device='cuda:0')
epoch: 41 test_true_pfm: 110.7003313644806 sim_pfm: 58.46463933159248
episode: 164 training return: tensor(56.8358, device='cuda:0')
episode: 165 training return: tensor(75.8622, device='cuda:0')
episode: 166 training return: tensor(81.6936, device='cuda:0')
episode: 167 training return: tensor(36.0601, device='cuda:0')
epoch: 42 test_true_pfm: 107.32730711875577 sim_pfm: 77.87701639602892
episode: 168 training return: tensor(86.1215, device='cuda:0')
episode: 169 training return: tensor(65.5476, device='cuda:0')
episode: 170 training return: tensor(66.5156, device='cuda:0')
episode: 171 training return: tensor(92.9791, device='cuda:0')
epoch: 43 test_true_pfm: 114.09517614026849 sim_pfm: 73.42988198643434
episode: 172 training return: tensor(90.9656, device='cuda:0')
episode: 173 training return: tensor(82.7603, device='cuda:0')
episode: 174 training return: tensor(82.4058, device='cuda:0')
episode: 175 training return: tensor(87.0555, device='cuda:0')
epoch: 44 test_true_pfm: 127.33873664330304 sim_pfm: 78.13904698541155
episode: 176 training return: tensor(84.4833, device='cuda:0')
episode: 177 training return: tensor(31.2241, device='cuda:0')
episode: 178 training return: tensor(10.1508, device='cuda:0')
episode: 179 training return: tensor(84.4048, device='cuda:0')
epoch: 45 test_true_pfm: 108.49987350531342 sim_pfm: 61.57210873531294
episode: 180 training return: tensor(64.6817, device='cuda:0')
episode: 181 training return: tensor(74.1390, device='cuda:0')
episode: 182 training return: tensor(46.3090, device='cuda:0')
episode: 183 training return: tensor(82.9895, device='cuda:0')
epoch: 46 test_true_pfm: 122.01987172740753 sim_pfm: 73.0529047854885
episode: 184 training return: tensor(87.6367, device='cuda:0')
episode: 185 training return: tensor(82.7872, device='cuda:0')
episode: 186 training return: tensor(84.1981, device='cuda:0')
episode: 187 training return: tensor(78.8029, device='cuda:0')
epoch: 47 test_true_pfm: 121.26231358666625 sim_pfm: 70.7506710231537
episode: 188 training return: tensor(76.4363, device='cuda:0')
episode: 189 training return: tensor(84.6393, device='cuda:0')
episode: 190 training return: tensor(47.9618, device='cuda:0')
episode: 191 training return: tensor(84.8443, device='cuda:0')
epoch: 48 test_true_pfm: 125.1043713834786 sim_pfm: 79.90359936675523
episode: 192 training return: tensor(86.2099, device='cuda:0')
episode: 193 training return: tensor(83.5816, device='cuda:0')
episode: 194 training return: tensor(89.8335, device='cuda:0')
episode: 195 training return: tensor(76.1303, device='cuda:0')
epoch: 49 test_true_pfm: 126.26283688237083 sim_pfm: 78.25898029576055
episode: 196 training return: tensor(69.8931, device='cuda:0')
episode: 197 training return: tensor(69.1736, device='cuda:0')
episode: 198 training return: tensor(54.3377, device='cuda:0')
episode: 199 training return: tensor(83.3564, device='cuda:0')
epoch: 50 test_true_pfm: 106.84031754470178 sim_pfm: 74.13127256958978
episode: 200 training return: tensor(88.2162, device='cuda:0')
episode: 201 training return: tensor(3.4149, device='cuda:0')
episode: 202 training return: tensor(83.5580, device='cuda:0')
episode: 203 training return: tensor(72.7307, device='cuda:0')
epoch: 51 test_true_pfm: 117.15512425538927 sim_pfm: 77.14491820079857
episode: 204 training return: tensor(76.1715, device='cuda:0')
episode: 205 training return: tensor(74.1613, device='cuda:0')
episode: 206 training return: tensor(14.7681, device='cuda:0')
episode: 207 training return: tensor(86.8783, device='cuda:0')
epoch: 52 test_true_pfm: 126.15947996743964 sim_pfm: 75.67960722975549
episode: 208 training return: tensor(85.2351, device='cuda:0')
episode: 209 training return: tensor(80.4887, device='cuda:0')
episode: 210 training return: tensor(13.7442, device='cuda:0')
episode: 211 training return: tensor(47.0853, device='cuda:0')
epoch: 53 test_true_pfm: 111.7528234800936 sim_pfm: 85.57558496688144
episode: 212 training return: tensor(78.5026, device='cuda:0')
episode: 213 training return: tensor(75.0983, device='cuda:0')
episode: 214 training return: tensor(84.6367, device='cuda:0')
episode: 215 training return: tensor(84.4534, device='cuda:0')
epoch: 54 test_true_pfm: 117.20300971925545 sim_pfm: 84.84267386628781
episode: 216 training return: tensor(82.7358, device='cuda:0')
episode: 217 training return: tensor(71.8130, device='cuda:0')
episode: 218 training return: tensor(85.5606, device='cuda:0')
episode: 219 training return: tensor(56.3222, device='cuda:0')
epoch: 55 test_true_pfm: 115.96271560838068 sim_pfm: 76.52838496176409
episode: 220 training return: tensor(52.8077, device='cuda:0')
episode: 221 training return: tensor(76.0900, device='cuda:0')
episode: 222 training return: tensor(84.6906, device='cuda:0')
episode: 223 training return: tensor(82.7951, device='cuda:0')
epoch: 56 test_true_pfm: 127.00855137388854 sim_pfm: 73.38434816363733
episode: 224 training return: tensor(77.0003, device='cuda:0')
episode: 225 training return: tensor(55.0599, device='cuda:0')
episode: 226 training return: tensor(72.4287, device='cuda:0')
episode: 227 training return: tensor(75.3440, device='cuda:0')
epoch: 57 test_true_pfm: 111.1001068118389 sim_pfm: 79.45150456085102
episode: 228 training return: tensor(71.0524, device='cuda:0')
episode: 229 training return: tensor(11.2602, device='cuda:0')
episode: 230 training return: tensor(54.9967, device='cuda:0')
episode: 231 training return: tensor(63.9492, device='cuda:0')
epoch: 58 test_true_pfm: 125.23445163895398 sim_pfm: 78.43574892568286
episode: 232 training return: tensor(43.9977, device='cuda:0')
episode: 233 training return: tensor(85.3210, device='cuda:0')
episode: 234 training return: tensor(47.0892, device='cuda:0')
episode: 235 training return: tensor(50.8426, device='cuda:0')
epoch: 59 test_true_pfm: 117.37900879140084 sim_pfm: 79.06013402913231
episode: 236 training return: tensor(3.1723, device='cuda:0')
episode: 237 training return: tensor(79.6786, device='cuda:0')
episode: 238 training return: tensor(50.9588, device='cuda:0')
episode: 239 training return: tensor(77.9527, device='cuda:0')
epoch: 60 test_true_pfm: 120.5416653097204 sim_pfm: 77.94999636545545
episode: 240 training return: tensor(78.7936, device='cuda:0')
episode: 241 training return: tensor(86.8908, device='cuda:0')
episode: 242 training return: tensor(82.6511, device='cuda:0')
episode: 243 training return: tensor(86.5124, device='cuda:0')
epoch: 61 test_true_pfm: 117.04230624648531 sim_pfm: 78.25668541807681
episode: 244 training return: tensor(76.8056, device='cuda:0')
episode: 245 training return: tensor(58.0377, device='cuda:0')
episode: 246 training return: tensor(5.4368, device='cuda:0')
episode: 247 training return: tensor(85.1799, device='cuda:0')
epoch: 62 test_true_pfm: 125.6284559175006 sim_pfm: 75.72723265402601
episode: 248 training return: tensor(86.6112, device='cuda:0')
episode: 249 training return: tensor(88.4005, device='cuda:0')
episode: 250 training return: tensor(69.6414, device='cuda:0')
episode: 251 training return: tensor(56.6302, device='cuda:0')
epoch: 63 test_true_pfm: 119.8668333909537 sim_pfm: 83.87603399295476
episode: 252 training return: tensor(45.9509, device='cuda:0')
episode: 253 training return: tensor(79.4227, device='cuda:0')
episode: 254 training return: tensor(12.2944, device='cuda:0')
episode: 255 training return: tensor(70.9485, device='cuda:0')
epoch: 64 test_true_pfm: 123.54771712334897 sim_pfm: 73.38057724883546
episode: 256 training return: tensor(79.4445, device='cuda:0')
episode: 257 training return: tensor(73.1781, device='cuda:0')
episode: 258 training return: tensor(77.6349, device='cuda:0')
episode: 259 training return: tensor(75.4545, device='cuda:0')
epoch: 65 test_true_pfm: 113.50034542110959 sim_pfm: 80.75828859023167
episode: 260 training return: tensor(80.5458, device='cuda:0')
episode: 261 training return: tensor(86.9108, device='cuda:0')
episode: 262 training return: tensor(81.9678, device='cuda:0')
episode: 263 training return: tensor(55.4766, device='cuda:0')
epoch: 66 test_true_pfm: 125.22185900588345 sim_pfm: 78.0169990288443
episode: 264 training return: tensor(85.0235, device='cuda:0')
episode: 265 training return: tensor(47.5036, device='cuda:0')
episode: 266 training return: tensor(93.1272, device='cuda:0')
episode: 267 training return: tensor(81.7239, device='cuda:0')
epoch: 67 test_true_pfm: 118.0006302674935 sim_pfm: 81.72524073269452
episode: 268 training return: tensor(56.4699, device='cuda:0')
episode: 269 training return: tensor(74.6131, device='cuda:0')
episode: 270 training return: tensor(55.4230, device='cuda:0')
episode: 271 training return: tensor(59.9282, device='cuda:0')
epoch: 68 test_true_pfm: 117.14681168926072 sim_pfm: 82.19320066756336
episode: 272 training return: tensor(55.2401, device='cuda:0')
episode: 273 training return: tensor(13.8296, device='cuda:0')
episode: 274 training return: tensor(68.3571, device='cuda:0')
episode: 275 training return: tensor(80.4362, device='cuda:0')
epoch: 69 test_true_pfm: 116.64239525110085 sim_pfm: 79.41709509452339
episode: 276 training return: tensor(88.0242, device='cuda:0')
episode: 277 training return: tensor(59.9435, device='cuda:0')
episode: 278 training return: tensor(85.4221, device='cuda:0')
episode: 279 training return: tensor(77.1464, device='cuda:0')
epoch: 70 test_true_pfm: 116.6083869772257 sim_pfm: 69.63607265535975
episode: 280 training return: tensor(85.9683, device='cuda:0')
episode: 281 training return: tensor(69.7826, device='cuda:0')
episode: 282 training return: tensor(78.7409, device='cuda:0')
episode: 283 training return: tensor(81.3047, device='cuda:0')
epoch: 71 test_true_pfm: 122.98939137248779 sim_pfm: 79.34236043361598
episode: 284 training return: tensor(81.3753, device='cuda:0')
episode: 285 training return: tensor(80.8251, device='cuda:0')
episode: 286 training return: tensor(67.0359, device='cuda:0')
episode: 287 training return: tensor(69.8885, device='cuda:0')
epoch: 72 test_true_pfm: 117.5683864707594 sim_pfm: 75.05545387397869
episode: 288 training return: tensor(82.3652, device='cuda:0')
episode: 289 training return: tensor(85.9059, device='cuda:0')
episode: 290 training return: tensor(79.9114, device='cuda:0')
episode: 291 training return: tensor(86.8336, device='cuda:0')
epoch: 73 test_true_pfm: 122.11139080881799 sim_pfm: 84.84806435351493
episode: 292 training return: tensor(71.8118, device='cuda:0')
episode: 293 training return: tensor(75.4146, device='cuda:0')
episode: 294 training return: tensor(72.2543, device='cuda:0')
episode: 295 training return: tensor(76.9481, device='cuda:0')
epoch: 74 test_true_pfm: 119.82876873230722 sim_pfm: 63.97603582531447
episode: 296 training return: tensor(15.4206, device='cuda:0')
episode: 297 training return: tensor(77.2128, device='cuda:0')
episode: 298 training return: tensor(82.3175, device='cuda:0')
episode: 299 training return: tensor(74.6590, device='cuda:0')
epoch: 75 test_true_pfm: 98.75724735617109 sim_pfm: 70.18410006526975
episode: 300 training return: tensor(50.0638, device='cuda:0')
episode: 301 training return: tensor(46.9509, device='cuda:0')
episode: 302 training return: tensor(11.7382, device='cuda:0')
episode: 303 training return: tensor(86.1286, device='cuda:0')
epoch: 76 test_true_pfm: 116.01066952949296 sim_pfm: 75.08158033499494
episode: 304 training return: tensor(55.7333, device='cuda:0')
episode: 305 training return: tensor(82.1661, device='cuda:0')
episode: 306 training return: tensor(75.0111, device='cuda:0')
episode: 307 training return: tensor(85.0349, device='cuda:0')
epoch: 77 test_true_pfm: 113.69241080897648 sim_pfm: 73.70525977968937
episode: 308 training return: tensor(48.9228, device='cuda:0')
episode: 309 training return: tensor(54.0622, device='cuda:0')
episode: 310 training return: tensor(84.3067, device='cuda:0')
episode: 311 training return: tensor(56.2057, device='cuda:0')
epoch: 78 test_true_pfm: 117.0974483451154 sim_pfm: 83.3939384887286
episode: 312 training return: tensor(74.8989, device='cuda:0')
episode: 313 training return: tensor(69.9349, device='cuda:0')
episode: 314 training return: tensor(75.9472, device='cuda:0')
episode: 315 training return: tensor(90.2510, device='cuda:0')
epoch: 79 test_true_pfm: 112.4464845439115 sim_pfm: 84.06993610304198
episode: 316 training return: tensor(80.2510, device='cuda:0')
episode: 317 training return: tensor(80.2034, device='cuda:0')
episode: 318 training return: tensor(63.6707, device='cuda:0')
episode: 319 training return: tensor(12.4590, device='cuda:0')
epoch: 80 test_true_pfm: 116.93129419279292 sim_pfm: 71.78043815465062
episode: 320 training return: tensor(87.7307, device='cuda:0')
episode: 321 training return: tensor(87.8996, device='cuda:0')
episode: 322 training return: tensor(68.0338, device='cuda:0')
episode: 323 training return: tensor(69.1415, device='cuda:0')
epoch: 81 test_true_pfm: 116.79130383633267 sim_pfm: 76.73757224539295
episode: 324 training return: tensor(79.9384, device='cuda:0')
episode: 325 training return: tensor(75.3324, device='cuda:0')
episode: 326 training return: tensor(91.3913, device='cuda:0')
episode: 327 training return: tensor(47.2647, device='cuda:0')
epoch: 82 test_true_pfm: 120.6341426709732 sim_pfm: 77.70685831949814
episode: 328 training return: tensor(86.7008, device='cuda:0')
episode: 329 training return: tensor(82.4026, device='cuda:0')
episode: 330 training return: tensor(72.7912, device='cuda:0')
episode: 331 training return: tensor(71.5001, device='cuda:0')
epoch: 83 test_true_pfm: 121.67077960519723 sim_pfm: 75.52491994603187
episode: 332 training return: tensor(77.5000, device='cuda:0')
episode: 333 training return: tensor(41.6325, device='cuda:0')
episode: 334 training return: tensor(60.2020, device='cuda:0')
episode: 335 training return: tensor(57.3189, device='cuda:0')
epoch: 84 test_true_pfm: 129.01816916654872 sim_pfm: 69.68790095236037
episode: 336 training return: tensor(73.1033, device='cuda:0')
episode: 337 training return: tensor(84.9479, device='cuda:0')
episode: 338 training return: tensor(77.1562, device='cuda:0')
episode: 339 training return: tensor(67.6953, device='cuda:0')
epoch: 85 test_true_pfm: 113.15355004709542 sim_pfm: 73.14214907231508
episode: 340 training return: tensor(80.8815, device='cuda:0')
episode: 341 training return: tensor(82.5271, device='cuda:0')
episode: 342 training return: tensor(77.7376, device='cuda:0')
episode: 343 training return: tensor(81.2309, device='cuda:0')
epoch: 86 test_true_pfm: 121.00631355948993 sim_pfm: 73.80584377881023
episode: 344 training return: tensor(86.2316, device='cuda:0')
episode: 345 training return: tensor(50.2886, device='cuda:0')
episode: 346 training return: tensor(81.5349, device='cuda:0')
episode: 347 training return: tensor(54.7228, device='cuda:0')
epoch: 87 test_true_pfm: 113.71619435087 sim_pfm: 77.69313443677966
episode: 348 training return: tensor(77.9484, device='cuda:0')
episode: 349 training return: tensor(83.8823, device='cuda:0')
episode: 350 training return: tensor(85.0508, device='cuda:0')
episode: 351 training return: tensor(84.7098, device='cuda:0')
epoch: 88 test_true_pfm: 127.84227741017348 sim_pfm: 80.94954408789636
episode: 352 training return: tensor(72.7768, device='cuda:0')
episode: 353 training return: tensor(79.0941, device='cuda:0')
episode: 354 training return: tensor(95.1176, device='cuda:0')
episode: 355 training return: tensor(75.6132, device='cuda:0')
epoch: 89 test_true_pfm: 116.12272259027819 sim_pfm: 79.93052722003195
episode: 356 training return: tensor(86.4786, device='cuda:0')
episode: 357 training return: tensor(60.8106, device='cuda:0')
episode: 358 training return: tensor(71.3228, device='cuda:0')
episode: 359 training return: tensor(71.0943, device='cuda:0')
epoch: 90 test_true_pfm: 113.2949128795664 sim_pfm: 75.8253933693748
episode: 360 training return: tensor(50.6635, device='cuda:0')
episode: 361 training return: tensor(81.9500, device='cuda:0')
episode: 362 training return: tensor(48.6101, device='cuda:0')
episode: 363 training return: tensor(73.5651, device='cuda:0')
epoch: 91 test_true_pfm: 117.58097307372036 sim_pfm: 68.7335001251311
episode: 364 training return: tensor(66.4007, device='cuda:0')
episode: 365 training return: tensor(80.8753, device='cuda:0')
episode: 366 training return: tensor(81.3124, device='cuda:0')
episode: 367 training return: tensor(71.6910, device='cuda:0')
epoch: 92 test_true_pfm: 121.72727251289523 sim_pfm: 83.55373370379093
episode: 368 training return: tensor(81.7078, device='cuda:0')
episode: 369 training return: tensor(50.1998, device='cuda:0')
episode: 370 training return: tensor(78.9432, device='cuda:0')
episode: 371 training return: tensor(84.2720, device='cuda:0')
epoch: 93 test_true_pfm: 106.64822567410764 sim_pfm: 71.97200829479262
episode: 372 training return: tensor(61.1251, device='cuda:0')
episode: 373 training return: tensor(76.1793, device='cuda:0')
episode: 374 training return: tensor(85.8636, device='cuda:0')
episode: 375 training return: tensor(82.2988, device='cuda:0')
epoch: 94 test_true_pfm: 114.36743786721186 sim_pfm: 75.22098605736974
episode: 376 training return: tensor(51.5762, device='cuda:0')
episode: 377 training return: tensor(79.8158, device='cuda:0')
episode: 378 training return: tensor(84.1141, device='cuda:0')
episode: 379 training return: tensor(78.4517, device='cuda:0')
epoch: 95 test_true_pfm: 108.70997303224351 sim_pfm: 77.11984983990551
episode: 380 training return: tensor(72.2216, device='cuda:0')
episode: 381 training return: tensor(87.6591, device='cuda:0')
episode: 382 training return: tensor(85.8136, device='cuda:0')
episode: 383 training return: tensor(88.1556, device='cuda:0')
epoch: 96 test_true_pfm: 121.80069725623814 sim_pfm: 86.77584229924832
episode: 384 training return: tensor(82.8415, device='cuda:0')
episode: 385 training return: tensor(89.3206, device='cuda:0')
episode: 386 training return: tensor(81.0211, device='cuda:0')
episode: 387 training return: tensor(69.5513, device='cuda:0')
epoch: 97 test_true_pfm: 123.34752493847766 sim_pfm: 77.48592349034152
episode: 388 training return: tensor(76.8747, device='cuda:0')
episode: 389 training return: tensor(54.8546, device='cuda:0')
episode: 390 training return: tensor(73.2379, device='cuda:0')
episode: 391 training return: tensor(73.0953, device='cuda:0')
epoch: 98 test_true_pfm: 107.9403034244236 sim_pfm: 76.24133776095114
episode: 392 training return: tensor(80.6935, device='cuda:0')
episode: 393 training return: tensor(72.4821, device='cuda:0')
episode: 394 training return: tensor(87.3137, device='cuda:0')
episode: 395 training return: tensor(78.2415, device='cuda:0')
epoch: 99 test_true_pfm: 114.65154825668756 sim_pfm: 71.47929239578662
episode: 396 training return: tensor(72.4918, device='cuda:0')
episode: 397 training return: tensor(72.2824, device='cuda:0')
episode: 398 training return: tensor(87.4696, device='cuda:0')
episode: 399 training return: tensor(62.4683, device='cuda:0')
epoch: 100 test_true_pfm: 107.062588665781 sim_pfm: 67.84320767785539
episode: 400 training return: tensor(52.9548, device='cuda:0')
episode: 401 training return: tensor(64.7858, device='cuda:0')
episode: 402 training return: tensor(78.7772, device='cuda:0')
episode: 403 training return: tensor(79.6965, device='cuda:0')
epoch: 101 test_true_pfm: 109.7961375366021 sim_pfm: 69.03379180140328
episode: 404 training return: tensor(68.9559, device='cuda:0')
episode: 405 training return: tensor(69.2653, device='cuda:0')
episode: 406 training return: tensor(68.4910, device='cuda:0')
episode: 407 training return: tensor(71.9109, device='cuda:0')
epoch: 102 test_true_pfm: 107.24777991668722 sim_pfm: 75.17152985900756
episode: 408 training return: tensor(79.7276, device='cuda:0')
episode: 409 training return: tensor(62.2320, device='cuda:0')
episode: 410 training return: tensor(8.1830, device='cuda:0')
episode: 411 training return: tensor(44.3118, device='cuda:0')
epoch: 103 test_true_pfm: 112.03341084525478 sim_pfm: 83.50169261503615
episode: 412 training return: tensor(41.5503, device='cuda:0')
episode: 413 training return: tensor(81.6693, device='cuda:0')
episode: 414 training return: tensor(67.9450, device='cuda:0')
episode: 415 training return: tensor(55.0623, device='cuda:0')
epoch: 104 test_true_pfm: 116.12505623651013 sim_pfm: 68.94739254123415
episode: 416 training return: tensor(87.6048, device='cuda:0')
episode: 417 training return: tensor(69.1376, device='cuda:0')
episode: 418 training return: tensor(49.3260, device='cuda:0')
episode: 419 training return: tensor(65.0016, device='cuda:0')
epoch: 105 test_true_pfm: 123.67915195186796 sim_pfm: 77.78978469452704
episode: 420 training return: tensor(85.1990, device='cuda:0')
episode: 421 training return: tensor(13.4008, device='cuda:0')
episode: 422 training return: tensor(79.1446, device='cuda:0')
episode: 423 training return: tensor(81.9253, device='cuda:0')
epoch: 106 test_true_pfm: 120.54247630806219 sim_pfm: 82.26357326494181
episode: 424 training return: tensor(84.8957, device='cuda:0')
episode: 425 training return: tensor(72.3049, device='cuda:0')
episode: 426 training return: tensor(87.2943, device='cuda:0')
episode: 427 training return: tensor(88.2778, device='cuda:0')
epoch: 107 test_true_pfm: 119.3251413383924 sim_pfm: 77.71808600457152
episode: 428 training return: tensor(71.7949, device='cuda:0')
episode: 429 training return: tensor(81.8006, device='cuda:0')
episode: 430 training return: tensor(87.5005, device='cuda:0')
episode: 431 training return: tensor(76.2839, device='cuda:0')
epoch: 108 test_true_pfm: 110.73240687333484 sim_pfm: 73.53817601759219
episode: 432 training return: tensor(89.9196, device='cuda:0')
episode: 433 training return: tensor(76.7703, device='cuda:0')
episode: 434 training return: tensor(71.9291, device='cuda:0')
episode: 435 training return: tensor(75.0374, device='cuda:0')
epoch: 109 test_true_pfm: 112.40912413655758 sim_pfm: 76.1735731736815
episode: 436 training return: tensor(77.9931, device='cuda:0')
episode: 437 training return: tensor(83.2147, device='cuda:0')
episode: 438 training return: tensor(86.6533, device='cuda:0')
episode: 439 training return: tensor(74.3491, device='cuda:0')
epoch: 110 test_true_pfm: 95.92553970615515 sim_pfm: 85.67729823665577
episode: 440 training return: tensor(56.8156, device='cuda:0')
episode: 441 training return: tensor(78.8207, device='cuda:0')
episode: 442 training return: tensor(74.6747, device='cuda:0')
episode: 443 training return: tensor(75.4925, device='cuda:0')
epoch: 111 test_true_pfm: 114.66863574779686 sim_pfm: 78.18311069238698
episode: 444 training return: tensor(85.7427, device='cuda:0')
episode: 445 training return: tensor(81.4564, device='cuda:0')
episode: 446 training return: tensor(74.7603, device='cuda:0')
episode: 447 training return: tensor(87.3814, device='cuda:0')
epoch: 112 test_true_pfm: 105.86805990127691 sim_pfm: 68.71272080437629
episode: 448 training return: tensor(89.5424, device='cuda:0')
episode: 449 training return: tensor(81.4174, device='cuda:0')
episode: 450 training return: tensor(62.1148, device='cuda:0')
episode: 451 training return: tensor(90.0325, device='cuda:0')
epoch: 113 test_true_pfm: 97.14209897680777 sim_pfm: 64.02175098729204
episode: 452 training return: tensor(79.8385, device='cuda:0')
episode: 453 training return: tensor(70.6739, device='cuda:0')
episode: 454 training return: tensor(84.5269, device='cuda:0')
episode: 455 training return: tensor(68.2341, device='cuda:0')
epoch: 114 test_true_pfm: 112.31429059616934 sim_pfm: 85.82041836209828
episode: 456 training return: tensor(78.3137, device='cuda:0')
episode: 457 training return: tensor(87.6180, device='cuda:0')
episode: 458 training return: tensor(79.5751, device='cuda:0')
episode: 459 training return: tensor(76.6385, device='cuda:0')
epoch: 115 test_true_pfm: 113.49025042926625 sim_pfm: 83.82463481490268
episode: 460 training return: tensor(53.2499, device='cuda:0')
episode: 461 training return: tensor(68.2495, device='cuda:0')
episode: 462 training return: tensor(88.8134, device='cuda:0')
episode: 463 training return: tensor(89.2308, device='cuda:0')
epoch: 116 test_true_pfm: 120.9636273908292 sim_pfm: 85.07122848646831
episode: 464 training return: tensor(84.9895, device='cuda:0')
episode: 465 training return: tensor(83.8660, device='cuda:0')
episode: 466 training return: tensor(81.7188, device='cuda:0')
episode: 467 training return: tensor(77.4904, device='cuda:0')
epoch: 117 test_true_pfm: 122.27087465341023 sim_pfm: 65.85401557132136
episode: 468 training return: tensor(78.0088, device='cuda:0')
episode: 469 training return: tensor(85.8532, device='cuda:0')
episode: 470 training return: tensor(81.6850, device='cuda:0')
episode: 471 training return: tensor(69.2328, device='cuda:0')
epoch: 118 test_true_pfm: 122.046820418397 sim_pfm: 77.39627572636236
episode: 472 training return: tensor(80.5304, device='cuda:0')
episode: 473 training return: tensor(85.7782, device='cuda:0')
episode: 474 training return: tensor(83.0245, device='cuda:0')
episode: 475 training return: tensor(78.4460, device='cuda:0')
epoch: 119 test_true_pfm: 122.81020650406359 sim_pfm: 82.42907474021777
episode: 476 training return: tensor(80.0822, device='cuda:0')
episode: 477 training return: tensor(53.8234, device='cuda:0')
episode: 478 training return: tensor(26.5835, device='cuda:0')
episode: 479 training return: tensor(78.0507, device='cuda:0')
epoch: 120 test_true_pfm: 117.49485884326054 sim_pfm: 77.86856072890222
episode: 480 training return: tensor(84.1517, device='cuda:0')
episode: 481 training return: tensor(78.2220, device='cuda:0')
episode: 482 training return: tensor(88.6856, device='cuda:0')
episode: 483 training return: tensor(86.3360, device='cuda:0')
epoch: 121 test_true_pfm: 115.53852015019754 sim_pfm: 75.86786602967186
episode: 484 training return: tensor(81.4696, device='cuda:0')
episode: 485 training return: tensor(86.7759, device='cuda:0')
episode: 486 training return: tensor(75.6407, device='cuda:0')
episode: 487 training return: tensor(74.5253, device='cuda:0')
epoch: 122 test_true_pfm: 109.09564344938397 sim_pfm: 64.59871630814159
episode: 488 training return: tensor(90.5399, device='cuda:0')
episode: 489 training return: tensor(89.1705, device='cuda:0')
episode: 490 training return: tensor(74.9349, device='cuda:0')
episode: 491 training return: tensor(90.3702, device='cuda:0')
epoch: 123 test_true_pfm: 105.83793615927013 sim_pfm: 85.73096777892206
episode: 492 training return: tensor(83.6241, device='cuda:0')
episode: 493 training return: tensor(91.0234, device='cuda:0')
episode: 494 training return: tensor(72.0019, device='cuda:0')
episode: 495 training return: tensor(85.9272, device='cuda:0')
epoch: 124 test_true_pfm: 123.53134743423607 sim_pfm: 83.39525569029502
episode: 496 training return: tensor(79.4697, device='cuda:0')
episode: 497 training return: tensor(75.1835, device='cuda:0')
episode: 498 training return: tensor(83.4789, device='cuda:0')
episode: 499 training return: tensor(73.8210, device='cuda:0')
epoch: 125 test_true_pfm: 120.16189210698477 sim_pfm: 85.28728612508857
episode: 500 training return: tensor(89.4447, device='cuda:0')
episode: 501 training return: tensor(89.6095, device='cuda:0')
episode: 502 training return: tensor(86.0545, device='cuda:0')
episode: 503 training return: tensor(84.9781, device='cuda:0')
epoch: 126 test_true_pfm: 122.94782170520739 sim_pfm: 80.48328297974076
episode: 504 training return: tensor(77.4728, device='cuda:0')
episode: 505 training return: tensor(85.3650, device='cuda:0')
episode: 506 training return: tensor(86.8662, device='cuda:0')
episode: 507 training return: tensor(90.1139, device='cuda:0')
epoch: 127 test_true_pfm: 125.82674794677273 sim_pfm: 85.53262212691479
episode: 508 training return: tensor(7.2835, device='cuda:0')
episode: 509 training return: tensor(93.2745, device='cuda:0')
episode: 510 training return: tensor(88.3232, device='cuda:0')
episode: 511 training return: tensor(59.2294, device='cuda:0')
epoch: 128 test_true_pfm: 117.05021411337425 sim_pfm: 84.09994245067355
episode: 512 training return: tensor(79.8039, device='cuda:0')
episode: 513 training return: tensor(84.0240, device='cuda:0')
episode: 514 training return: tensor(77.2643, device='cuda:0')
episode: 515 training return: tensor(78.2490, device='cuda:0')
epoch: 129 test_true_pfm: 124.34105090317306 sim_pfm: 78.26289715332678
episode: 516 training return: tensor(79.9606, device='cuda:0')
episode: 517 training return: tensor(77.8355, device='cuda:0')
episode: 518 training return: tensor(61.8296, device='cuda:0')
episode: 519 training return: tensor(54.4477, device='cuda:0')
epoch: 130 test_true_pfm: 112.0732110538385 sim_pfm: 82.05208641111385
episode: 520 training return: tensor(78.0100, device='cuda:0')
episode: 521 training return: tensor(88.0797, device='cuda:0')
episode: 522 training return: tensor(72.3225, device='cuda:0')
episode: 523 training return: tensor(71.9073, device='cuda:0')
epoch: 131 test_true_pfm: 114.4148767649726 sim_pfm: 80.3793331930763
episode: 524 training return: tensor(87.2485, device='cuda:0')
episode: 525 training return: tensor(81.2737, device='cuda:0')
episode: 526 training return: tensor(72.5271, device='cuda:0')
episode: 527 training return: tensor(84.1938, device='cuda:0')
epoch: 132 test_true_pfm: 117.4421120522644 sim_pfm: 69.94854278701241
episode: 528 training return: tensor(59.7561, device='cuda:0')
episode: 529 training return: tensor(72.8034, device='cuda:0')
episode: 530 training return: tensor(75.1034, device='cuda:0')
episode: 531 training return: tensor(77.8625, device='cuda:0')
epoch: 133 test_true_pfm: 109.31901882317261 sim_pfm: 68.62861401506234
episode: 532 training return: tensor(73.3007, device='cuda:0')
episode: 533 training return: tensor(87.8920, device='cuda:0')
episode: 534 training return: tensor(72.0263, device='cuda:0')
episode: 535 training return: tensor(67.2295, device='cuda:0')
epoch: 134 test_true_pfm: 121.81443814057813 sim_pfm: 75.59787736847065
episode: 536 training return: tensor(66.9641, device='cuda:0')
episode: 537 training return: tensor(80.5963, device='cuda:0')
episode: 538 training return: tensor(81.4350, device='cuda:0')
episode: 539 training return: tensor(14.1765, device='cuda:0')
epoch: 135 test_true_pfm: 123.44328965454672 sim_pfm: 71.73033983221394
episode: 540 training return: tensor(90.2741, device='cuda:0')
episode: 541 training return: tensor(89.7134, device='cuda:0')
episode: 542 training return: tensor(42.0087, device='cuda:0')
episode: 543 training return: tensor(57.8756, device='cuda:0')
epoch: 136 test_true_pfm: 126.66084620573602 sim_pfm: 65.72243629295845
episode: 544 training return: tensor(85.7907, device='cuda:0')
episode: 545 training return: tensor(85.4267, device='cuda:0')
episode: 546 training return: tensor(56.8311, device='cuda:0')
episode: 547 training return: tensor(41.6754, device='cuda:0')
epoch: 137 test_true_pfm: 129.07925944285478 sim_pfm: 81.08374575595371
episode: 548 training return: tensor(85.6420, device='cuda:0')
episode: 549 training return: tensor(66.3096, device='cuda:0')
episode: 550 training return: tensor(86.6336, device='cuda:0')
episode: 551 training return: tensor(87.8884, device='cuda:0')
epoch: 138 test_true_pfm: 119.97302719214693 sim_pfm: 83.02767967868131
episode: 552 training return: tensor(78.3887, device='cuda:0')
episode: 553 training return: tensor(87.0403, device='cuda:0')
episode: 554 training return: tensor(87.7419, device='cuda:0')
episode: 555 training return: tensor(84.7484, device='cuda:0')
epoch: 139 test_true_pfm: 128.82293631723127 sim_pfm: 86.97531640496454
episode: 556 training return: tensor(86.6522, device='cuda:0')
episode: 557 training return: tensor(12.8662, device='cuda:0')
episode: 558 training return: tensor(88.8106, device='cuda:0')
episode: 559 training return: tensor(82.3518, device='cuda:0')
epoch: 140 test_true_pfm: 126.49574214659773 sim_pfm: 85.82399896638235
episode: 560 training return: tensor(79.5487, device='cuda:0')
episode: 561 training return: tensor(84.4060, device='cuda:0')
episode: 562 training return: tensor(89.3214, device='cuda:0')
episode: 563 training return: tensor(87.9324, device='cuda:0')
epoch: 141 test_true_pfm: 121.68402946620476 sim_pfm: 81.9304654488922
episode: 564 training return: tensor(85.5521, device='cuda:0')
episode: 565 training return: tensor(84.8913, device='cuda:0')
episode: 566 training return: tensor(88.5560, device='cuda:0')
episode: 567 training return: tensor(83.7662, device='cuda:0')
epoch: 142 test_true_pfm: 112.09312525567182 sim_pfm: 84.30398181558704
episode: 568 training return: tensor(87.9555, device='cuda:0')
episode: 569 training return: tensor(80.3475, device='cuda:0')
episode: 570 training return: tensor(71.2522, device='cuda:0')
episode: 571 training return: tensor(68.7958, device='cuda:0')
epoch: 143 test_true_pfm: 107.9026002203728 sim_pfm: 77.1860278253851
episode: 572 training return: tensor(75.1602, device='cuda:0')
episode: 573 training return: tensor(82.6740, device='cuda:0')
episode: 574 training return: tensor(85.8596, device='cuda:0')
episode: 575 training return: tensor(57.3715, device='cuda:0')
epoch: 144 test_true_pfm: 119.87529841757592 sim_pfm: 81.88620267995285
episode: 576 training return: tensor(54.9795, device='cuda:0')
episode: 577 training return: tensor(85.0202, device='cuda:0')
episode: 578 training return: tensor(52.8167, device='cuda:0')
episode: 579 training return: tensor(81.6464, device='cuda:0')
epoch: 145 test_true_pfm: 126.0811444344782 sim_pfm: 81.83774028176558
episode: 580 training return: tensor(57.1094, device='cuda:0')
episode: 581 training return: tensor(84.6603, device='cuda:0')
episode: 582 training return: tensor(77.7997, device='cuda:0')
episode: 583 training return: tensor(76.2939, device='cuda:0')
epoch: 146 test_true_pfm: 124.00688294841476 sim_pfm: 75.83066593115218
episode: 584 training return: tensor(85.3878, device='cuda:0')
episode: 585 training return: tensor(87.1708, device='cuda:0')
episode: 586 training return: tensor(86.0451, device='cuda:0')
episode: 587 training return: tensor(66.4566, device='cuda:0')
epoch: 147 test_true_pfm: 114.99232733782151 sim_pfm: 73.70860890842741
episode: 588 training return: tensor(62.0283, device='cuda:0')
episode: 589 training return: tensor(87.5946, device='cuda:0')
episode: 590 training return: tensor(87.2172, device='cuda:0')
episode: 591 training return: tensor(12.8581, device='cuda:0')
epoch: 148 test_true_pfm: 130.52485772312326 sim_pfm: 81.1328683789994
episode: 592 training return: tensor(84.2426, device='cuda:0')
episode: 593 training return: tensor(85.0945, device='cuda:0')
episode: 594 training return: tensor(88.6042, device='cuda:0')
episode: 595 training return: tensor(76.7316, device='cuda:0')
epoch: 149 test_true_pfm: 114.438473018279 sim_pfm: 78.10279634801554
episode: 596 training return: tensor(82.3548, device='cuda:0')
episode: 597 training return: tensor(87.9172, device='cuda:0')
episode: 598 training return: tensor(81.7143, device='cuda:0')
episode: 599 training return: tensor(83.8023, device='cuda:0')
epoch: 150 test_true_pfm: 129.67157756828436 sim_pfm: 78.63954038619995
