['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '1']
epoch: 0 training_loss 0.24011441446840764 test_loss: 0.1504555106163025
epoch: 1 training_loss 0.14947170540690422 test_loss: 0.14856094121932983
epoch: 2 training_loss 0.1336916361004114 test_loss: 0.11894928216934204
epoch: 3 training_loss 0.12039160320535303 test_loss: 0.08937380909919738
epoch: 4 training_loss 0.11338136084377766 test_loss: 0.10610986948013305
epoch: 5 training_loss 0.11423898104578256 test_loss: 0.1094565749168396
epoch: 6 training_loss 0.11377335274592042 test_loss: 0.11846741437911987
epoch: 7 training_loss 0.10857697624713182 test_loss: 0.11341234445571899
epoch: 8 training_loss 0.1124535708874464 test_loss: 0.1100051999092102
epoch: 9 training_loss 0.11000208193436264 test_loss: 0.1169883131980896
epoch: 10 training_loss 0.10200525231659413 test_loss: 0.10652469396591187
epoch: 11 training_loss 0.10048793798312544 test_loss: 0.09946109056472778
epoch: 12 training_loss 0.10207386903464793 test_loss: 0.09986405968666076
epoch: 13 training_loss 0.10400758173316717 test_loss: 0.10465284585952758
epoch: 14 training_loss 0.10543608395382761 test_loss: 0.11623611450195312
epoch: 15 training_loss 0.10663120865821839 test_loss: 0.11317198276519776
epoch: 16 training_loss 0.10375500099733471 test_loss: 0.10505167245864869
epoch: 17 training_loss 0.10583540171384812 test_loss: 0.10280358791351318
epoch: 18 training_loss 0.09691831151023507 test_loss: 0.09512425065040589
epoch: 19 training_loss 0.10017495376989245 test_loss: 0.11115846633911133
epoch: 20 training_loss 0.10437543328851462 test_loss: 0.11033568382263184
epoch: 21 training_loss 0.09605734823271632 test_loss: 0.11056319475173951
epoch: 22 training_loss 0.10063677247613668 test_loss: 0.11292362213134766
epoch: 23 training_loss 0.0977187842130661 test_loss: 0.10253063440322877
epoch: 24 training_loss 0.09980614006519317 test_loss: 0.0884907066822052
epoch: 25 training_loss 0.09888389384374023 test_loss: 0.08772488236427307
epoch: 26 training_loss 0.09917862392961979 test_loss: 0.09762622117996216
epoch: 27 training_loss 0.09724192760884762 test_loss: 0.0901308000087738
epoch: 28 training_loss 0.0914879865758121 test_loss: 0.08657636642456054
epoch: 29 training_loss 0.10118556464090943 test_loss: 0.11420565843582153
epoch: 30 training_loss 0.1011488465219736 test_loss: 0.09402753710746765
epoch: 31 training_loss 0.09253978829830885 test_loss: 0.09466096758842468
epoch: 32 training_loss 0.10064478974789381 test_loss: 0.10518752336502075
epoch: 33 training_loss 0.08854522908106446 test_loss: 0.10167058706283569
epoch: 34 training_loss 0.09682255327701568 test_loss: 0.1052517294883728
epoch: 35 training_loss 0.09407219894230366 test_loss: 0.105769681930542
epoch: 36 training_loss 0.09822637287899852 test_loss: 0.10988953113555908
epoch: 37 training_loss 0.09811852240934968 test_loss: 0.09374010562896729
epoch: 38 training_loss 0.0920073215290904 test_loss: 0.09709951877593995
epoch: 39 training_loss 0.09325016748160124 test_loss: 0.11893506050109863
epoch: 40 training_loss 0.09653113152831792 test_loss: 0.09782307744026184
epoch: 41 training_loss 0.10243480410426856 test_loss: 0.08643180131912231
epoch: 42 training_loss 0.08603232380002737 test_loss: 0.09246354103088379
epoch: 43 training_loss 0.09276380483061075 test_loss: 0.0895220696926117
epoch: 44 training_loss 0.09190220570191741 test_loss: 0.08543692827224732
epoch: 45 training_loss 0.09572337752208114 test_loss: 0.09354597926139832
epoch: 46 training_loss 0.0977101704850793 test_loss: 0.1045310378074646
epoch: 47 training_loss 0.10144733190536499 test_loss: 0.1050007700920105
epoch: 48 training_loss 0.09172402165830135 test_loss: 0.09557361006736756
epoch: 49 training_loss 0.09698204608634114 test_loss: 0.1015125036239624
epoch: 50 training_loss 0.09820488976314663 test_loss: 0.10580984354019166
epoch: 51 training_loss 0.09142115388065576 test_loss: 0.08876977562904358
epoch: 52 training_loss 0.08946688029915094 test_loss: 0.08674916625022888
epoch: 53 training_loss 0.09104031527414919 test_loss: 0.08294337391853332
epoch: 54 training_loss 0.09163576994091273 test_loss: 0.08367202877998352
epoch: 55 training_loss 0.08801666725426913 test_loss: 0.09864342212677002
epoch: 56 training_loss 0.08939272191375494 test_loss: 0.08812031745910645
epoch: 57 training_loss 0.09115476591512561 test_loss: 0.0916302740573883
epoch: 58 training_loss 0.08855942882597446 test_loss: 0.09949502348899841
epoch: 59 training_loss 0.08923770571127534 test_loss: 0.10156258344650268
epoch: 60 training_loss 0.08873715149238706 test_loss: 0.09370580911636353
epoch: 61 training_loss 0.08591626711189747 test_loss: 0.07819731831550598
epoch: 62 training_loss 0.08526495460420846 test_loss: 0.10491112470626832
epoch: 63 training_loss 0.09314624473452568 test_loss: 0.09819936156272888
epoch: 64 training_loss 0.08507265436463057 test_loss: 0.08642451763153076
epoch: 65 training_loss 0.09225033232942223 test_loss: 0.08765206933021545
epoch: 66 training_loss 0.09123919364064932 test_loss: 0.08309566974639893
epoch: 67 training_loss 0.09386872977018357 test_loss: 0.09009522795677186
epoch: 68 training_loss 0.08800479350611567 test_loss: 0.09476141929626465
epoch: 69 training_loss 0.09095816664397717 test_loss: 0.09862527251243591
epoch: 70 training_loss 0.09232086151838302 test_loss: 0.09916426539421082
epoch: 71 training_loss 0.09170243468135596 test_loss: 0.08982375264167786
epoch: 72 training_loss 0.09595497930422425 test_loss: 0.08044386506080628
epoch: 73 training_loss 0.09175370037555694 test_loss: 0.09017043113708496
epoch: 74 training_loss 0.08923717934638262 test_loss: 0.10144729614257812
epoch: 75 training_loss 0.08900230782106519 test_loss: 0.09067313671112061
epoch: 76 training_loss 0.09308923060074449 test_loss: 0.10951437950134277
epoch: 77 training_loss 0.0972075311653316 test_loss: 0.10039764642715454
epoch: 78 training_loss 0.08997010748833417 test_loss: 0.09122715592384338
epoch: 79 training_loss 0.09004349058493971 test_loss: 0.09454504251480103
epoch: 80 training_loss 0.08492503324523568 test_loss: 0.09014632105827332
epoch: 81 training_loss 0.09139746613800526 test_loss: 0.09596313834190369
epoch: 82 training_loss 0.08804256599396468 test_loss: 0.09099462032318115
epoch: 83 training_loss 0.09163865367881954 test_loss: 0.09565935134887696
epoch: 84 training_loss 0.09007609869353474 test_loss: 0.0786783218383789
epoch: 85 training_loss 0.08722702547907829 test_loss: 0.08809277415275574
epoch: 86 training_loss 0.09143094126135111 test_loss: 0.09868230223655701
epoch: 87 training_loss 0.09178460398688912 test_loss: 0.07403684854507446
epoch: 88 training_loss 0.08677471544593572 test_loss: 0.08797813653945923
epoch: 89 training_loss 0.09759844803251326 test_loss: 0.08859930038452149
epoch: 90 training_loss 0.09405565410852432 test_loss: 0.08219215273857117
epoch: 91 training_loss 0.08350655171088875 test_loss: 0.0780284583568573
epoch: 92 training_loss 0.09003244178369642 test_loss: 0.08046453595161437
epoch: 93 training_loss 0.08575239034369588 test_loss: 0.10626493692398072
epoch: 94 training_loss 0.09648532582446932 test_loss: 0.09477681517601014
epoch: 95 training_loss 0.08988349333405495 test_loss: 0.0776896357536316
epoch: 96 training_loss 0.09088985346257687 test_loss: 0.08004113435745239
epoch: 97 training_loss 0.09162516566924751 test_loss: 0.0989489197731018
epoch: 98 training_loss 0.08947046680375934 test_loss: 0.09180541634559632
epoch: 99 training_loss 0.08928825285285712 test_loss: 0.09686752557754516
epoch: 100 training_loss 0.08842124765738844 test_loss: 0.10057430267333985
epoch: 101 training_loss 0.08688532082363963 test_loss: 0.0809269368648529
epoch: 102 training_loss 0.090510566085577 test_loss: 0.0983924388885498
epoch: 103 training_loss 0.09313016179949045 test_loss: 0.09233890175819397
epoch: 104 training_loss 0.0899301545508206 test_loss: 0.08886510133743286
epoch: 105 training_loss 0.08444821024313569 test_loss: 0.08605453372001648
epoch: 106 training_loss 0.09294425748288632 test_loss: 0.09849593043327332
epoch: 107 training_loss 0.0935698825865984 test_loss: 0.09115955233573914
epoch: 108 training_loss 0.08682907495647668 test_loss: 0.09617398977279663
epoch: 109 training_loss 0.09081784034147859 test_loss: 0.0781139612197876
epoch: 110 training_loss 0.09584164189174771 test_loss: 0.1048965334892273
epoch: 111 training_loss 0.08791230926290154 test_loss: 0.08630790710449218
epoch: 112 training_loss 0.08520417399704457 test_loss: 0.08500381708145141
epoch: 113 training_loss 0.09033566456288099 test_loss: 0.09975613355636596
epoch: 114 training_loss 0.0920079769194126 test_loss: 0.09936472773551941
epoch: 115 training_loss 0.08721660800278187 test_loss: 0.07925930023193359
epoch: 116 training_loss 0.08578676862642169 test_loss: 0.10058531761169434
epoch: 117 training_loss 0.09478265712037683 test_loss: 0.09371479749679565
epoch: 118 training_loss 0.0900905035622418 test_loss: 0.08367938995361328
epoch: 119 training_loss 0.09246966578066348 test_loss: 0.09970452189445496
epoch: 120 training_loss 0.08721924781799316 test_loss: 0.09399313926696777
epoch: 121 training_loss 0.08710357200354338 test_loss: 0.09142711758613586
epoch: 122 training_loss 0.09116861045360565 test_loss: 0.08042253255844116
epoch: 123 training_loss 0.08704074518755078 test_loss: 0.10167477130889893
epoch: 124 training_loss 0.09451057950034737 test_loss: 0.10213562250137329
epoch: 125 training_loss 0.08239506760612131 test_loss: 0.09008266925811767
epoch: 126 training_loss 0.09212722882628441 test_loss: 0.09031165838241577
epoch: 127 training_loss 0.08709447255358099 test_loss: 0.08269039392471314
epoch: 128 training_loss 0.08909334972500801 test_loss: 0.09659759402275085
epoch: 129 training_loss 0.08345394186675549 test_loss: 0.08291178941726685
epoch: 130 training_loss 0.08940189328044652 test_loss: 0.07725586295127869
epoch: 131 training_loss 0.09050461012870073 test_loss: 0.08565782308578491
epoch: 132 training_loss 0.09103874318301677 test_loss: 0.10184050798416137
epoch: 133 training_loss 0.08670107236132026 test_loss: 0.09140946865081787
epoch: 134 training_loss 0.09353826846927404 test_loss: 0.08689946532249451
epoch: 135 training_loss 0.09259150302037597 test_loss: 0.08902039527893066
epoch: 136 training_loss 0.08872056614607572 test_loss: 0.08545392155647277
epoch: 137 training_loss 0.09156664438545704 test_loss: 0.08618381023406982
epoch: 138 training_loss 0.0907885705307126 test_loss: 0.09638173580169677
epoch: 139 training_loss 0.09045460291206836 test_loss: 0.08917410373687744
epoch: 140 training_loss 0.08332532523199916 test_loss: 0.09155392050743102
epoch: 141 training_loss 0.08896945750340819 test_loss: 0.08243014812469482
epoch: 142 training_loss 0.08441314542666077 test_loss: 0.08697528839111328
epoch: 143 training_loss 0.09000102698802948 test_loss: 0.10211867094039917
epoch: 144 training_loss 0.07927577827125788 test_loss: 0.08355128169059753
epoch: 145 training_loss 0.08767259588465094 test_loss: 0.08829705715179444
epoch: 146 training_loss 0.08726487625390292 test_loss: 0.09054933190345764
epoch: 147 training_loss 0.09048736449331045 test_loss: 0.08208950161933899
epoch: 148 training_loss 0.09208998121321202 test_loss: 0.09487544298171997
epoch: 149 training_loss 0.08164776774123311 test_loss: 0.09581954479217529
epoch: 0 training_loss 37.328264961242674 test_loss: 19.272439575195314
epoch: 1 training_loss 16.10554169654846 test_loss: 13.357673645019531
epoch: 2 training_loss 12.20294135093689 test_loss: 10.701675415039062
epoch: 3 training_loss 10.019336290359497 test_loss: 9.576096343994141
epoch: 4 training_loss 9.126861076354981 test_loss: 8.237288665771484
epoch: 5 training_loss 8.035269865989685 test_loss: 7.739117431640625
epoch: 6 training_loss 7.305651865005493 test_loss: 7.0790283203125
epoch: 7 training_loss 6.962463765144348 test_loss: 6.852239990234375
epoch: 8 training_loss 6.516073350906372 test_loss: 6.603216552734375
epoch: 9 training_loss 6.325783314704895 test_loss: 6.5381523132324215
epoch: 10 training_loss 6.030272274017334 test_loss: 5.995117950439453
epoch: 11 training_loss 5.5820529699325565 test_loss: 5.642606735229492
epoch: 12 training_loss 5.440162210464478 test_loss: 5.245734405517578
epoch: 13 training_loss 5.199039804935455 test_loss: 5.139976501464844
epoch: 14 training_loss 5.108735265731812 test_loss: 5.160255813598633
epoch: 15 training_loss 4.9848679304122925 test_loss: 4.954293441772461
epoch: 16 training_loss 4.98678150177002 test_loss: 5.149503326416015
epoch: 17 training_loss 4.7715550136566165 test_loss: 4.746823501586914
epoch: 18 training_loss 4.525906660556793 test_loss: 4.378254699707031
epoch: 19 training_loss 4.418577296733856 test_loss: 4.640715789794922
epoch: 20 training_loss 4.414205212593078 test_loss: 4.267473220825195
epoch: 21 training_loss 4.234745883941651 test_loss: 4.2587837219238285
epoch: 22 training_loss 4.208030753135681 test_loss: 4.15321159362793
epoch: 23 training_loss 4.1762019371986385 test_loss: 4.209595108032227
epoch: 24 training_loss 4.0274759078025815 test_loss: 4.047282791137695
epoch: 25 training_loss 4.013633546829223 test_loss: 4.013718032836914
epoch: 26 training_loss 3.9942363834381105 test_loss: 4.169198226928711
epoch: 27 training_loss 3.8145391869544985 test_loss: 3.926169204711914
epoch: 28 training_loss 3.8560702538490297 test_loss: 3.9722625732421877
epoch: 29 training_loss 3.6889854311943053 test_loss: 3.860548400878906
epoch: 30 training_loss 3.7728597807884214 test_loss: 3.5779464721679686
epoch: 31 training_loss 3.6211513257026673 test_loss: 3.6365936279296873
epoch: 32 training_loss 3.5839920449256897 test_loss: 3.678277587890625
epoch: 33 training_loss 3.64670686006546 test_loss: 3.4992786407470704
epoch: 34 training_loss 3.5820011734962462 test_loss: 3.5623905181884767
epoch: 35 training_loss 3.5716533041000367 test_loss: 3.5019474029541016
epoch: 36 training_loss 3.4620991492271425 test_loss: 3.4013843536376953
epoch: 37 training_loss 3.3439396739006044 test_loss: 3.3082324981689455
epoch: 38 training_loss 3.4334378910064696 test_loss: 3.3953907012939455
epoch: 39 training_loss 3.3650055575370788 test_loss: 3.4819488525390625
epoch: 40 training_loss 3.35717205286026 test_loss: 3.3629440307617187
epoch: 41 training_loss 3.2408250522613526 test_loss: 3.408921813964844
epoch: 42 training_loss 3.2513002133369446 test_loss: 3.3523597717285156
epoch: 43 training_loss 3.2831548166275026 test_loss: 3.3144386291503904
epoch: 44 training_loss 3.2526893734931948 test_loss: 3.1650678634643556
epoch: 45 training_loss 3.198041160106659 test_loss: 3.3054412841796874
epoch: 46 training_loss 3.168184366226196 test_loss: 3.2803619384765623
epoch: 47 training_loss 3.1546397399902344 test_loss: 3.19798583984375
epoch: 48 training_loss 3.1160504961013795 test_loss: 3.003130340576172
epoch: 49 training_loss 3.115617246627808 test_loss: 3.2609584808349608
epoch: 50 training_loss 3.0391148686408997 test_loss: 3.090833854675293
epoch: 51 training_loss 3.0356728410720826 test_loss: 3.0159088134765626
epoch: 52 training_loss 3.0196273851394655 test_loss: 2.987720489501953
epoch: 53 training_loss 3.0244591999053956 test_loss: 2.9942087173461913
epoch: 54 training_loss 2.91674156665802 test_loss: 2.95076847076416
epoch: 55 training_loss 2.978279824256897 test_loss: 2.755179595947266
epoch: 56 training_loss 2.911189680099487 test_loss: 3.0068033218383787
epoch: 57 training_loss 2.873216197490692 test_loss: 2.931373405456543
epoch: 58 training_loss 2.88423091173172 test_loss: 2.909082221984863
epoch: 59 training_loss 2.8994544649124148 test_loss: 2.898115539550781
epoch: 60 training_loss 2.950438091754913 test_loss: 2.868456268310547
epoch: 61 training_loss 2.8541977262496947 test_loss: 2.815570068359375
epoch: 62 training_loss 2.906842007637024 test_loss: 2.878828239440918
epoch: 63 training_loss 2.8620285248756407 test_loss: 2.8119295120239256
epoch: 64 training_loss 2.7965803694725038 test_loss: 2.755916404724121
epoch: 65 training_loss 2.802047996520996 test_loss: 2.9093809127807617
epoch: 66 training_loss 2.7996675610542296 test_loss: 2.7166366577148438
epoch: 67 training_loss 2.7424037623405457 test_loss: 2.7400768280029295
epoch: 68 training_loss 2.8108353757858278 test_loss: 2.698670768737793
epoch: 69 training_loss 2.7608254194259643 test_loss: 2.805848503112793
epoch: 70 training_loss 2.7278171730041505 test_loss: 2.6618621826171873
epoch: 71 training_loss 2.7557281494140624 test_loss: 2.849732780456543
epoch: 72 training_loss 2.7170590925216676 test_loss: 2.789316749572754
epoch: 73 training_loss 2.7226486253738402 test_loss: 2.7404775619506836
epoch: 74 training_loss 2.7479754447937013 test_loss: 2.7104507446289063
epoch: 75 training_loss 2.6959301352500917 test_loss: 2.7557729721069335
epoch: 76 training_loss 2.6871539294719695 test_loss: 2.799834632873535
epoch: 77 training_loss 2.7020010566711425 test_loss: 2.8347537994384764
epoch: 78 training_loss 2.629685814380646 test_loss: 2.699873924255371
epoch: 79 training_loss 2.623192174434662 test_loss: 2.7202211380004884
epoch: 80 training_loss 2.688470778465271 test_loss: 2.6193126678466796
epoch: 81 training_loss 2.61084538936615 test_loss: 2.6030994415283204
epoch: 82 training_loss 2.6147143936157224 test_loss: 2.6256298065185546
epoch: 83 training_loss 2.653578839302063 test_loss: 2.6722301483154296
epoch: 84 training_loss 2.6019886040687563 test_loss: 2.595204734802246
epoch: 85 training_loss 2.5910759568214417 test_loss: 2.682674026489258
epoch: 86 training_loss 2.5421228992938993 test_loss: 2.718208122253418
epoch: 87 training_loss 2.5646387231349945 test_loss: 2.582760810852051
epoch: 88 training_loss 2.5254989206790923 test_loss: 2.657893180847168
epoch: 89 training_loss 2.550080269575119 test_loss: 2.627496337890625
epoch: 90 training_loss 2.605019235610962 test_loss: 2.5205656051635743
epoch: 91 training_loss 2.544547746181488 test_loss: 2.662356948852539
epoch: 92 training_loss 2.600985736846924 test_loss: 2.6363288879394533
epoch: 93 training_loss 2.5406246054172517 test_loss: 2.500611686706543
epoch: 94 training_loss 2.483423297405243 test_loss: 2.7569753646850588
epoch: 95 training_loss 2.5901551496982576 test_loss: 2.690512466430664
epoch: 96 training_loss 2.504431369304657 test_loss: 2.3751638412475584
epoch: 97 training_loss 2.5344642162323 test_loss: 2.639395904541016
epoch: 98 training_loss 2.496958907842636 test_loss: 2.5647558212280273
epoch: 99 training_loss 2.5052462899684906 test_loss: 2.5583932876586912
epoch: 100 training_loss 2.441134227514267 test_loss: 2.615988540649414
epoch: 101 training_loss 2.4557438504695894 test_loss: 2.590740203857422
epoch: 102 training_loss 2.520974106788635 test_loss: 2.5338869094848633
epoch: 103 training_loss 2.491902644634247 test_loss: 2.504680061340332
epoch: 104 training_loss 2.494978049993515 test_loss: 2.4204761505126955
epoch: 105 training_loss 2.4459111070632935 test_loss: 2.331217575073242
epoch: 106 training_loss 2.478136019706726 test_loss: 2.4098154067993165
epoch: 107 training_loss 2.4307777965068817 test_loss: 2.5407621383666994
epoch: 108 training_loss 2.434144401550293 test_loss: 2.3751096725463867
epoch: 109 training_loss 2.4017672872543336 test_loss: 2.551237869262695
epoch: 110 training_loss 2.37081368803978 test_loss: 2.4571308135986327
epoch: 111 training_loss 2.4136331355571747 test_loss: 2.5236028671264648
epoch: 112 training_loss 2.4108004450798033 test_loss: 2.4329397201538088
epoch: 113 training_loss 2.4171698832511903 test_loss: 2.389447784423828
epoch: 114 training_loss 2.3903811168670654 test_loss: 2.4051536560058593
epoch: 115 training_loss 2.4506767809391024 test_loss: 2.3797836303710938
epoch: 116 training_loss 2.4126861190795896 test_loss: 2.4496068954467773
epoch: 117 training_loss 2.3795509195327758 test_loss: 2.3769695281982424
epoch: 118 training_loss 2.3521787321567533 test_loss: 2.377385139465332
epoch: 119 training_loss 2.3639862370491027 test_loss: 2.320020294189453
epoch: 120 training_loss 2.3474542784690855 test_loss: 2.3149959564208986
epoch: 121 training_loss 2.3407086217403412 test_loss: 2.3591127395629883
epoch: 122 training_loss 2.2771089363098143 test_loss: 2.431859588623047
epoch: 123 training_loss 2.3580077409744264 test_loss: 2.446546173095703
epoch: 124 training_loss 2.359690479040146 test_loss: 2.3413787841796876
epoch: 125 training_loss 2.3356024539470672 test_loss: 2.279246139526367
epoch: 126 training_loss 2.3691704618930816 test_loss: 2.4111379623413085
epoch: 127 training_loss 2.3075650787353514 test_loss: 2.287205696105957
epoch: 128 training_loss 2.3225100660324096 test_loss: 2.3494720458984375
epoch: 129 training_loss 2.3970259761810304 test_loss: 2.431532859802246
epoch: 130 training_loss 2.3375876808166502 test_loss: 2.3490066528320312
epoch: 131 training_loss 2.2718762385845186 test_loss: 2.409777069091797
epoch: 132 training_loss 2.2884226548671722 test_loss: 2.409931182861328
epoch: 133 training_loss 2.296000368595123 test_loss: 2.3450464248657226
epoch: 134 training_loss 2.2888879668712616 test_loss: 2.341292953491211
epoch: 135 training_loss 2.3092396688461303 test_loss: 2.3446880340576173
epoch: 136 training_loss 2.3358721935749056 test_loss: 2.2864456176757812
epoch: 137 training_loss 2.3210338044166563 test_loss: 2.3607675552368166
epoch: 138 training_loss 2.302597668170929 test_loss: 2.3713125228881835
epoch: 139 training_loss 2.2311052668094633 test_loss: 2.31326847076416
epoch: 140 training_loss 2.2670428729057313 test_loss: 2.190207290649414
epoch: 141 training_loss 2.206946831941605 test_loss: 2.3288785934448244
epoch: 142 training_loss 2.27034862279892 test_loss: 2.2552822113037108
epoch: 143 training_loss 2.3078699946403503 test_loss: 2.2929153442382812
epoch: 144 training_loss 2.267488285303116 test_loss: 2.313339424133301
epoch: 145 training_loss 2.27253812789917 test_loss: 2.252488136291504
epoch: 146 training_loss 2.249122042655945 test_loss: 2.301543426513672
epoch: 147 training_loss 2.162581137418747 test_loss: 2.2876260757446287
epoch: 148 training_loss 2.207119357585907 test_loss: 2.2615982055664063
epoch: 149 training_loss 2.268630763292313 test_loss: 2.2521228790283203
2326.5251959400366
episode: 0 training return: tensor(289.8479, device='cuda:0')
episode: 1 training return: tensor(310.7550, device='cuda:0')
episode: 2 training return: tensor(-272.3759, device='cuda:0')
episode: 3 training return: tensor(-76.7416, device='cuda:0')
epoch: 1 test_true_pfm: 2770.464364344396 sim_pfm: 154.7157789171712
episode: 4 training return: tensor(-220.0010, device='cuda:0')
episode: 5 training return: tensor(307.6111, device='cuda:0')
episode: 6 training return: tensor(112.7033, device='cuda:0')
episode: 7 training return: tensor(409.9958, device='cuda:0')
epoch: 2 test_true_pfm: 3276.71216090322 sim_pfm: 293.36288453722955
episode: 8 training return: tensor(202.4347, device='cuda:0')
episode: 9 training return: tensor(313.8076, device='cuda:0')
episode: 10 training return: tensor(-69.6859, device='cuda:0')
episode: 11 training return: tensor(339.1557, device='cuda:0')
epoch: 3 test_true_pfm: 3369.6081549713913 sim_pfm: 253.0601492888139
episode: 12 training return: tensor(247.0641, device='cuda:0')
episode: 13 training return: tensor(358.1331, device='cuda:0')
episode: 14 training return: tensor(30.3305, device='cuda:0')
episode: 15 training return: tensor(-40.7405, device='cuda:0')
epoch: 4 test_true_pfm: 2284.899235320235 sim_pfm: -32.546186552615836
episode: 16 training return: tensor(357.9607, device='cuda:0')
episode: 17 training return: tensor(160.5366, device='cuda:0')
episode: 18 training return: tensor(371.9011, device='cuda:0')
episode: 19 training return: tensor(-237.2018, device='cuda:0')
epoch: 5 test_true_pfm: 1705.1832047540581 sim_pfm: -35.92450994868219
episode: 20 training return: tensor(-242.1416, device='cuda:0')
episode: 21 training return: tensor(-214.2866, device='cuda:0')
episode: 22 training return: tensor(295.6217, device='cuda:0')
episode: 23 training return: tensor(87.6243, device='cuda:0')
epoch: 6 test_true_pfm: 3223.1922610504494 sim_pfm: -25.63889021166445
episode: 24 training return: tensor(331.5549, device='cuda:0')
episode: 25 training return: tensor(331.5103, device='cuda:0')
episode: 26 training return: tensor(301.5965, device='cuda:0')
episode: 27 training return: tensor(399.2907, device='cuda:0')
epoch: 7 test_true_pfm: 1911.7916898298402 sim_pfm: 165.84125675272662
episode: 28 training return: tensor(332.4098, device='cuda:0')
episode: 29 training return: tensor(322.3831, device='cuda:0')
episode: 30 training return: tensor(336.5363, device='cuda:0')
episode: 31 training return: tensor(341.1072, device='cuda:0')
epoch: 8 test_true_pfm: 2390.0152529484785 sim_pfm: 160.66463799138242
episode: 32 training return: tensor(-235.1041, device='cuda:0')
episode: 33 training return: tensor(-216.9012, device='cuda:0')
episode: 34 training return: tensor(-165.0126, device='cuda:0')
episode: 35 training return: tensor(215.8658, device='cuda:0')
epoch: 9 test_true_pfm: 2410.2550098931047 sim_pfm: -173.74934316116074
episode: 36 training return: tensor(362.7785, device='cuda:0')
episode: 37 training return: tensor(401.9423, device='cuda:0')
episode: 38 training return: tensor(-36.8193, device='cuda:0')
episode: 39 training return: tensor(-122.7666, device='cuda:0')
epoch: 10 test_true_pfm: 3054.2803122413266 sim_pfm: 227.5904733738862
episode: 40 training return: tensor(353.7037, device='cuda:0')
episode: 41 training return: tensor(34.7401, device='cuda:0')
episode: 42 training return: tensor(-16.9551, device='cuda:0')
episode: 43 training return: tensor(314.5211, device='cuda:0')
epoch: 11 test_true_pfm: 3042.9765177678364 sim_pfm: 44.03620461073782
episode: 44 training return: tensor(-202.1452, device='cuda:0')
episode: 45 training return: tensor(308.3349, device='cuda:0')
episode: 46 training return: tensor(301.9397, device='cuda:0')
episode: 47 training return: tensor(337.6063, device='cuda:0')
epoch: 12 test_true_pfm: 3038.8112435327403 sim_pfm: 325.2164160022124
episode: 48 training return: tensor(356.1594, device='cuda:0')
episode: 49 training return: tensor(356.0104, device='cuda:0')
episode: 50 training return: tensor(342.8976, device='cuda:0')
episode: 51 training return: tensor(398.9488, device='cuda:0')
epoch: 13 test_true_pfm: 3206.353331327242 sim_pfm: 379.6982701704449
episode: 52 training return: tensor(356.0331, device='cuda:0')
episode: 53 training return: tensor(72.8191, device='cuda:0')
episode: 54 training return: tensor(371.8408, device='cuda:0')
episode: 55 training return: tensor(365.3150, device='cuda:0')
epoch: 14 test_true_pfm: 2542.469454893919 sim_pfm: 213.59759209976377
episode: 56 training return: tensor(-3.7469, device='cuda:0')
episode: 57 training return: tensor(330.1413, device='cuda:0')
episode: 58 training return: tensor(284.9894, device='cuda:0')
episode: 59 training return: tensor(317.7618, device='cuda:0')
epoch: 15 test_true_pfm: 2889.4738398411705 sim_pfm: 144.89273979987288
episode: 60 training return: tensor(342.4007, device='cuda:0')
episode: 61 training return: tensor(360.3621, device='cuda:0')
episode: 62 training return: tensor(-102.1756, device='cuda:0')
episode: 63 training return: tensor(336.5142, device='cuda:0')
epoch: 16 test_true_pfm: 3317.7239735998005 sim_pfm: 234.7202632742119
episode: 64 training return: tensor(350.3850, device='cuda:0')
episode: 65 training return: tensor(280.0636, device='cuda:0')
episode: 66 training return: tensor(55.3788, device='cuda:0')
episode: 67 training return: tensor(335.6567, device='cuda:0')
epoch: 17 test_true_pfm: 3411.1875507610475 sim_pfm: 273.984862382611
episode: 68 training return: tensor(346.9459, device='cuda:0')
episode: 69 training return: tensor(356.9050, device='cuda:0')
episode: 70 training return: tensor(344.3611, device='cuda:0')
episode: 71 training return: tensor(325.7739, device='cuda:0')
epoch: 18 test_true_pfm: 3412.477207336644 sim_pfm: 361.5911902507166
episode: 72 training return: tensor(364.2663, device='cuda:0')
episode: 73 training return: tensor(-96.7274, device='cuda:0')
episode: 74 training return: tensor(349.5915, device='cuda:0')
episode: 75 training return: tensor(-322.6544, device='cuda:0')
epoch: 19 test_true_pfm: 2863.060661902278 sim_pfm: 194.23718685117396
episode: 76 training return: tensor(30.4831, device='cuda:0')
episode: 77 training return: tensor(354.3961, device='cuda:0')
episode: 78 training return: tensor(310.9350, device='cuda:0')
episode: 79 training return: tensor(418.2428, device='cuda:0')
epoch: 20 test_true_pfm: 2880.289063221018 sim_pfm: 405.74346538962953
episode: 80 training return: tensor(257.4834, device='cuda:0')
episode: 81 training return: tensor(29.4254, device='cuda:0')
episode: 82 training return: tensor(-138.2493, device='cuda:0')
episode: 83 training return: tensor(290.3944, device='cuda:0')
epoch: 21 test_true_pfm: 3372.2942492260267 sim_pfm: 335.7351403828846
episode: 84 training return: tensor(284.4555, device='cuda:0')
episode: 85 training return: tensor(327.8419, device='cuda:0')
episode: 86 training return: tensor(281.0988, device='cuda:0')
episode: 87 training return: tensor(105.9159, device='cuda:0')
epoch: 22 test_true_pfm: 3380.2285740508414 sim_pfm: 252.31537948269397
episode: 88 training return: tensor(-195.4134, device='cuda:0')
episode: 89 training return: tensor(328.8623, device='cuda:0')
episode: 90 training return: tensor(296.6298, device='cuda:0')
episode: 91 training return: tensor(-46.9570, device='cuda:0')
epoch: 23 test_true_pfm: 3406.903296783621 sim_pfm: 347.3919820805119
episode: 92 training return: tensor(377.5451, device='cuda:0')
episode: 93 training return: tensor(295.0555, device='cuda:0')
episode: 94 training return: tensor(415.5404, device='cuda:0')
episode: 95 training return: tensor(335.8337, device='cuda:0')
epoch: 24 test_true_pfm: 3408.119470716 sim_pfm: 360.61410350739607
episode: 96 training return: tensor(68.5123, device='cuda:0')
episode: 97 training return: tensor(369.7010, device='cuda:0')
episode: 98 training return: tensor(289.9704, device='cuda:0')
episode: 99 training return: tensor(265.6494, device='cuda:0')
epoch: 25 test_true_pfm: 3387.3718118457123 sim_pfm: 370.81810821942054
episode: 100 training return: tensor(378.2094, device='cuda:0')
episode: 101 training return: tensor(282.5673, device='cuda:0')
episode: 102 training return: tensor(384.8556, device='cuda:0')
episode: 103 training return: tensor(385.0902, device='cuda:0')
epoch: 26 test_true_pfm: 3458.0391335620866 sim_pfm: 355.1507382138613
episode: 104 training return: tensor(326.1255, device='cuda:0')
episode: 105 training return: tensor(373.6526, device='cuda:0')
episode: 106 training return: tensor(-191.1220, device='cuda:0')
episode: 107 training return: tensor(363.7838, device='cuda:0')
epoch: 27 test_true_pfm: 3068.7992507339545 sim_pfm: 225.41825576072247
episode: 108 training return: tensor(318.2776, device='cuda:0')
episode: 109 training return: tensor(342.8649, device='cuda:0')
episode: 110 training return: tensor(325.7370, device='cuda:0')
episode: 111 training return: tensor(286.5715, device='cuda:0')
epoch: 28 test_true_pfm: 3458.2762342178535 sim_pfm: 388.44316442511627
episode: 112 training return: tensor(121.3411, device='cuda:0')
episode: 113 training return: tensor(340.2631, device='cuda:0')
episode: 114 training return: tensor(315.8919, device='cuda:0')
episode: 115 training return: tensor(364.3618, device='cuda:0')
epoch: 29 test_true_pfm: 3468.822836358693 sim_pfm: 424.6732099263075
episode: 116 training return: tensor(-42.6105, device='cuda:0')
episode: 117 training return: tensor(285.5259, device='cuda:0')
episode: 118 training return: tensor(343.3022, device='cuda:0')
episode: 119 training return: tensor(399.7441, device='cuda:0')
epoch: 30 test_true_pfm: 3383.807902265294 sim_pfm: 336.64474871224957
episode: 120 training return: tensor(395.2699, device='cuda:0')
episode: 121 training return: tensor(298.4706, device='cuda:0')
episode: 122 training return: tensor(330.5823, device='cuda:0')
episode: 123 training return: tensor(378.1619, device='cuda:0')
epoch: 31 test_true_pfm: 3379.996426212412 sim_pfm: 385.7464892162534
episode: 124 training return: tensor(383.0565, device='cuda:0')
episode: 125 training return: tensor(407.3893, device='cuda:0')
episode: 126 training return: tensor(370.2740, device='cuda:0')
episode: 127 training return: tensor(342.4556, device='cuda:0')
epoch: 32 test_true_pfm: 3405.6639578395298 sim_pfm: 338.8887483724781
episode: 128 training return: tensor(331.7845, device='cuda:0')
episode: 129 training return: tensor(323.1327, device='cuda:0')
episode: 130 training return: tensor(373.9221, device='cuda:0')
episode: 131 training return: tensor(327.3718, device='cuda:0')
epoch: 33 test_true_pfm: 3371.2302800386387 sim_pfm: 379.4620327956897
episode: 132 training return: tensor(291.2282, device='cuda:0')
episode: 133 training return: tensor(349.0590, device='cuda:0')
episode: 134 training return: tensor(387.0978, device='cuda:0')
episode: 135 training return: tensor(345.3913, device='cuda:0')
epoch: 34 test_true_pfm: 3386.1663710687844 sim_pfm: 376.29166275569395
episode: 136 training return: tensor(287.8465, device='cuda:0')
episode: 137 training return: tensor(359.6898, device='cuda:0')
episode: 138 training return: tensor(338.4648, device='cuda:0')
episode: 139 training return: tensor(272.3083, device='cuda:0')
epoch: 35 test_true_pfm: 3407.0909494689463 sim_pfm: 398.5530397121135
episode: 140 training return: tensor(384.4618, device='cuda:0')
episode: 141 training return: tensor(326.0391, device='cuda:0')
episode: 142 training return: tensor(338.5952, device='cuda:0')
episode: 143 training return: tensor(371.8320, device='cuda:0')
epoch: 36 test_true_pfm: 3357.7393816784675 sim_pfm: 384.889710757571
episode: 144 training return: tensor(294.3005, device='cuda:0')
episode: 145 training return: tensor(337.8335, device='cuda:0')
episode: 146 training return: tensor(271.4181, device='cuda:0')
episode: 147 training return: tensor(415.8777, device='cuda:0')
epoch: 37 test_true_pfm: 3367.757996243205 sim_pfm: 382.49661658498616
episode: 148 training return: tensor(319.4339, device='cuda:0')
episode: 149 training return: tensor(341.5939, device='cuda:0')
episode: 150 training return: tensor(387.8234, device='cuda:0')
episode: 151 training return: tensor(326.1446, device='cuda:0')
epoch: 38 test_true_pfm: 3431.541606187293 sim_pfm: 376.44832927942235
episode: 152 training return: tensor(309.4730, device='cuda:0')
episode: 153 training return: tensor(369.9157, device='cuda:0')
episode: 154 training return: tensor(347.6602, device='cuda:0')
episode: 155 training return: tensor(343.9209, device='cuda:0')
epoch: 39 test_true_pfm: 3368.2328193527833 sim_pfm: 368.03533728373196
episode: 156 training return: tensor(403.0894, device='cuda:0')
episode: 157 training return: tensor(317.9139, device='cuda:0')
episode: 158 training return: tensor(312.5547, device='cuda:0')
episode: 159 training return: tensor(387.2393, device='cuda:0')
epoch: 40 test_true_pfm: 3364.0583746780867 sim_pfm: 368.03982493368676
episode: 160 training return: tensor(364.7701, device='cuda:0')
episode: 161 training return: tensor(392.9813, device='cuda:0')
episode: 162 training return: tensor(396.8762, device='cuda:0')
episode: 163 training return: tensor(349.5591, device='cuda:0')
epoch: 41 test_true_pfm: 3325.795167729366 sim_pfm: 352.3728368160082
episode: 164 training return: tensor(241.6534, device='cuda:0')
episode: 165 training return: tensor(331.4390, device='cuda:0')
episode: 166 training return: tensor(309.7765, device='cuda:0')
episode: 167 training return: tensor(-137.3154, device='cuda:0')
epoch: 42 test_true_pfm: 3411.397333546973 sim_pfm: 363.22462656964007
episode: 168 training return: tensor(322.3336, device='cuda:0')
episode: 169 training return: tensor(410.3689, device='cuda:0')
episode: 170 training return: tensor(315.2907, device='cuda:0')
episode: 171 training return: tensor(367.8795, device='cuda:0')
epoch: 43 test_true_pfm: 3407.528867531681 sim_pfm: 352.70292840146186
episode: 172 training return: tensor(359.7888, device='cuda:0')
episode: 173 training return: tensor(364.4446, device='cuda:0')
episode: 174 training return: tensor(291.4305, device='cuda:0')
episode: 175 training return: tensor(424.1852, device='cuda:0')
epoch: 44 test_true_pfm: 3383.941938394211 sim_pfm: 361.86753779899055
episode: 176 training return: tensor(259.7965, device='cuda:0')
episode: 177 training return: tensor(319.4128, device='cuda:0')
episode: 178 training return: tensor(377.2047, device='cuda:0')
episode: 179 training return: tensor(376.9125, device='cuda:0')
epoch: 45 test_true_pfm: 3431.2363441139714 sim_pfm: 392.7611492155217
episode: 180 training return: tensor(371.1701, device='cuda:0')
episode: 181 training return: tensor(309.2748, device='cuda:0')
episode: 182 training return: tensor(352.3360, device='cuda:0')
episode: 183 training return: tensor(383.4091, device='cuda:0')
epoch: 46 test_true_pfm: 3381.6442196665025 sim_pfm: 373.6670424578867
episode: 184 training return: tensor(-15.0204, device='cuda:0')
episode: 185 training return: tensor(381.5208, device='cuda:0')
episode: 186 training return: tensor(276.9597, device='cuda:0')
episode: 187 training return: tensor(410.9101, device='cuda:0')
epoch: 47 test_true_pfm: 3373.1173848145277 sim_pfm: 376.58515589030384
episode: 188 training return: tensor(311.3836, device='cuda:0')
episode: 189 training return: tensor(370.8812, device='cuda:0')
episode: 190 training return: tensor(362.4758, device='cuda:0')
episode: 191 training return: tensor(341.7649, device='cuda:0')
epoch: 48 test_true_pfm: 3398.7536903490072 sim_pfm: 322.0705868474324
episode: 192 training return: tensor(347.9917, device='cuda:0')
episode: 193 training return: tensor(326.7793, device='cuda:0')
episode: 194 training return: tensor(273.5607, device='cuda:0')
episode: 195 training return: tensor(346.7578, device='cuda:0')
epoch: 49 test_true_pfm: 3345.489884770253 sim_pfm: 326.93403774208855
episode: 196 training return: tensor(350.5756, device='cuda:0')
episode: 197 training return: tensor(199.9234, device='cuda:0')
episode: 198 training return: tensor(327.7967, device='cuda:0')
episode: 199 training return: tensor(342.8282, device='cuda:0')
epoch: 50 test_true_pfm: 3389.53137120736 sim_pfm: 389.1588315035333
episode: 200 training return: tensor(341.6062, device='cuda:0')
episode: 201 training return: tensor(251.2039, device='cuda:0')
episode: 202 training return: tensor(-101.4266, device='cuda:0')
episode: 203 training return: tensor(361.0011, device='cuda:0')
epoch: 51 test_true_pfm: 3308.1374700042784 sim_pfm: 328.1933532123803
episode: 204 training return: tensor(-51.9032, device='cuda:0')
episode: 205 training return: tensor(391.8222, device='cuda:0')
episode: 206 training return: tensor(401.1393, device='cuda:0')
episode: 207 training return: tensor(337.6408, device='cuda:0')
epoch: 52 test_true_pfm: 3403.3416627296792 sim_pfm: 356.86412103295635
episode: 208 training return: tensor(-80.1973, device='cuda:0')
episode: 209 training return: tensor(338.6571, device='cuda:0')
episode: 210 training return: tensor(349.5751, device='cuda:0')
episode: 211 training return: tensor(363.9814, device='cuda:0')
epoch: 53 test_true_pfm: 3360.9235734512545 sim_pfm: 368.5903527754999
episode: 212 training return: tensor(301.8529, device='cuda:0')
episode: 213 training return: tensor(397.1952, device='cuda:0')
episode: 214 training return: tensor(424.7125, device='cuda:0')
episode: 215 training return: tensor(362.8318, device='cuda:0')
epoch: 54 test_true_pfm: 3356.3781482214267 sim_pfm: 361.80106251186226
episode: 216 training return: tensor(-174.5218, device='cuda:0')
episode: 217 training return: tensor(309.5487, device='cuda:0')
episode: 218 training return: tensor(358.7955, device='cuda:0')
episode: 219 training return: tensor(377.2511, device='cuda:0')
epoch: 55 test_true_pfm: 3353.1014517466087 sim_pfm: 364.7288711857206
episode: 220 training return: tensor(420.9527, device='cuda:0')
episode: 221 training return: tensor(393.5871, device='cuda:0')
episode: 222 training return: tensor(372.6473, device='cuda:0')
episode: 223 training return: tensor(335.4625, device='cuda:0')
epoch: 56 test_true_pfm: 3381.7272713976868 sim_pfm: 305.9869597892296
episode: 224 training return: tensor(294.1284, device='cuda:0')
episode: 225 training return: tensor(344.8809, device='cuda:0')
episode: 226 training return: tensor(386.4568, device='cuda:0')
episode: 227 training return: tensor(386.1776, device='cuda:0')
epoch: 57 test_true_pfm: 3409.7018146484083 sim_pfm: 384.8376841188292
episode: 228 training return: tensor(371.3827, device='cuda:0')
episode: 229 training return: tensor(353.5653, device='cuda:0')
episode: 230 training return: tensor(343.1892, device='cuda:0')
episode: 231 training return: tensor(52.9524, device='cuda:0')
epoch: 58 test_true_pfm: 3343.055583800624 sim_pfm: 348.8770210290095
episode: 232 training return: tensor(361.9581, device='cuda:0')
episode: 233 training return: tensor(227.5001, device='cuda:0')
episode: 234 training return: tensor(319.0806, device='cuda:0')
episode: 235 training return: tensor(336.6218, device='cuda:0')
epoch: 59 test_true_pfm: 3054.3475281841306 sim_pfm: 339.6178268167617
episode: 236 training return: tensor(342.4971, device='cuda:0')
episode: 237 training return: tensor(319.0753, device='cuda:0')
episode: 238 training return: tensor(414.8607, device='cuda:0')
episode: 239 training return: tensor(219.1028, device='cuda:0')
epoch: 60 test_true_pfm: 3339.654053986953 sim_pfm: 335.6111199411777
episode: 240 training return: tensor(360.6767, device='cuda:0')
episode: 241 training return: tensor(349.3857, device='cuda:0')
episode: 242 training return: tensor(308.5741, device='cuda:0')
episode: 243 training return: tensor(325.2668, device='cuda:0')
epoch: 61 test_true_pfm: 3415.3706496174395 sim_pfm: 338.52507036689593
episode: 244 training return: tensor(383.7786, device='cuda:0')
episode: 245 training return: tensor(325.0105, device='cuda:0')
episode: 246 training return: tensor(283.7553, device='cuda:0')
episode: 247 training return: tensor(373.4282, device='cuda:0')
epoch: 62 test_true_pfm: 3383.7611116767985 sim_pfm: 385.87450904891983
episode: 248 training return: tensor(285.9770, device='cuda:0')
episode: 249 training return: tensor(341.3087, device='cuda:0')
episode: 250 training return: tensor(305.3221, device='cuda:0')
episode: 251 training return: tensor(367.9345, device='cuda:0')
epoch: 63 test_true_pfm: 3436.5277229678154 sim_pfm: 381.2995421946592
episode: 252 training return: tensor(333.4128, device='cuda:0')
episode: 253 training return: tensor(305.3657, device='cuda:0')
episode: 254 training return: tensor(358.6985, device='cuda:0')
episode: 255 training return: tensor(387.0754, device='cuda:0')
epoch: 64 test_true_pfm: 3376.4468593316046 sim_pfm: 396.26083496017964
episode: 256 training return: tensor(329.2646, device='cuda:0')
episode: 257 training return: tensor(342.0069, device='cuda:0')
episode: 258 training return: tensor(378.1360, device='cuda:0')
episode: 259 training return: tensor(437.4415, device='cuda:0')
epoch: 65 test_true_pfm: 3394.3906775134205 sim_pfm: 342.3342442104865
episode: 260 training return: tensor(331.1557, device='cuda:0')
episode: 261 training return: tensor(357.7569, device='cuda:0')
episode: 262 training return: tensor(327.0969, device='cuda:0')
episode: 263 training return: tensor(-43.1333, device='cuda:0')
epoch: 66 test_true_pfm: 2964.0762971613954 sim_pfm: 367.3197812314417
episode: 264 training return: tensor(377.9159, device='cuda:0')
episode: 265 training return: tensor(331.1245, device='cuda:0')
episode: 266 training return: tensor(401.4054, device='cuda:0')
episode: 267 training return: tensor(342.0865, device='cuda:0')
epoch: 67 test_true_pfm: 3421.8129983790864 sim_pfm: 378.2798422444709
episode: 268 training return: tensor(304.0849, device='cuda:0')
episode: 269 training return: tensor(330.5750, device='cuda:0')
episode: 270 training return: tensor(368.4035, device='cuda:0')
episode: 271 training return: tensor(323.0895, device='cuda:0')
epoch: 68 test_true_pfm: 3424.5679864605627 sim_pfm: 372.66655516137445
episode: 272 training return: tensor(301.4379, device='cuda:0')
episode: 273 training return: tensor(331.5377, device='cuda:0')
episode: 274 training return: tensor(309.9279, device='cuda:0')
episode: 275 training return: tensor(190.6179, device='cuda:0')
epoch: 69 test_true_pfm: 3406.3304023489873 sim_pfm: 369.01719828508794
episode: 276 training return: tensor(367.5670, device='cuda:0')
episode: 277 training return: tensor(363.2200, device='cuda:0')
episode: 278 training return: tensor(385.9196, device='cuda:0')
episode: 279 training return: tensor(353.8070, device='cuda:0')
epoch: 70 test_true_pfm: 3390.0778333909852 sim_pfm: 331.56351143313805
episode: 280 training return: tensor(375.7887, device='cuda:0')
episode: 281 training return: tensor(305.4360, device='cuda:0')
episode: 282 training return: tensor(358.4550, device='cuda:0')
episode: 283 training return: tensor(379.4528, device='cuda:0')
epoch: 71 test_true_pfm: 3404.69940034854 sim_pfm: 236.64987495153522
episode: 284 training return: tensor(329.6422, device='cuda:0')
episode: 285 training return: tensor(256.2280, device='cuda:0')
episode: 286 training return: tensor(329.0403, device='cuda:0')
episode: 287 training return: tensor(314.7647, device='cuda:0')
epoch: 72 test_true_pfm: 3438.6474762284797 sim_pfm: 368.94435978508164
episode: 288 training return: tensor(364.8004, device='cuda:0')
episode: 289 training return: tensor(347.9363, device='cuda:0')
episode: 290 training return: tensor(376.2495, device='cuda:0')
episode: 291 training return: tensor(341.1154, device='cuda:0')
epoch: 73 test_true_pfm: 3432.7334656751045 sim_pfm: 319.2962210329715
episode: 292 training return: tensor(313.8197, device='cuda:0')
episode: 293 training return: tensor(336.7350, device='cuda:0')
episode: 294 training return: tensor(360.9190, device='cuda:0')
episode: 295 training return: tensor(341.0792, device='cuda:0')
epoch: 74 test_true_pfm: 3349.337905463383 sim_pfm: 370.16035048261983
episode: 296 training return: tensor(393.3755, device='cuda:0')
episode: 297 training return: tensor(315.8406, device='cuda:0')
episode: 298 training return: tensor(293.8521, device='cuda:0')
episode: 299 training return: tensor(244.5084, device='cuda:0')
epoch: 75 test_true_pfm: 3429.837310380952 sim_pfm: 385.4110473525555
episode: 300 training return: tensor(354.8174, device='cuda:0')
episode: 301 training return: tensor(399.4247, device='cuda:0')
episode: 302 training return: tensor(378.0168, device='cuda:0')
episode: 303 training return: tensor(316.0432, device='cuda:0')
epoch: 76 test_true_pfm: 2974.129282944475 sim_pfm: 376.16143987953546
episode: 304 training return: tensor(474.4957, device='cuda:0')
episode: 305 training return: tensor(345.5775, device='cuda:0')
episode: 306 training return: tensor(381.6128, device='cuda:0')
episode: 307 training return: tensor(365.1844, device='cuda:0')
epoch: 77 test_true_pfm: 3032.280359044995 sim_pfm: 336.5205660990323
episode: 308 training return: tensor(346.5238, device='cuda:0')
episode: 309 training return: tensor(297.9430, device='cuda:0')
episode: 310 training return: tensor(305.0643, device='cuda:0')
episode: 311 training return: tensor(296.5230, device='cuda:0')
epoch: 78 test_true_pfm: 3367.8781931388726 sim_pfm: 339.1445540600398
episode: 312 training return: tensor(358.0289, device='cuda:0')
episode: 313 training return: tensor(341.2185, device='cuda:0')
episode: 314 training return: tensor(432.4989, device='cuda:0')
episode: 315 training return: tensor(382.9010, device='cuda:0')
epoch: 79 test_true_pfm: 3410.783521250188 sim_pfm: 358.2950234443706
episode: 316 training return: tensor(418.9796, device='cuda:0')
episode: 317 training return: tensor(343.2715, device='cuda:0')
episode: 318 training return: tensor(351.4400, device='cuda:0')
episode: 319 training return: tensor(324.0173, device='cuda:0')
epoch: 80 test_true_pfm: 3357.6322682251357 sim_pfm: 353.76388712838525
episode: 320 training return: tensor(359.8353, device='cuda:0')
episode: 321 training return: tensor(340.3433, device='cuda:0')
episode: 322 training return: tensor(376.5326, device='cuda:0')
episode: 323 training return: tensor(365.9332, device='cuda:0')
epoch: 81 test_true_pfm: 3371.287250890506 sim_pfm: 330.92740430222085
episode: 324 training return: tensor(357.5618, device='cuda:0')
episode: 325 training return: tensor(393.5776, device='cuda:0')
episode: 326 training return: tensor(338.5959, device='cuda:0')
episode: 327 training return: tensor(363.1405, device='cuda:0')
epoch: 82 test_true_pfm: 3320.6798333318416 sim_pfm: 329.2670684029581
episode: 328 training return: tensor(380.1477, device='cuda:0')
episode: 329 training return: tensor(330.0363, device='cuda:0')
episode: 330 training return: tensor(329.5413, device='cuda:0')
episode: 331 training return: tensor(264.9300, device='cuda:0')
epoch: 83 test_true_pfm: 3377.3164925863407 sim_pfm: 326.4288151609168
episode: 332 training return: tensor(283.1104, device='cuda:0')
episode: 333 training return: tensor(310.0489, device='cuda:0')
episode: 334 training return: tensor(298.7682, device='cuda:0')
episode: 335 training return: tensor(331.1156, device='cuda:0')
epoch: 84 test_true_pfm: 3409.5038725933177 sim_pfm: 373.08880920151324
episode: 336 training return: tensor(293.3173, device='cuda:0')
episode: 337 training return: tensor(315.6920, device='cuda:0')
episode: 338 training return: tensor(327.6055, device='cuda:0')
episode: 339 training return: tensor(329.7016, device='cuda:0')
epoch: 85 test_true_pfm: 3334.00553323275 sim_pfm: 372.03619263249374
episode: 340 training return: tensor(373.1235, device='cuda:0')
episode: 341 training return: tensor(332.5990, device='cuda:0')
episode: 342 training return: tensor(458.4626, device='cuda:0')
episode: 343 training return: tensor(316.8368, device='cuda:0')
epoch: 86 test_true_pfm: 3453.2910688014726 sim_pfm: 354.9041690924011
episode: 344 training return: tensor(343.2449, device='cuda:0')
episode: 345 training return: tensor(328.1849, device='cuda:0')
episode: 346 training return: tensor(330.6126, device='cuda:0')
episode: 347 training return: tensor(338.7007, device='cuda:0')
epoch: 87 test_true_pfm: 3347.199501509407 sim_pfm: 346.95995228919975
episode: 348 training return: tensor(331.7160, device='cuda:0')
episode: 349 training return: tensor(314.9697, device='cuda:0')
episode: 350 training return: tensor(314.2883, device='cuda:0')
episode: 351 training return: tensor(444.2834, device='cuda:0')
epoch: 88 test_true_pfm: 3389.1871943143724 sim_pfm: 359.876359738914
episode: 352 training return: tensor(288.4989, device='cuda:0')
episode: 353 training return: tensor(97.2035, device='cuda:0')
episode: 354 training return: tensor(363.7238, device='cuda:0')
episode: 355 training return: tensor(264.4752, device='cuda:0')
epoch: 89 test_true_pfm: 3396.9407218126094 sim_pfm: 371.0076994207532
episode: 356 training return: tensor(275.1490, device='cuda:0')
episode: 357 training return: tensor(380.1809, device='cuda:0')
episode: 358 training return: tensor(356.9474, device='cuda:0')
episode: 359 training return: tensor(407.9447, device='cuda:0')
epoch: 90 test_true_pfm: 3410.9798340183274 sim_pfm: 366.17315133982146
episode: 360 training return: tensor(295.3052, device='cuda:0')
episode: 361 training return: tensor(384.6985, device='cuda:0')
episode: 362 training return: tensor(315.0791, device='cuda:0')
episode: 363 training return: tensor(348.5453, device='cuda:0')
epoch: 91 test_true_pfm: 3383.4522348451774 sim_pfm: 400.5532670179576
episode: 364 training return: tensor(214.4928, device='cuda:0')
episode: 365 training return: tensor(-97.4990, device='cuda:0')
episode: 366 training return: tensor(266.4599, device='cuda:0')
episode: 367 training return: tensor(350.8755, device='cuda:0')
epoch: 92 test_true_pfm: 3423.737782725437 sim_pfm: 349.26706223662285
episode: 368 training return: tensor(351.3547, device='cuda:0')
episode: 369 training return: tensor(325.8521, device='cuda:0')
episode: 370 training return: tensor(179.5009, device='cuda:0')
episode: 371 training return: tensor(324.8728, device='cuda:0')
epoch: 93 test_true_pfm: 3369.2107999779164 sim_pfm: 335.1020132147435
episode: 372 training return: tensor(377.1100, device='cuda:0')
episode: 373 training return: tensor(114.5261, device='cuda:0')
episode: 374 training return: tensor(344.0965, device='cuda:0')
episode: 375 training return: tensor(356.7255, device='cuda:0')
epoch: 94 test_true_pfm: 3381.1144871544525 sim_pfm: 364.6463859177699
episode: 376 training return: tensor(305.6281, device='cuda:0')
episode: 377 training return: tensor(392.2866, device='cuda:0')
episode: 378 training return: tensor(337.0059, device='cuda:0')
episode: 379 training return: tensor(385.0379, device='cuda:0')
epoch: 95 test_true_pfm: 3424.7870287240826 sim_pfm: 286.2801696124564
episode: 380 training return: tensor(325.1094, device='cuda:0')
episode: 381 training return: tensor(334.7449, device='cuda:0')
episode: 382 training return: tensor(192.9516, device='cuda:0')
episode: 383 training return: tensor(346.8183, device='cuda:0')
epoch: 96 test_true_pfm: 3370.507383977732 sim_pfm: 427.1244284166799
episode: 384 training return: tensor(303.2638, device='cuda:0')
episode: 385 training return: tensor(374.9253, device='cuda:0')
episode: 386 training return: tensor(339.2740, device='cuda:0')
episode: 387 training return: tensor(355.1082, device='cuda:0')
epoch: 97 test_true_pfm: 3426.9603531337525 sim_pfm: 380.8160206110236
episode: 388 training return: tensor(314.8927, device='cuda:0')
episode: 389 training return: tensor(295.6504, device='cuda:0')
episode: 390 training return: tensor(341.9698, device='cuda:0')
episode: 391 training return: tensor(299.2448, device='cuda:0')
epoch: 98 test_true_pfm: 3309.10051889733 sim_pfm: 341.39709448383655
episode: 392 training return: tensor(377.3112, device='cuda:0')
episode: 393 training return: tensor(263.4720, device='cuda:0')
episode: 394 training return: tensor(269.8429, device='cuda:0')
episode: 395 training return: tensor(320.8695, device='cuda:0')
epoch: 99 test_true_pfm: 3392.230083063889 sim_pfm: 359.711706983139
episode: 396 training return: tensor(381.7580, device='cuda:0')
episode: 397 training return: tensor(410.4590, device='cuda:0')
episode: 398 training return: tensor(330.3785, device='cuda:0')
episode: 399 training return: tensor(10.4455, device='cuda:0')
epoch: 100 test_true_pfm: 3468.0257580906527 sim_pfm: 406.41674935321015
episode: 400 training return: tensor(399.6038, device='cuda:0')
episode: 401 training return: tensor(15.1018, device='cuda:0')
episode: 402 training return: tensor(334.4007, device='cuda:0')
episode: 403 training return: tensor(377.6919, device='cuda:0')
epoch: 101 test_true_pfm: 3364.896507293863 sim_pfm: 374.6211679635259
episode: 404 training return: tensor(325.8652, device='cuda:0')
episode: 405 training return: tensor(330.2090, device='cuda:0')
episode: 406 training return: tensor(398.8550, device='cuda:0')
episode: 407 training return: tensor(299.5105, device='cuda:0')
epoch: 102 test_true_pfm: 3429.216964899511 sim_pfm: 362.9787435112812
episode: 408 training return: tensor(369.7041, device='cuda:0')
episode: 409 training return: tensor(357.5826, device='cuda:0')
episode: 410 training return: tensor(390.6796, device='cuda:0')
episode: 411 training return: tensor(310.9193, device='cuda:0')
epoch: 103 test_true_pfm: 3400.4779159276645 sim_pfm: 354.9578109289675
episode: 412 training return: tensor(346.6642, device='cuda:0')
episode: 413 training return: tensor(367.8506, device='cuda:0')
episode: 414 training return: tensor(361.2819, device='cuda:0')
episode: 415 training return: tensor(361.5031, device='cuda:0')
epoch: 104 test_true_pfm: 3415.1789424000453 sim_pfm: 373.1369365216233
episode: 416 training return: tensor(320.8399, device='cuda:0')
episode: 417 training return: tensor(365.2734, device='cuda:0')
episode: 418 training return: tensor(327.1587, device='cuda:0')
episode: 419 training return: tensor(366.2360, device='cuda:0')
epoch: 105 test_true_pfm: 3335.9233049094814 sim_pfm: 328.79962624392164
episode: 420 training return: tensor(366.6700, device='cuda:0')
episode: 421 training return: tensor(375.1671, device='cuda:0')
episode: 422 training return: tensor(328.8168, device='cuda:0')
episode: 423 training return: tensor(388.8074, device='cuda:0')
epoch: 106 test_true_pfm: 3453.7838602868324 sim_pfm: 379.06557842937764
episode: 424 training return: tensor(334.3083, device='cuda:0')
episode: 425 training return: tensor(283.4008, device='cuda:0')
episode: 426 training return: tensor(304.9908, device='cuda:0')
episode: 427 training return: tensor(335.5253, device='cuda:0')
epoch: 107 test_true_pfm: 3402.052742507547 sim_pfm: 343.42955948230036
episode: 428 training return: tensor(257.8978, device='cuda:0')
episode: 429 training return: tensor(254.1581, device='cuda:0')
episode: 430 training return: tensor(349.6744, device='cuda:0')
episode: 431 training return: tensor(374.5186, device='cuda:0')
epoch: 108 test_true_pfm: 3402.125417209643 sim_pfm: 404.1581543782183
episode: 432 training return: tensor(332.5844, device='cuda:0')
episode: 433 training return: tensor(350.0030, device='cuda:0')
episode: 434 training return: tensor(325.7803, device='cuda:0')
episode: 435 training return: tensor(277.3545, device='cuda:0')
epoch: 109 test_true_pfm: 3378.6907092882357 sim_pfm: 388.6365442166959
episode: 436 training return: tensor(427.1082, device='cuda:0')
episode: 437 training return: tensor(375.7386, device='cuda:0')
episode: 438 training return: tensor(295.5190, device='cuda:0')
episode: 439 training return: tensor(355.6819, device='cuda:0')
epoch: 110 test_true_pfm: 3406.6180909843297 sim_pfm: 369.45632621091016
episode: 440 training return: tensor(389.9041, device='cuda:0')
episode: 441 training return: tensor(356.9721, device='cuda:0')
episode: 442 training return: tensor(361.5656, device='cuda:0')
episode: 443 training return: tensor(300.6091, device='cuda:0')
epoch: 111 test_true_pfm: 3435.973387450182 sim_pfm: 352.9363642676229
episode: 444 training return: tensor(338.0447, device='cuda:0')
episode: 445 training return: tensor(339.5761, device='cuda:0')
episode: 446 training return: tensor(345.9396, device='cuda:0')
episode: 447 training return: tensor(338.5814, device='cuda:0')
epoch: 112 test_true_pfm: 3385.0789440768526 sim_pfm: 342.7808078212353
episode: 448 training return: tensor(369.2101, device='cuda:0')
episode: 449 training return: tensor(341.4794, device='cuda:0')
episode: 450 training return: tensor(261.5764, device='cuda:0')
episode: 451 training return: tensor(348.6219, device='cuda:0')
epoch: 113 test_true_pfm: 3353.7836071349248 sim_pfm: 337.4258772311635
episode: 452 training return: tensor(375.2222, device='cuda:0')
episode: 453 training return: tensor(345.0151, device='cuda:0')
episode: 454 training return: tensor(370.4276, device='cuda:0')
episode: 455 training return: tensor(348.4330, device='cuda:0')
epoch: 114 test_true_pfm: 3397.1939597389937 sim_pfm: 344.5985054379562
episode: 456 training return: tensor(375.9516, device='cuda:0')
episode: 457 training return: tensor(398.8980, device='cuda:0')
episode: 458 training return: tensor(317.0603, device='cuda:0')
episode: 459 training return: tensor(359.8972, device='cuda:0')
epoch: 115 test_true_pfm: 3350.7936574160453 sim_pfm: 330.4564048103833
episode: 460 training return: tensor(333.7695, device='cuda:0')
episode: 461 training return: tensor(310.1723, device='cuda:0')
episode: 462 training return: tensor(182.2989, device='cuda:0')
episode: 463 training return: tensor(354.4469, device='cuda:0')
epoch: 116 test_true_pfm: 3392.3597532140398 sim_pfm: 358.87514190375805
episode: 464 training return: tensor(302.2367, device='cuda:0')
episode: 465 training return: tensor(-116.1196, device='cuda:0')
episode: 466 training return: tensor(339.1715, device='cuda:0')
episode: 467 training return: tensor(345.4550, device='cuda:0')
epoch: 117 test_true_pfm: 3382.329187505619 sim_pfm: 378.4466341479953
episode: 468 training return: tensor(403.0643, device='cuda:0')
episode: 469 training return: tensor(331.0094, device='cuda:0')
episode: 470 training return: tensor(391.7326, device='cuda:0')
episode: 471 training return: tensor(376.3994, device='cuda:0')
epoch: 118 test_true_pfm: 3379.254453702164 sim_pfm: 376.65793932132266
episode: 472 training return: tensor(320.2952, device='cuda:0')
episode: 473 training return: tensor(-102.8864, device='cuda:0')
episode: 474 training return: tensor(305.0389, device='cuda:0')
episode: 475 training return: tensor(353.6080, device='cuda:0')
epoch: 119 test_true_pfm: 3428.623695855144 sim_pfm: 244.50371796446657
episode: 476 training return: tensor(315.6849, device='cuda:0')
episode: 477 training return: tensor(357.4049, device='cuda:0')
episode: 478 training return: tensor(338.5560, device='cuda:0')
episode: 479 training return: tensor(12.4853, device='cuda:0')
epoch: 120 test_true_pfm: 3355.5470573916896 sim_pfm: 365.0943701929743
episode: 480 training return: tensor(336.6685, device='cuda:0')
episode: 481 training return: tensor(339.8769, device='cuda:0')
episode: 482 training return: tensor(340.6811, device='cuda:0')
episode: 483 training return: tensor(331.6051, device='cuda:0')
epoch: 121 test_true_pfm: 3431.2694558532853 sim_pfm: 377.36416084532783
episode: 484 training return: tensor(391.7298, device='cuda:0')
episode: 485 training return: tensor(347.4001, device='cuda:0')
episode: 486 training return: tensor(315.3311, device='cuda:0')
episode: 487 training return: tensor(340.7005, device='cuda:0')
epoch: 122 test_true_pfm: 3304.1630446915137 sim_pfm: 387.5196349542239
episode: 488 training return: tensor(371.8393, device='cuda:0')
episode: 489 training return: tensor(430.6924, device='cuda:0')
episode: 490 training return: tensor(399.0508, device='cuda:0')
episode: 491 training return: tensor(389.5801, device='cuda:0')
epoch: 123 test_true_pfm: 3434.265851912483 sim_pfm: 359.6030455037738
episode: 492 training return: tensor(363.7886, device='cuda:0')
episode: 493 training return: tensor(372.3821, device='cuda:0')
episode: 494 training return: tensor(296.9104, device='cuda:0')
episode: 495 training return: tensor(327.1337, device='cuda:0')
epoch: 124 test_true_pfm: 3421.0627461142194 sim_pfm: 373.58038455837715
episode: 496 training return: tensor(340.9032, device='cuda:0')
episode: 497 training return: tensor(429.5841, device='cuda:0')
episode: 498 training return: tensor(365.3151, device='cuda:0')
episode: 499 training return: tensor(343.9104, device='cuda:0')
epoch: 125 test_true_pfm: 3436.3379518935308 sim_pfm: 373.1055975221777
episode: 500 training return: tensor(362.6015, device='cuda:0')
episode: 501 training return: tensor(314.9634, device='cuda:0')
episode: 502 training return: tensor(416.4178, device='cuda:0')
episode: 503 training return: tensor(337.3776, device='cuda:0')
epoch: 126 test_true_pfm: 3434.273530032035 sim_pfm: 379.2934824488766
episode: 504 training return: tensor(309.8045, device='cuda:0')
episode: 505 training return: tensor(361.9357, device='cuda:0')
episode: 506 training return: tensor(340.1169, device='cuda:0')
episode: 507 training return: tensor(349.8794, device='cuda:0')
epoch: 127 test_true_pfm: 3067.75119603874 sim_pfm: 353.9562409170321
episode: 508 training return: tensor(399.7169, device='cuda:0')
episode: 509 training return: tensor(413.2831, device='cuda:0')
episode: 510 training return: tensor(348.1864, device='cuda:0')
episode: 511 training return: tensor(279.9555, device='cuda:0')
epoch: 128 test_true_pfm: 3383.50653134528 sim_pfm: 371.26846070289804
episode: 512 training return: tensor(337.9202, device='cuda:0')
episode: 513 training return: tensor(375.0515, device='cuda:0')
episode: 514 training return: tensor(327.5357, device='cuda:0')
episode: 515 training return: tensor(331.7654, device='cuda:0')
epoch: 129 test_true_pfm: 3425.9400591553817 sim_pfm: 364.7148220502713
episode: 516 training return: tensor(375.4466, device='cuda:0')
episode: 517 training return: tensor(401.9199, device='cuda:0')
episode: 518 training return: tensor(332.5045, device='cuda:0')
episode: 519 training return: tensor(332.3611, device='cuda:0')
epoch: 130 test_true_pfm: 3359.8228951534597 sim_pfm: 339.2025565664517
episode: 520 training return: tensor(343.7765, device='cuda:0')
episode: 521 training return: tensor(294.9698, device='cuda:0')
episode: 522 training return: tensor(269.3061, device='cuda:0')
episode: 523 training return: tensor(327.5480, device='cuda:0')
epoch: 131 test_true_pfm: 3403.8263233540597 sim_pfm: 345.57454175222665
episode: 524 training return: tensor(254.5393, device='cuda:0')
episode: 525 training return: tensor(343.0611, device='cuda:0')
episode: 526 training return: tensor(313.3794, device='cuda:0')
episode: 527 training return: tensor(371.7399, device='cuda:0')
epoch: 132 test_true_pfm: 3420.471986321447 sim_pfm: 390.7555198431849
episode: 528 training return: tensor(-101.3887, device='cuda:0')
episode: 529 training return: tensor(288.9355, device='cuda:0')
episode: 530 training return: tensor(357.9080, device='cuda:0')
episode: 531 training return: tensor(333.0193, device='cuda:0')
epoch: 133 test_true_pfm: 3409.4284638913814 sim_pfm: 364.4392084904248
episode: 532 training return: tensor(348.4182, device='cuda:0')
episode: 533 training return: tensor(314.2574, device='cuda:0')
episode: 534 training return: tensor(357.7886, device='cuda:0')
episode: 535 training return: tensor(404.4704, device='cuda:0')
epoch: 134 test_true_pfm: 3432.6366851980238 sim_pfm: 359.9830734070274
episode: 536 training return: tensor(386.8973, device='cuda:0')
episode: 537 training return: tensor(363.6533, device='cuda:0')
episode: 538 training return: tensor(349.5157, device='cuda:0')
episode: 539 training return: tensor(329.6958, device='cuda:0')
epoch: 135 test_true_pfm: 3445.2588475937105 sim_pfm: 391.93820541879785
episode: 540 training return: tensor(382.7631, device='cuda:0')
episode: 541 training return: tensor(383.2326, device='cuda:0')
episode: 542 training return: tensor(327.0721, device='cuda:0')
episode: 543 training return: tensor(353.1801, device='cuda:0')
epoch: 136 test_true_pfm: 3424.779218431109 sim_pfm: 399.959561428938
episode: 544 training return: tensor(293.3155, device='cuda:0')
episode: 545 training return: tensor(386.2755, device='cuda:0')
episode: 546 training return: tensor(355.1597, device='cuda:0')
episode: 547 training return: tensor(388.0629, device='cuda:0')
epoch: 137 test_true_pfm: 3378.0290124264334 sim_pfm: 378.4889778207192
episode: 548 training return: tensor(330.9578, device='cuda:0')
episode: 549 training return: tensor(332.7267, device='cuda:0')
episode: 550 training return: tensor(333.4782, device='cuda:0')
episode: 551 training return: tensor(366.5960, device='cuda:0')
epoch: 138 test_true_pfm: 3405.709398551205 sim_pfm: 366.52493481447647
episode: 552 training return: tensor(300.5406, device='cuda:0')
episode: 553 training return: tensor(348.1280, device='cuda:0')
episode: 554 training return: tensor(367.5260, device='cuda:0')
episode: 555 training return: tensor(361.7878, device='cuda:0')
epoch: 139 test_true_pfm: 3436.8744518833023 sim_pfm: 376.4233433905368
episode: 556 training return: tensor(330.6546, device='cuda:0')
episode: 557 training return: tensor(324.3508, device='cuda:0')
episode: 558 training return: tensor(341.6632, device='cuda:0')
episode: 559 training return: tensor(319.6474, device='cuda:0')
epoch: 140 test_true_pfm: 3506.563511443261 sim_pfm: 389.1706463409549
episode: 560 training return: tensor(350.3064, device='cuda:0')
episode: 561 training return: tensor(345.3080, device='cuda:0')
episode: 562 training return: tensor(368.5392, device='cuda:0')
episode: 563 training return: tensor(352.1110, device='cuda:0')
epoch: 141 test_true_pfm: 3419.804433875688 sim_pfm: 360.60364721448667
episode: 564 training return: tensor(392.8639, device='cuda:0')
episode: 565 training return: tensor(301.4216, device='cuda:0')
episode: 566 training return: tensor(343.6714, device='cuda:0')
episode: 567 training return: tensor(318.7990, device='cuda:0')
epoch: 142 test_true_pfm: 3051.21909136994 sim_pfm: 385.73189440030063
episode: 568 training return: tensor(325.5901, device='cuda:0')
episode: 569 training return: tensor(361.5906, device='cuda:0')
episode: 570 training return: tensor(389.6849, device='cuda:0')
episode: 571 training return: tensor(371.8841, device='cuda:0')
epoch: 143 test_true_pfm: 3409.580984388386 sim_pfm: 387.18892579392804
episode: 572 training return: tensor(260.9330, device='cuda:0')
episode: 573 training return: tensor(388.1193, device='cuda:0')
episode: 574 training return: tensor(321.7314, device='cuda:0')
episode: 575 training return: tensor(329.8305, device='cuda:0')
epoch: 144 test_true_pfm: 3358.1172157188885 sim_pfm: 349.36030519676086
episode: 576 training return: tensor(343.8929, device='cuda:0')
episode: 577 training return: tensor(316.7898, device='cuda:0')
episode: 578 training return: tensor(355.6652, device='cuda:0')
episode: 579 training return: tensor(191.9797, device='cuda:0')
epoch: 145 test_true_pfm: 3397.4403762092547 sim_pfm: 340.5049417616101
episode: 580 training return: tensor(306.0134, device='cuda:0')
episode: 581 training return: tensor(401.0883, device='cuda:0')
episode: 582 training return: tensor(363.0997, device='cuda:0')
episode: 583 training return: tensor(172.2156, device='cuda:0')
epoch: 146 test_true_pfm: 3392.5702627448154 sim_pfm: 386.08185502150445
episode: 584 training return: tensor(317.2674, device='cuda:0')
episode: 585 training return: tensor(406.8312, device='cuda:0')
episode: 586 training return: tensor(341.3606, device='cuda:0')
episode: 587 training return: tensor(315.7220, device='cuda:0')
epoch: 147 test_true_pfm: 3416.1764942409586 sim_pfm: 384.9628962846473
episode: 588 training return: tensor(358.2114, device='cuda:0')
episode: 589 training return: tensor(340.0821, device='cuda:0')
episode: 590 training return: tensor(380.6688, device='cuda:0')
episode: 591 training return: tensor(333.1451, device='cuda:0')
epoch: 148 test_true_pfm: 3449.4528614013507 sim_pfm: 379.98322899562
episode: 592 training return: tensor(386.6883, device='cuda:0')
episode: 593 training return: tensor(294.5400, device='cuda:0')
episode: 594 training return: tensor(382.7461, device='cuda:0')
episode: 595 training return: tensor(397.0497, device='cuda:0')
epoch: 149 test_true_pfm: 3393.920483339362 sim_pfm: 376.3830096288196
episode: 596 training return: tensor(343.4154, device='cuda:0')
episode: 597 training return: tensor(322.5567, device='cuda:0')
episode: 598 training return: tensor(392.8300, device='cuda:0')
episode: 599 training return: tensor(392.8291, device='cuda:0')
epoch: 150 test_true_pfm: 3433.5741067553136 sim_pfm: 366.70518342993455
