['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '4', '--data', '30000', '--sub']
epoch: 0 training_loss 0.23941105119884015 test_loss: 0.18619686365127563
epoch: 1 training_loss 0.1577658184617758 test_loss: 0.1298947215080261
epoch: 2 training_loss 0.14087820246815683 test_loss: 0.12999142408370973
epoch: 3 training_loss 0.1204774758592248 test_loss: 0.14350219964981079
epoch: 4 training_loss 0.11859707217663526 test_loss: 0.1182294487953186
epoch: 5 training_loss 0.12111104171723128 test_loss: 0.09970581531524658
epoch: 6 training_loss 0.10978940237313509 test_loss: 0.10414657592773438
epoch: 7 training_loss 0.10890513375401496 test_loss: 0.09749038815498352
epoch: 8 training_loss 0.11115957221016287 test_loss: 0.10912472009658813
epoch: 9 training_loss 0.11178001351654529 test_loss: 0.09841479659080506
epoch: 10 training_loss 0.10960641670972109 test_loss: 0.09308596849441528
epoch: 11 training_loss 0.10761583518236875 test_loss: 0.10777232646942139
epoch: 12 training_loss 0.10862308256328106 test_loss: 0.109114670753479
epoch: 13 training_loss 0.10194199923425913 test_loss: 0.10076624155044556
epoch: 14 training_loss 0.10733879774808884 test_loss: 0.10486022233963013
epoch: 15 training_loss 0.10674012869596482 test_loss: 0.10315190553665161
epoch: 16 training_loss 0.10602450786158442 test_loss: 0.08711810111999511
epoch: 17 training_loss 0.09666489988565445 test_loss: 0.09720371961593628
epoch: 18 training_loss 0.09764681966975332 test_loss: 0.10614798069000245
epoch: 19 training_loss 0.09770173016935586 test_loss: 0.09509169459342956
epoch: 20 training_loss 0.10132848117500544 test_loss: 0.1075996994972229
epoch: 21 training_loss 0.1035713722370565 test_loss: 0.10677968263626099
epoch: 22 training_loss 0.0956357504427433 test_loss: 0.09838131070137024
epoch: 23 training_loss 0.10052155394107104 test_loss: 0.09233629703521729
epoch: 24 training_loss 0.10295607002452016 test_loss: 0.08307206630706787
epoch: 25 training_loss 0.09915401641279459 test_loss: 0.0932776689529419
epoch: 26 training_loss 0.09694365106523037 test_loss: 0.09093517661094666
epoch: 27 training_loss 0.10043237064033747 test_loss: 0.0878133773803711
epoch: 28 training_loss 0.09608208164572715 test_loss: 0.09032622575759888
epoch: 29 training_loss 0.10598524052649737 test_loss: 0.1158382773399353
epoch: 30 training_loss 0.0934712919779122 test_loss: 0.09193052649497986
epoch: 31 training_loss 0.09748188614845275 test_loss: 0.10556203126907349
epoch: 32 training_loss 0.09621460787951946 test_loss: 0.10823295116424561
epoch: 33 training_loss 0.09444405257701874 test_loss: 0.0839199423789978
epoch: 34 training_loss 0.09461665518581867 test_loss: 0.09485487341880798
epoch: 35 training_loss 0.09898247327655554 test_loss: 0.0874857783317566
epoch: 36 training_loss 0.09494124056771397 test_loss: 0.08679878115653991
epoch: 37 training_loss 0.09448292952030897 test_loss: 0.09925398826599122
epoch: 38 training_loss 0.09189879696816206 test_loss: 0.08823426961898803
epoch: 39 training_loss 0.09261665569618344 test_loss: 0.088016676902771
epoch: 40 training_loss 0.09605502620339394 test_loss: 0.09842078685760498
epoch: 41 training_loss 0.09337109526619315 test_loss: 0.08269951343536378
epoch: 42 training_loss 0.0914485554676503 test_loss: 0.09045032262802125
epoch: 43 training_loss 0.09754253443330527 test_loss: 0.09620903730392456
epoch: 44 training_loss 0.08996178731322288 test_loss: 0.10544999837875366
epoch: 45 training_loss 0.09086335940286518 test_loss: 0.09291772246360779
epoch: 46 training_loss 0.09733065567910672 test_loss: 0.09845196008682251
epoch: 47 training_loss 0.09681267362087966 test_loss: 0.11155449151992798
epoch: 48 training_loss 0.09471564618870616 test_loss: 0.0928142249584198
epoch: 49 training_loss 0.09543330769985914 test_loss: 0.07995669841766358
epoch: 50 training_loss 0.08815672243013978 test_loss: 0.08514068722724914
epoch: 51 training_loss 0.0927592203579843 test_loss: 0.0977558434009552
epoch: 52 training_loss 0.09245614197105169 test_loss: 0.08813183307647705
epoch: 53 training_loss 0.09228348707780241 test_loss: 0.10041729211807252
epoch: 54 training_loss 0.08910532541573048 test_loss: 0.0911183774471283
epoch: 55 training_loss 0.08558528907597066 test_loss: 0.08916867971420288
epoch: 56 training_loss 0.08952889329753816 test_loss: 0.0810544490814209
epoch: 57 training_loss 0.09860887788236142 test_loss: 0.1203510046005249
epoch: 58 training_loss 0.09086098708212376 test_loss: 0.09763954877853394
epoch: 59 training_loss 0.09522133165970445 test_loss: 0.09789195656776428
epoch: 60 training_loss 0.0933533356525004 test_loss: 0.09609614610671997
epoch: 61 training_loss 0.09233889454975724 test_loss: 0.08969988226890564
epoch: 62 training_loss 0.09153547853231431 test_loss: 0.10195842981338502
epoch: 63 training_loss 0.08384585896506906 test_loss: 0.10157883167266846
epoch: 64 training_loss 0.0812211830355227 test_loss: 0.09800849556922912
epoch: 65 training_loss 0.09440602611750365 test_loss: 0.09921900629997253
epoch: 66 training_loss 0.086897081322968 test_loss: 0.08017623424530029
epoch: 67 training_loss 0.09239261448383332 test_loss: 0.0951441466808319
epoch: 68 training_loss 0.08862174239009618 test_loss: 0.09325056672096252
epoch: 69 training_loss 0.08464599875733256 test_loss: 0.0884955108165741
epoch: 70 training_loss 0.08412468375638127 test_loss: 0.08661695718765258
epoch: 71 training_loss 0.09094408133998513 test_loss: 0.09064664840698242
epoch: 72 training_loss 0.08534187285229564 test_loss: 0.08859390020370483
epoch: 73 training_loss 0.08610121781006455 test_loss: 0.07810127139091491
epoch: 74 training_loss 0.0941572324372828 test_loss: 0.11451959609985352
epoch: 75 training_loss 0.08311291484162212 test_loss: 0.08199567198753357
epoch: 76 training_loss 0.08770052501931787 test_loss: 0.08967742919921876
epoch: 77 training_loss 0.08627862373366951 test_loss: 0.095253324508667
epoch: 78 training_loss 0.08660432818345726 test_loss: 0.10323612689971924
epoch: 79 training_loss 0.0919640314206481 test_loss: 0.08333355188369751
epoch: 80 training_loss 0.08811858776956796 test_loss: 0.12708760499954225
epoch: 81 training_loss 0.08479075431823731 test_loss: 0.1048578143119812
epoch: 82 training_loss 0.08566469991579652 test_loss: 0.09471880793571472
epoch: 83 training_loss 0.08763409879058599 test_loss: 0.09942143559455871
epoch: 84 training_loss 0.0843093154206872 test_loss: 0.08556302189826966
epoch: 85 training_loss 0.09160486148670316 test_loss: 0.09866223931312561
epoch: 86 training_loss 0.08692617671564222 test_loss: 0.08267811536788941
epoch: 87 training_loss 0.0852695238403976 test_loss: 0.08234306573867797
epoch: 88 training_loss 0.0800631407648325 test_loss: 0.09269163608551026
epoch: 89 training_loss 0.08646968798711896 test_loss: 0.08429288864135742
epoch: 90 training_loss 0.08844851698726415 test_loss: 0.08952946066856385
epoch: 91 training_loss 0.09468737652525305 test_loss: 0.08914304971694946
epoch: 92 training_loss 0.08404489867389202 test_loss: 0.09221119284629822
epoch: 93 training_loss 0.08760695956647396 test_loss: 0.08680252432823181
epoch: 94 training_loss 0.08693637397140265 test_loss: 0.09329570531845092
epoch: 95 training_loss 0.0892925321124494 test_loss: 0.08891732692718506
epoch: 96 training_loss 0.08400848399847746 test_loss: 0.09247686862945556
epoch: 97 training_loss 0.07967728599905968 test_loss: 0.08267483711242676
epoch: 98 training_loss 0.08254829226061702 test_loss: 0.08922700881958008
epoch: 99 training_loss 0.08108208777382969 test_loss: 0.1075502872467041
epoch: 100 training_loss 0.08648279405198991 test_loss: 0.09869458079338074
epoch: 101 training_loss 0.08198950422927737 test_loss: 0.09808480143547058
epoch: 102 training_loss 0.08651479557156563 test_loss: 0.10641413927078247
epoch: 103 training_loss 0.08819549084641039 test_loss: 0.102619469165802
epoch: 104 training_loss 0.09108824668452144 test_loss: 0.08841565847396851
epoch: 105 training_loss 0.0808155345916748 test_loss: 0.10054347515106202
epoch: 106 training_loss 0.0833559351786971 test_loss: 0.09035177826881409
epoch: 107 training_loss 0.09472766857594252 test_loss: 0.0989458441734314
epoch: 108 training_loss 0.07914299989119172 test_loss: 0.08408724069595337
epoch: 109 training_loss 0.09214071214199065 test_loss: 0.0918110728263855
epoch: 110 training_loss 0.0910124391131103 test_loss: 0.09581544995307922
epoch: 111 training_loss 0.08610668251290918 test_loss: 0.11035029888153076
epoch: 112 training_loss 0.08181831978261471 test_loss: 0.0918673574924469
epoch: 113 training_loss 0.08050283497199416 test_loss: 0.0883946418762207
epoch: 114 training_loss 0.0836849919334054 test_loss: 0.09599689841270446
epoch: 115 training_loss 0.08566222520545125 test_loss: 0.09211546182632446
epoch: 116 training_loss 0.08916692789644003 test_loss: 0.094022136926651
epoch: 117 training_loss 0.08480409373529256 test_loss: 0.09234050512313843
epoch: 118 training_loss 0.09008680615574122 test_loss: 0.08316859006881713
epoch: 119 training_loss 0.0788181160017848 test_loss: 0.09188883304595948
epoch: 120 training_loss 0.08547395771369339 test_loss: 0.1101421594619751
epoch: 121 training_loss 0.08265639459714294 test_loss: 0.0979038953781128
epoch: 122 training_loss 0.08274182355031372 test_loss: 0.08584657907485962
epoch: 123 training_loss 0.08222877386026084 test_loss: 0.09283310174942017
epoch: 124 training_loss 0.0903860410116613 test_loss: 0.08524717092514038
epoch: 125 training_loss 0.08357837993651629 test_loss: 0.10196583271026612
epoch: 126 training_loss 0.07970550121739507 test_loss: 0.11292364597320556
epoch: 127 training_loss 0.07661975804716349 test_loss: 0.08759799003601074
epoch: 128 training_loss 0.08793858505785465 test_loss: 0.082495778799057
epoch: 129 training_loss 0.088226359449327 test_loss: 0.09453382492065429
epoch: 130 training_loss 0.08607989102602005 test_loss: 0.09717891216278077
epoch: 131 training_loss 0.0807482492364943 test_loss: 0.08476218581199646
epoch: 132 training_loss 0.0890250371210277 test_loss: 0.09258469343185424
epoch: 133 training_loss 0.08341902785003186 test_loss: 0.08786901831626892
epoch: 134 training_loss 0.07303539740853011 test_loss: 0.09973098635673523
epoch: 135 training_loss 0.07593647946137935 test_loss: 0.08994100093841553
epoch: 136 training_loss 0.0793618007004261 test_loss: 0.09234658479690552
epoch: 137 training_loss 0.08076863350346684 test_loss: 0.09597328305244446
epoch: 138 training_loss 0.08216588546521962 test_loss: 0.10492640733718872
epoch: 139 training_loss 0.08192182529717684 test_loss: 0.09512832164764404
epoch: 140 training_loss 0.08120485773310065 test_loss: 0.10140063762664794
epoch: 141 training_loss 0.07961604168638586 test_loss: 0.10436466932296753
epoch: 142 training_loss 0.08531314373016358 test_loss: 0.09004828929901124
epoch: 143 training_loss 0.08090874772518873 test_loss: 0.10111616849899292
epoch: 144 training_loss 0.07972170585766435 test_loss: 0.10005440711975097
epoch: 145 training_loss 0.08209224069491029 test_loss: 0.10715991258621216
epoch: 146 training_loss 0.0824862514808774 test_loss: 0.0978554368019104
epoch: 147 training_loss 0.08249134542420507 test_loss: 0.09364245533943176
epoch: 148 training_loss 0.07573498636484147 test_loss: 0.09504485726356507
epoch: 149 training_loss 0.08162331720814109 test_loss: 0.09337493777275085
epoch: 0 training_loss 39.11367296218872 test_loss: 20.693998718261717
epoch: 1 training_loss 16.44255886077881 test_loss: 14.171928405761719
epoch: 2 training_loss 12.259195623397828 test_loss: 10.994979858398438
epoch: 3 training_loss 10.363492078781128 test_loss: 9.380097961425781
epoch: 4 training_loss 9.119580993652344 test_loss: 8.944815826416015
epoch: 5 training_loss 8.370843596458435 test_loss: 7.980884552001953
epoch: 6 training_loss 7.516112756729126 test_loss: 7.418941497802734
epoch: 7 training_loss 7.260636868476868 test_loss: 7.027689361572266
epoch: 8 training_loss 6.835918865203857 test_loss: 6.667064666748047
epoch: 9 training_loss 6.3878932762146 test_loss: 6.215460586547851
epoch: 10 training_loss 6.200330462455749 test_loss: 6.0695960998535154
epoch: 11 training_loss 5.922536935806274 test_loss: 5.674396514892578
epoch: 12 training_loss 5.59752938747406 test_loss: 5.598408889770508
epoch: 13 training_loss 5.386002554893493 test_loss: 5.4647777557373045
epoch: 14 training_loss 5.182391848564148 test_loss: 5.1634765625
epoch: 15 training_loss 5.061943323612213 test_loss: 5.1283203125
epoch: 16 training_loss 4.843593332767487 test_loss: 4.745575332641602
epoch: 17 training_loss 4.71377649307251 test_loss: 4.6768798828125
epoch: 18 training_loss 4.540386304855347 test_loss: 4.580492782592773
epoch: 19 training_loss 4.46725341796875 test_loss: 4.480842208862304
epoch: 20 training_loss 4.510808126926422 test_loss: 4.56195182800293
epoch: 21 training_loss 4.226233637332916 test_loss: 4.375382614135742
epoch: 22 training_loss 4.149679884910584 test_loss: 4.010940170288086
epoch: 23 training_loss 4.069142019748687 test_loss: 4.115145874023438
epoch: 24 training_loss 3.909090049266815 test_loss: 4.0247344970703125
epoch: 25 training_loss 3.937089765071869 test_loss: 3.890340805053711
epoch: 26 training_loss 3.879266016483307 test_loss: 3.8881649017333983
epoch: 27 training_loss 3.731448621749878 test_loss: 3.720927047729492
epoch: 28 training_loss 3.771118519306183 test_loss: 3.682827377319336
epoch: 29 training_loss 3.7151905941963195 test_loss: 3.667061996459961
epoch: 30 training_loss 3.5932175588607786 test_loss: 3.5640853881835937
epoch: 31 training_loss 3.5738739490509035 test_loss: 3.700814437866211
epoch: 32 training_loss 3.46137823343277 test_loss: 3.434707260131836
epoch: 33 training_loss 3.5417200446128847 test_loss: 3.653779983520508
epoch: 34 training_loss 3.457836997509003 test_loss: 3.72027702331543
epoch: 35 training_loss 3.349847161769867 test_loss: 3.4263538360595702
epoch: 36 training_loss 3.3646272563934327 test_loss: 3.4606014251708985
epoch: 37 training_loss 3.304149694442749 test_loss: 3.366282272338867
epoch: 38 training_loss 3.3187459778785704 test_loss: 3.4630142211914063
epoch: 39 training_loss 3.285391924381256 test_loss: 3.3368144989013673
epoch: 40 training_loss 3.2579135131835937 test_loss: 3.343050003051758
epoch: 41 training_loss 3.2562182211875914 test_loss: 3.1392915725708006
epoch: 42 training_loss 3.162337839603424 test_loss: 3.1982648849487303
epoch: 43 training_loss 3.187942428588867 test_loss: 3.2140064239501953
epoch: 44 training_loss 3.2249401903152464 test_loss: 3.1906837463378905
epoch: 45 training_loss 3.1278339409828186 test_loss: 3.196313667297363
epoch: 46 training_loss 3.083617353439331 test_loss: 3.184120750427246
epoch: 47 training_loss 3.083063361644745 test_loss: 3.225737380981445
epoch: 48 training_loss 3.03160258769989 test_loss: 3.0985057830810545
epoch: 49 training_loss 3.0307612133026125 test_loss: 2.9841215133666994
epoch: 50 training_loss 2.966478204727173 test_loss: 3.0569732666015623
epoch: 51 training_loss 3.0575612664222716 test_loss: 3.119719123840332
epoch: 52 training_loss 2.963589334487915 test_loss: 2.976670265197754
epoch: 53 training_loss 2.963344476222992 test_loss: 2.935721778869629
epoch: 54 training_loss 2.9447488379478455 test_loss: 2.9418087005615234
epoch: 55 training_loss 2.942388517856598 test_loss: 2.8776727676391602
epoch: 56 training_loss 2.8601818227767946 test_loss: 2.8625965118408203
epoch: 57 training_loss 2.855828380584717 test_loss: 2.8422603607177734
epoch: 58 training_loss 2.9269321870803835 test_loss: 3.0123416900634767
epoch: 59 training_loss 2.8622859859466554 test_loss: 2.9581815719604494
epoch: 60 training_loss 2.748222522735596 test_loss: 2.8025007247924805
epoch: 61 training_loss 2.835314118862152 test_loss: 2.7719850540161133
epoch: 62 training_loss 2.815086703300476 test_loss: 2.913436508178711
epoch: 63 training_loss 2.812629005908966 test_loss: 2.9034679412841795
epoch: 64 training_loss 2.7580657148361207 test_loss: 2.716025733947754
epoch: 65 training_loss 2.760471532344818 test_loss: 2.823528289794922
epoch: 66 training_loss 2.658377020359039 test_loss: 2.689921760559082
epoch: 67 training_loss 2.7295797944068907 test_loss: 3.0188045501708984
epoch: 68 training_loss 2.763249728679657 test_loss: 2.9140424728393555
epoch: 69 training_loss 2.6767852687835694 test_loss: 2.729573631286621
epoch: 70 training_loss 2.7078280377388 test_loss: 2.689884567260742
epoch: 71 training_loss 2.70567138671875 test_loss: 2.81893367767334
epoch: 72 training_loss 2.659027121067047 test_loss: 2.5913440704345705
epoch: 73 training_loss 2.6361380565166472 test_loss: 2.6809768676757812
epoch: 74 training_loss 2.6623185801506044 test_loss: 2.816404342651367
epoch: 75 training_loss 2.6321780252456666 test_loss: 2.7328908920288084
epoch: 76 training_loss 2.6141117751598357 test_loss: 2.7217744827270507
epoch: 77 training_loss 2.5872396445274353 test_loss: 2.757661056518555
epoch: 78 training_loss 2.5569930362701414 test_loss: 2.6367773056030273
epoch: 79 training_loss 2.574711220264435 test_loss: 2.6840997695922852
epoch: 80 training_loss 2.5974891769886015 test_loss: 2.6117029190063477
epoch: 81 training_loss 2.5653740584850313 test_loss: 2.492171859741211
epoch: 82 training_loss 2.591261234283447 test_loss: 2.6804166793823243
epoch: 83 training_loss 2.6092618775367735 test_loss: 2.5419290542602537
epoch: 84 training_loss 2.548716571331024 test_loss: 2.6323591232299806
epoch: 85 training_loss 2.567634494304657 test_loss: 2.6567781448364256
epoch: 86 training_loss 2.5102558982372285 test_loss: 2.4910663604736327
epoch: 87 training_loss 2.5198224341869353 test_loss: 2.515072250366211
epoch: 88 training_loss 2.523364711999893 test_loss: 2.6648736953735352
epoch: 89 training_loss 2.5052983379364013 test_loss: 2.7540836334228516
epoch: 90 training_loss 2.499576666355133 test_loss: 2.593465805053711
epoch: 91 training_loss 2.5139256465435027 test_loss: 2.4937076568603516
epoch: 92 training_loss 2.4482451677322388 test_loss: 2.3728759765625
epoch: 93 training_loss 2.462206962108612 test_loss: 2.433095932006836
epoch: 94 training_loss 2.4901256775856018 test_loss: 2.489984703063965
epoch: 95 training_loss 2.410593172311783 test_loss: 2.572539138793945
epoch: 96 training_loss 2.4934013795852663 test_loss: 2.5621320724487306
epoch: 97 training_loss 2.4378103125095367 test_loss: 2.609147834777832
epoch: 98 training_loss 2.5096760880947113 test_loss: 2.500510025024414
epoch: 99 training_loss 2.4021431279182432 test_loss: 2.3930021286010743
epoch: 100 training_loss 2.4486542284488677 test_loss: 2.5052444458007814
epoch: 101 training_loss 2.4229832923412324 test_loss: 2.585044097900391
epoch: 102 training_loss 2.407940533161163 test_loss: 2.3821807861328126
epoch: 103 training_loss 2.422007474899292 test_loss: 2.4839672088623046
epoch: 104 training_loss 2.3862709641456603 test_loss: 2.3977684020996093
epoch: 105 training_loss 2.4425692951679228 test_loss: 2.4131795883178713
epoch: 106 training_loss 2.411687760353088 test_loss: 2.457125663757324
epoch: 107 training_loss 2.3736713135242464 test_loss: 2.3786211013793945
epoch: 108 training_loss 2.430543922185898 test_loss: 2.544205665588379
epoch: 109 training_loss 2.403252991437912 test_loss: 2.523369789123535
epoch: 110 training_loss 2.30232305765152 test_loss: 2.342155647277832
epoch: 111 training_loss 2.3494663977622987 test_loss: 2.383199691772461
epoch: 112 training_loss 2.3672753036022187 test_loss: 2.3756954193115236
epoch: 113 training_loss 2.348395006656647 test_loss: 2.4679071426391603
epoch: 114 training_loss 2.3436156916618347 test_loss: 2.3242729187011717
epoch: 115 training_loss 2.322486298084259 test_loss: 2.4369115829467773
epoch: 116 training_loss 2.3917936265468596 test_loss: 2.2804567337036135
epoch: 117 training_loss 2.3044561076164247 test_loss: 2.3668994903564453
epoch: 118 training_loss 2.2975839257240294 test_loss: 2.4235021591186525
epoch: 119 training_loss 2.2793619871139525 test_loss: 2.3929391860961915
epoch: 120 training_loss 2.298282902240753 test_loss: 2.4226335525512694
epoch: 121 training_loss 2.2767057108879087 test_loss: 2.3726953506469726
epoch: 122 training_loss 2.3613053584098815 test_loss: 2.3953554153442385
epoch: 123 training_loss 2.316880110502243 test_loss: 2.4258964538574217
epoch: 124 training_loss 2.2551471841335298 test_loss: 2.3894626617431642
epoch: 125 training_loss 2.34041099190712 test_loss: 2.305065727233887
epoch: 126 training_loss 2.2292564737796785 test_loss: 2.4171445846557615
epoch: 127 training_loss 2.2848883128166197 test_loss: 2.392485427856445
epoch: 128 training_loss 2.2226022517681123 test_loss: 2.1989850997924805
epoch: 129 training_loss 2.2643577957153322 test_loss: 2.345642852783203
epoch: 130 training_loss 2.2799339938163756 test_loss: 2.313381576538086
epoch: 131 training_loss 2.2497846484184265 test_loss: 2.3751516342163086
epoch: 132 training_loss 2.2513812029361726 test_loss: 2.487850379943848
epoch: 133 training_loss 2.2304159033298494 test_loss: 2.3253374099731445
epoch: 134 training_loss 2.2109920394420626 test_loss: 2.3447946548461913
epoch: 135 training_loss 2.2822114861011507 test_loss: 2.2573007583618163
epoch: 136 training_loss 2.223938748836517 test_loss: 2.2426374435424803
epoch: 137 training_loss 2.2694345140457153 test_loss: 2.43770809173584
epoch: 138 training_loss 2.219228068590164 test_loss: 2.2044158935546876
epoch: 139 training_loss 2.232124338150024 test_loss: 2.373331832885742
epoch: 140 training_loss 2.2346927344799044 test_loss: 2.194687271118164
epoch: 141 training_loss 2.237942924499512 test_loss: 2.4237857818603517
epoch: 142 training_loss 2.220159052610397 test_loss: 2.3419981002807617
epoch: 143 training_loss 2.2159313201904296 test_loss: 2.3712461471557615
epoch: 144 training_loss 2.236451687812805 test_loss: 2.2968753814697265
epoch: 145 training_loss 2.1851269221305847 test_loss: 2.2484418869018556
epoch: 146 training_loss 2.225992385149002 test_loss: 2.4711820602416994
epoch: 147 training_loss 2.1875092828273774 test_loss: 2.2094165802001955
epoch: 148 training_loss 2.276998828649521 test_loss: 2.2646650314331054
epoch: 149 training_loss 2.2130974912643433 test_loss: 2.248811721801758
3041.505238693836
episode: 0 training return: tensor(36.4005, device='cuda:0')
episode: 1 training return: tensor(319.3276, device='cuda:0')
episode: 2 training return: tensor(364.4986, device='cuda:0')
episode: 3 training return: tensor(355.9326, device='cuda:0')
epoch: 1 test_true_pfm: 2081.9399410779033 sim_pfm: 64.5497899086331
episode: 4 training return: tensor(334.4874, device='cuda:0')
episode: 5 training return: tensor(330.7782, device='cuda:0')
episode: 6 training return: tensor(-1.7723, device='cuda:0')
episode: 7 training return: tensor(267.9478, device='cuda:0')
epoch: 2 test_true_pfm: 2769.9428916954016 sim_pfm: 173.21640242711874
episode: 8 training return: tensor(338.8091, device='cuda:0')
episode: 9 training return: tensor(321.7643, device='cuda:0')
episode: 10 training return: tensor(288.2921, device='cuda:0')
episode: 11 training return: tensor(348.5844, device='cuda:0')
epoch: 3 test_true_pfm: 3096.2093808943555 sim_pfm: 257.0472655654982
episode: 12 training return: tensor(270.8322, device='cuda:0')
episode: 13 training return: tensor(146.9210, device='cuda:0')
episode: 14 training return: tensor(-346.1327, device='cuda:0')
episode: 15 training return: tensor(266.8169, device='cuda:0')
epoch: 4 test_true_pfm: 3063.4427147142546 sim_pfm: 93.09879525772219
episode: 16 training return: tensor(297.6128, device='cuda:0')
episode: 17 training return: tensor(314.9697, device='cuda:0')
episode: 18 training return: tensor(328.3184, device='cuda:0')
episode: 19 training return: tensor(298.5948, device='cuda:0')
epoch: 5 test_true_pfm: 3021.185310351167 sim_pfm: 10.235073778836522
episode: 20 training return: tensor(299.8724, device='cuda:0')
episode: 21 training return: tensor(317.4094, device='cuda:0')
episode: 22 training return: tensor(340.0333, device='cuda:0')
episode: 23 training return: tensor(-44.9477, device='cuda:0')
epoch: 6 test_true_pfm: 3365.3096539455523 sim_pfm: 107.77914961838785
episode: 24 training return: tensor(-373.7347, device='cuda:0')
episode: 25 training return: tensor(114.6599, device='cuda:0')
episode: 26 training return: tensor(269.4708, device='cuda:0')
episode: 27 training return: tensor(316.1860, device='cuda:0')
epoch: 7 test_true_pfm: 3333.0682686342516 sim_pfm: 251.8830332001089
episode: 28 training return: tensor(104.6109, device='cuda:0')
episode: 29 training return: tensor(256.7463, device='cuda:0')
episode: 30 training return: tensor(-219.6616, device='cuda:0')
episode: 31 training return: tensor(-221.1067, device='cuda:0')
epoch: 8 test_true_pfm: 3307.2655874014504 sim_pfm: 113.33632667659549
episode: 32 training return: tensor(299.1583, device='cuda:0')
episode: 33 training return: tensor(280.5321, device='cuda:0')
episode: 34 training return: tensor(321.7208, device='cuda:0')
episode: 35 training return: tensor(-93.1916, device='cuda:0')
epoch: 9 test_true_pfm: 3341.9842842879516 sim_pfm: 222.06864228703003
episode: 36 training return: tensor(252.6956, device='cuda:0')
episode: 37 training return: tensor(307.8432, device='cuda:0')
episode: 38 training return: tensor(363.1254, device='cuda:0')
episode: 39 training return: tensor(83.9024, device='cuda:0')
epoch: 10 test_true_pfm: 3361.3054204627624 sim_pfm: 210.53981875020932
episode: 40 training return: tensor(263.2237, device='cuda:0')
episode: 41 training return: tensor(110.3122, device='cuda:0')
episode: 42 training return: tensor(327.2307, device='cuda:0')
episode: 43 training return: tensor(101.6569, device='cuda:0')
epoch: 11 test_true_pfm: 2485.437208949352 sim_pfm: 126.88434530782008
episode: 44 training return: tensor(183.7189, device='cuda:0')
episode: 45 training return: tensor(332.7994, device='cuda:0')
episode: 46 training return: tensor(327.1379, device='cuda:0')
episode: 47 training return: tensor(35.4977, device='cuda:0')
epoch: 12 test_true_pfm: 3058.126567794214 sim_pfm: 246.4907189202107
episode: 48 training return: tensor(341.5226, device='cuda:0')
episode: 49 training return: tensor(301.4322, device='cuda:0')
episode: 50 training return: tensor(131.0182, device='cuda:0')
episode: 51 training return: tensor(39.6623, device='cuda:0')
epoch: 13 test_true_pfm: 2628.265337519002 sim_pfm: 11.466608432335002
episode: 52 training return: tensor(369.0199, device='cuda:0')
episode: 53 training return: tensor(345.8586, device='cuda:0')
episode: 54 training return: tensor(181.4902, device='cuda:0')
episode: 55 training return: tensor(11.0384, device='cuda:0')
epoch: 14 test_true_pfm: 3345.6621011196808 sim_pfm: 214.6878023770599
episode: 56 training return: tensor(320.7530, device='cuda:0')
episode: 57 training return: tensor(335.2011, device='cuda:0')
episode: 58 training return: tensor(217.8813, device='cuda:0')
episode: 59 training return: tensor(281.9906, device='cuda:0')
epoch: 15 test_true_pfm: 3098.8175725278757 sim_pfm: 258.4160688558283
episode: 60 training return: tensor(-22.2857, device='cuda:0')
episode: 61 training return: tensor(274.4992, device='cuda:0')
episode: 62 training return: tensor(275.3338, device='cuda:0')
episode: 63 training return: tensor(204.7622, device='cuda:0')
epoch: 16 test_true_pfm: 3373.643857453148 sim_pfm: 324.1309775217863
episode: 64 training return: tensor(215.2182, device='cuda:0')
episode: 65 training return: tensor(290.4459, device='cuda:0')
episode: 66 training return: tensor(6.2303, device='cuda:0')
episode: 67 training return: tensor(281.9418, device='cuda:0')
epoch: 17 test_true_pfm: 2922.3709975425586 sim_pfm: 185.37948203730048
episode: 68 training return: tensor(292.6995, device='cuda:0')
episode: 69 training return: tensor(257.9381, device='cuda:0')
episode: 70 training return: tensor(73.2315, device='cuda:0')
episode: 71 training return: tensor(347.1295, device='cuda:0')
epoch: 18 test_true_pfm: 3370.2464464184263 sim_pfm: 364.3036115714931
episode: 72 training return: tensor(392.3083, device='cuda:0')
episode: 73 training return: tensor(355.2610, device='cuda:0')
episode: 74 training return: tensor(423.0557, device='cuda:0')
episode: 75 training return: tensor(356.1599, device='cuda:0')
epoch: 19 test_true_pfm: 3385.8487774521514 sim_pfm: 262.1126237272886
episode: 76 training return: tensor(334.5069, device='cuda:0')
episode: 77 training return: tensor(287.8706, device='cuda:0')
episode: 78 training return: tensor(364.1056, device='cuda:0')
episode: 79 training return: tensor(276.0951, device='cuda:0')
epoch: 20 test_true_pfm: 3440.40946703494 sim_pfm: 202.5947960469348
episode: 80 training return: tensor(367.3665, device='cuda:0')
episode: 81 training return: tensor(97.5238, device='cuda:0')
episode: 82 training return: tensor(330.7151, device='cuda:0')
episode: 83 training return: tensor(366.7021, device='cuda:0')
epoch: 21 test_true_pfm: 3351.0201113790426 sim_pfm: 353.55505890998757
episode: 84 training return: tensor(319.1919, device='cuda:0')
episode: 85 training return: tensor(342.7831, device='cuda:0')
episode: 86 training return: tensor(288.2884, device='cuda:0')
episode: 87 training return: tensor(334.7407, device='cuda:0')
epoch: 22 test_true_pfm: 3467.8924780952657 sim_pfm: 268.83094068007387
episode: 88 training return: tensor(328.7894, device='cuda:0')
episode: 89 training return: tensor(343.1084, device='cuda:0')
episode: 90 training return: tensor(323.7366, device='cuda:0')
episode: 91 training return: tensor(353.3249, device='cuda:0')
epoch: 23 test_true_pfm: 3405.663814225843 sim_pfm: 295.7441016245575
episode: 92 training return: tensor(368.6390, device='cuda:0')
episode: 93 training return: tensor(316.1110, device='cuda:0')
episode: 94 training return: tensor(324.1953, device='cuda:0')
episode: 95 training return: tensor(383.7405, device='cuda:0')
epoch: 24 test_true_pfm: 3435.7125422149234 sim_pfm: 310.96281262804405
episode: 96 training return: tensor(210.4512, device='cuda:0')
episode: 97 training return: tensor(329.4092, device='cuda:0')
episode: 98 training return: tensor(364.0278, device='cuda:0')
episode: 99 training return: tensor(371.8029, device='cuda:0')
epoch: 25 test_true_pfm: 3368.9737586740634 sim_pfm: 370.44323232372216
episode: 100 training return: tensor(383.4893, device='cuda:0')
episode: 101 training return: tensor(382.3802, device='cuda:0')
episode: 102 training return: tensor(331.0522, device='cuda:0')
episode: 103 training return: tensor(306.1384, device='cuda:0')
epoch: 26 test_true_pfm: 3451.155698501883 sim_pfm: 96.89997590577696
episode: 104 training return: tensor(393.2151, device='cuda:0')
episode: 105 training return: tensor(392.1387, device='cuda:0')
episode: 106 training return: tensor(349.8845, device='cuda:0')
episode: 107 training return: tensor(397.7068, device='cuda:0')
epoch: 27 test_true_pfm: 3439.1864938945328 sim_pfm: 94.16773172563019
episode: 108 training return: tensor(230.9056, device='cuda:0')
episode: 109 training return: tensor(-468.1820, device='cuda:0')
episode: 110 training return: tensor(360.4180, device='cuda:0')
episode: 111 training return: tensor(377.1430, device='cuda:0')
epoch: 28 test_true_pfm: 3448.1864254123525 sim_pfm: -33.35495094019765
episode: 112 training return: tensor(-168.5075, device='cuda:0')
episode: 113 training return: tensor(382.1614, device='cuda:0')
episode: 114 training return: tensor(413.6069, device='cuda:0')
episode: 115 training return: tensor(37.1545, device='cuda:0')
epoch: 29 test_true_pfm: 3431.144680064935 sim_pfm: 339.79823761151056
episode: 116 training return: tensor(343.1302, device='cuda:0')
episode: 117 training return: tensor(329.0527, device='cuda:0')
episode: 118 training return: tensor(399.4483, device='cuda:0')
episode: 119 training return: tensor(414.7723, device='cuda:0')
epoch: 30 test_true_pfm: 3436.5578996843337 sim_pfm: 312.84350755115156
episode: 120 training return: tensor(381.2366, device='cuda:0')
episode: 121 training return: tensor(274.2719, device='cuda:0')
episode: 122 training return: tensor(369.1003, device='cuda:0')
episode: 123 training return: tensor(259.1714, device='cuda:0')
epoch: 31 test_true_pfm: 3419.513721998704 sim_pfm: 320.9840067212256
episode: 124 training return: tensor(393.6110, device='cuda:0')
episode: 125 training return: tensor(362.9208, device='cuda:0')
episode: 126 training return: tensor(321.6979, device='cuda:0')
episode: 127 training return: tensor(367.2711, device='cuda:0')
epoch: 32 test_true_pfm: 3401.1319453513274 sim_pfm: 382.7855666487109
episode: 128 training return: tensor(136.8815, device='cuda:0')
episode: 129 training return: tensor(318.8353, device='cuda:0')
episode: 130 training return: tensor(299.0222, device='cuda:0')
episode: 131 training return: tensor(337.6974, device='cuda:0')
epoch: 33 test_true_pfm: 3437.977026693678 sim_pfm: 366.0468278552871
episode: 132 training return: tensor(337.4747, device='cuda:0')
episode: 133 training return: tensor(381.8723, device='cuda:0')
episode: 134 training return: tensor(79.5779, device='cuda:0')
episode: 135 training return: tensor(422.8642, device='cuda:0')
epoch: 34 test_true_pfm: 3275.446867981882 sim_pfm: 394.51327833115164
episode: 136 training return: tensor(363.0340, device='cuda:0')
episode: 137 training return: tensor(284.0728, device='cuda:0')
episode: 138 training return: tensor(407.1010, device='cuda:0')
episode: 139 training return: tensor(386.9073, device='cuda:0')
epoch: 35 test_true_pfm: 3451.996709680537 sim_pfm: 239.924027385811
episode: 140 training return: tensor(-173.3718, device='cuda:0')
episode: 141 training return: tensor(326.1762, device='cuda:0')
episode: 142 training return: tensor(214.7540, device='cuda:0')
episode: 143 training return: tensor(232.7718, device='cuda:0')
epoch: 36 test_true_pfm: 3387.385151877059 sim_pfm: 319.3151032250219
episode: 144 training return: tensor(339.2965, device='cuda:0')
episode: 145 training return: tensor(334.8122, device='cuda:0')
episode: 146 training return: tensor(330.1198, device='cuda:0')
episode: 147 training return: tensor(348.4720, device='cuda:0')
epoch: 37 test_true_pfm: 3494.072723454245 sim_pfm: 382.68068528789445
episode: 148 training return: tensor(347.2389, device='cuda:0')
episode: 149 training return: tensor(326.6302, device='cuda:0')
episode: 150 training return: tensor(378.9647, device='cuda:0')
episode: 151 training return: tensor(379.9536, device='cuda:0')
epoch: 38 test_true_pfm: 3345.4859979089397 sim_pfm: 280.203931312232
episode: 152 training return: tensor(395.5882, device='cuda:0')
episode: 153 training return: tensor(411.8810, device='cuda:0')
episode: 154 training return: tensor(302.2634, device='cuda:0')
episode: 155 training return: tensor(403.7784, device='cuda:0')
epoch: 39 test_true_pfm: 3457.162495715695 sim_pfm: 398.4144085076211
episode: 156 training return: tensor(384.7305, device='cuda:0')
episode: 157 training return: tensor(269.8025, device='cuda:0')
episode: 158 training return: tensor(342.1024, device='cuda:0')
episode: 159 training return: tensor(383.6779, device='cuda:0')
epoch: 40 test_true_pfm: 3172.093940757 sim_pfm: 378.9815361846607
episode: 160 training return: tensor(387.1461, device='cuda:0')
episode: 161 training return: tensor(355.5186, device='cuda:0')
episode: 162 training return: tensor(376.0642, device='cuda:0')
episode: 163 training return: tensor(372.8387, device='cuda:0')
epoch: 41 test_true_pfm: 3455.829341563554 sim_pfm: 394.7301352582678
episode: 164 training return: tensor(241.2297, device='cuda:0')
episode: 165 training return: tensor(288.0870, device='cuda:0')
episode: 166 training return: tensor(236.0711, device='cuda:0')
episode: 167 training return: tensor(274.5551, device='cuda:0')
epoch: 42 test_true_pfm: 3507.4757401157985 sim_pfm: 409.46682133318
episode: 168 training return: tensor(369.5991, device='cuda:0')
episode: 169 training return: tensor(335.6054, device='cuda:0')
episode: 170 training return: tensor(388.5956, device='cuda:0')
episode: 171 training return: tensor(84.6790, device='cuda:0')
epoch: 43 test_true_pfm: 3411.893286273856 sim_pfm: 427.29209792163846
episode: 172 training return: tensor(383.3065, device='cuda:0')
episode: 173 training return: tensor(396.1493, device='cuda:0')
episode: 174 training return: tensor(359.9721, device='cuda:0')
episode: 175 training return: tensor(362.7086, device='cuda:0')
epoch: 44 test_true_pfm: 3468.179181735348 sim_pfm: 417.26617047083954
episode: 176 training return: tensor(355.2100, device='cuda:0')
episode: 177 training return: tensor(395.8181, device='cuda:0')
episode: 178 training return: tensor(419.2874, device='cuda:0')
episode: 179 training return: tensor(395.5867, device='cuda:0')
epoch: 45 test_true_pfm: 3537.4724680400636 sim_pfm: 400.0033104813483
episode: 180 training return: tensor(358.4279, device='cuda:0')
episode: 181 training return: tensor(330.1725, device='cuda:0')
episode: 182 training return: tensor(362.6129, device='cuda:0')
episode: 183 training return: tensor(364.4549, device='cuda:0')
epoch: 46 test_true_pfm: 3500.085973182326 sim_pfm: 395.6863306342663
episode: 184 training return: tensor(31.9940, device='cuda:0')
episode: 185 training return: tensor(385.2633, device='cuda:0')
episode: 186 training return: tensor(342.6048, device='cuda:0')
episode: 187 training return: tensor(360.2388, device='cuda:0')
epoch: 47 test_true_pfm: 3466.6265493741143 sim_pfm: 422.51209505447576
episode: 188 training return: tensor(440.0774, device='cuda:0')
episode: 189 training return: tensor(332.5927, device='cuda:0')
episode: 190 training return: tensor(419.6700, device='cuda:0')
episode: 191 training return: tensor(403.3550, device='cuda:0')
epoch: 48 test_true_pfm: 3517.1161844614085 sim_pfm: 178.37548311527158
episode: 192 training return: tensor(462.3178, device='cuda:0')
episode: 193 training return: tensor(332.0379, device='cuda:0')
episode: 194 training return: tensor(191.5242, device='cuda:0')
episode: 195 training return: tensor(291.4168, device='cuda:0')
epoch: 49 test_true_pfm: 3602.840552244203 sim_pfm: 451.61120015990065
episode: 196 training return: tensor(350.1049, device='cuda:0')
episode: 197 training return: tensor(411.1768, device='cuda:0')
episode: 198 training return: tensor(409.9533, device='cuda:0')
episode: 199 training return: tensor(437.4583, device='cuda:0')
epoch: 50 test_true_pfm: 3524.469314037084 sim_pfm: 427.0271314624503
episode: 200 training return: tensor(404.0331, device='cuda:0')
episode: 201 training return: tensor(349.9054, device='cuda:0')
episode: 202 training return: tensor(273.8547, device='cuda:0')
episode: 203 training return: tensor(361.7879, device='cuda:0')
epoch: 51 test_true_pfm: 3493.4775258665945 sim_pfm: 415.09351585805416
episode: 204 training return: tensor(434.1201, device='cuda:0')
episode: 205 training return: tensor(392.7360, device='cuda:0')
episode: 206 training return: tensor(416.0852, device='cuda:0')
episode: 207 training return: tensor(367.5522, device='cuda:0')
epoch: 52 test_true_pfm: 3426.9604866221157 sim_pfm: 360.84678480676183
episode: 208 training return: tensor(343.3029, device='cuda:0')
episode: 209 training return: tensor(405.8636, device='cuda:0')
episode: 210 training return: tensor(344.2326, device='cuda:0')
episode: 211 training return: tensor(381.6593, device='cuda:0')
epoch: 53 test_true_pfm: 3436.955047526364 sim_pfm: 386.33535893032484
episode: 212 training return: tensor(379.8811, device='cuda:0')
episode: 213 training return: tensor(380.5456, device='cuda:0')
episode: 214 training return: tensor(347.2952, device='cuda:0')
episode: 215 training return: tensor(345.4689, device='cuda:0')
epoch: 54 test_true_pfm: 3461.9673100625223 sim_pfm: 426.4373733139946
episode: 216 training return: tensor(395.4316, device='cuda:0')
episode: 217 training return: tensor(355.6445, device='cuda:0')
episode: 218 training return: tensor(373.0835, device='cuda:0')
episode: 219 training return: tensor(380.9788, device='cuda:0')
epoch: 55 test_true_pfm: 3419.5665799483845 sim_pfm: 175.95856273411968
episode: 220 training return: tensor(390.3303, device='cuda:0')
episode: 221 training return: tensor(416.2341, device='cuda:0')
episode: 222 training return: tensor(366.8405, device='cuda:0')
episode: 223 training return: tensor(60.2504, device='cuda:0')
epoch: 56 test_true_pfm: 3499.9780526401464 sim_pfm: 417.7437975985813
episode: 224 training return: tensor(352.9844, device='cuda:0')
episode: 225 training return: tensor(422.4748, device='cuda:0')
episode: 226 training return: tensor(378.8024, device='cuda:0')
episode: 227 training return: tensor(349.4833, device='cuda:0')
epoch: 57 test_true_pfm: 3465.457633916041 sim_pfm: 331.3671369269723
episode: 228 training return: tensor(393.3818, device='cuda:0')
episode: 229 training return: tensor(377.0344, device='cuda:0')
episode: 230 training return: tensor(384.0471, device='cuda:0')
episode: 231 training return: tensor(372.1231, device='cuda:0')
epoch: 58 test_true_pfm: 3494.9873389536083 sim_pfm: 375.759239304966
episode: 232 training return: tensor(399.2935, device='cuda:0')
episode: 233 training return: tensor(277.8093, device='cuda:0')
episode: 234 training return: tensor(350.1622, device='cuda:0')
episode: 235 training return: tensor(475.1830, device='cuda:0')
epoch: 59 test_true_pfm: 3526.0290106110465 sim_pfm: 430.23428041584947
episode: 236 training return: tensor(347.4543, device='cuda:0')
episode: 237 training return: tensor(368.6223, device='cuda:0')
episode: 238 training return: tensor(295.3872, device='cuda:0')
episode: 239 training return: tensor(402.8266, device='cuda:0')
epoch: 60 test_true_pfm: 3454.21114763821 sim_pfm: 407.35314791586524
episode: 240 training return: tensor(400.3340, device='cuda:0')
episode: 241 training return: tensor(389.3265, device='cuda:0')
episode: 242 training return: tensor(437.0698, device='cuda:0')
episode: 243 training return: tensor(399.6179, device='cuda:0')
epoch: 61 test_true_pfm: 3496.8663784616824 sim_pfm: 414.4946053704286
episode: 244 training return: tensor(367.3270, device='cuda:0')
episode: 245 training return: tensor(377.6277, device='cuda:0')
episode: 246 training return: tensor(415.8276, device='cuda:0')
episode: 247 training return: tensor(423.8580, device='cuda:0')
epoch: 62 test_true_pfm: 3538.416213226294 sim_pfm: 80.53561151063575
episode: 248 training return: tensor(435.3780, device='cuda:0')
episode: 249 training return: tensor(300.9232, device='cuda:0')
episode: 250 training return: tensor(335.1147, device='cuda:0')
episode: 251 training return: tensor(389.9076, device='cuda:0')
epoch: 63 test_true_pfm: 3512.897927190816 sim_pfm: 429.2971804670524
episode: 252 training return: tensor(226.8353, device='cuda:0')
episode: 253 training return: tensor(371.3716, device='cuda:0')
episode: 254 training return: tensor(398.3568, device='cuda:0')
episode: 255 training return: tensor(429.9304, device='cuda:0')
epoch: 64 test_true_pfm: 3539.0048036423163 sim_pfm: 390.9293705270393
episode: 256 training return: tensor(307.7243, device='cuda:0')
episode: 257 training return: tensor(342.0587, device='cuda:0')
episode: 258 training return: tensor(321.9020, device='cuda:0')
episode: 259 training return: tensor(379.4708, device='cuda:0')
epoch: 65 test_true_pfm: 3513.425542415131 sim_pfm: 431.6056426798071
episode: 260 training return: tensor(406.8672, device='cuda:0')
episode: 261 training return: tensor(405.2844, device='cuda:0')
episode: 262 training return: tensor(374.3146, device='cuda:0')
episode: 263 training return: tensor(430.8334, device='cuda:0')
epoch: 66 test_true_pfm: 3485.8963933093332 sim_pfm: 387.9307391491214
episode: 264 training return: tensor(380.3416, device='cuda:0')
episode: 265 training return: tensor(341.0358, device='cuda:0')
episode: 266 training return: tensor(440.1670, device='cuda:0')
episode: 267 training return: tensor(422.5715, device='cuda:0')
epoch: 67 test_true_pfm: 3512.314382651863 sim_pfm: 434.1907652033066
episode: 268 training return: tensor(403.9789, device='cuda:0')
episode: 269 training return: tensor(280.9092, device='cuda:0')
episode: 270 training return: tensor(393.5941, device='cuda:0')
episode: 271 training return: tensor(374.8461, device='cuda:0')
epoch: 68 test_true_pfm: 3480.0222950342863 sim_pfm: 393.0545663899781
episode: 272 training return: tensor(454.9925, device='cuda:0')
episode: 273 training return: tensor(430.4815, device='cuda:0')
episode: 274 training return: tensor(292.4315, device='cuda:0')
episode: 275 training return: tensor(342.6943, device='cuda:0')
epoch: 69 test_true_pfm: 3469.4050741735587 sim_pfm: 376.1526838437033
episode: 276 training return: tensor(355.5151, device='cuda:0')
episode: 277 training return: tensor(392.4610, device='cuda:0')
episode: 278 training return: tensor(380.6905, device='cuda:0')
episode: 279 training return: tensor(421.9619, device='cuda:0')
epoch: 70 test_true_pfm: 3506.4737343731117 sim_pfm: 390.7191764404609
episode: 280 training return: tensor(326.2297, device='cuda:0')
episode: 281 training return: tensor(358.2088, device='cuda:0')
episode: 282 training return: tensor(358.8111, device='cuda:0')
episode: 283 training return: tensor(407.9189, device='cuda:0')
epoch: 71 test_true_pfm: 3473.3390194104936 sim_pfm: 386.1538160245594
episode: 284 training return: tensor(398.1783, device='cuda:0')
episode: 285 training return: tensor(378.4944, device='cuda:0')
episode: 286 training return: tensor(374.5494, device='cuda:0')
episode: 287 training return: tensor(426.1937, device='cuda:0')
epoch: 72 test_true_pfm: 3482.6479757407965 sim_pfm: 452.60880813876673
episode: 288 training return: tensor(400.5862, device='cuda:0')
episode: 289 training return: tensor(369.3308, device='cuda:0')
episode: 290 training return: tensor(269.5217, device='cuda:0')
episode: 291 training return: tensor(416.5009, device='cuda:0')
epoch: 73 test_true_pfm: 3463.990340301131 sim_pfm: 432.68579441093607
episode: 292 training return: tensor(348.6062, device='cuda:0')
episode: 293 training return: tensor(420.7927, device='cuda:0')
episode: 294 training return: tensor(389.1850, device='cuda:0')
episode: 295 training return: tensor(355.9600, device='cuda:0')
epoch: 74 test_true_pfm: 3512.939925914201 sim_pfm: 418.0175245050438
episode: 296 training return: tensor(408.1942, device='cuda:0')
episode: 297 training return: tensor(250.3803, device='cuda:0')
episode: 298 training return: tensor(352.9509, device='cuda:0')
episode: 299 training return: tensor(395.8826, device='cuda:0')
epoch: 75 test_true_pfm: 3446.5040994429055 sim_pfm: 396.68632206091814
episode: 300 training return: tensor(428.2240, device='cuda:0')
episode: 301 training return: tensor(386.0378, device='cuda:0')
episode: 302 training return: tensor(340.6335, device='cuda:0')
episode: 303 training return: tensor(385.9696, device='cuda:0')
epoch: 76 test_true_pfm: 3505.652461784593 sim_pfm: 401.9992430281806
episode: 304 training return: tensor(392.4206, device='cuda:0')
episode: 305 training return: tensor(380.0860, device='cuda:0')
episode: 306 training return: tensor(400.9591, device='cuda:0')
episode: 307 training return: tensor(154.9581, device='cuda:0')
epoch: 77 test_true_pfm: 3503.9567977823003 sim_pfm: 431.85990260593826
episode: 308 training return: tensor(373.2507, device='cuda:0')
episode: 309 training return: tensor(382.8904, device='cuda:0')
episode: 310 training return: tensor(432.8089, device='cuda:0')
episode: 311 training return: tensor(356.7473, device='cuda:0')
epoch: 78 test_true_pfm: 3499.422898801675 sim_pfm: 413.62927065432694
episode: 312 training return: tensor(399.9163, device='cuda:0')
episode: 313 training return: tensor(393.9373, device='cuda:0')
episode: 314 training return: tensor(396.7553, device='cuda:0')
episode: 315 training return: tensor(428.0625, device='cuda:0')
epoch: 79 test_true_pfm: 3472.8235006871932 sim_pfm: 383.31187415736105
episode: 316 training return: tensor(433.9155, device='cuda:0')
episode: 317 training return: tensor(370.9744, device='cuda:0')
episode: 318 training return: tensor(327.9338, device='cuda:0')
episode: 319 training return: tensor(358.6979, device='cuda:0')
epoch: 80 test_true_pfm: 3504.5587987622744 sim_pfm: 414.9566749295821
episode: 320 training return: tensor(289.1935, device='cuda:0')
episode: 321 training return: tensor(423.9509, device='cuda:0')
episode: 322 training return: tensor(418.3505, device='cuda:0')
episode: 323 training return: tensor(382.1275, device='cuda:0')
epoch: 81 test_true_pfm: 3528.9492267421824 sim_pfm: 407.30062834990287
episode: 324 training return: tensor(381.7000, device='cuda:0')
episode: 325 training return: tensor(384.2110, device='cuda:0')
episode: 326 training return: tensor(434.2644, device='cuda:0')
episode: 327 training return: tensor(448.4265, device='cuda:0')
epoch: 82 test_true_pfm: 3483.037061419885 sim_pfm: 410.7633735008033
episode: 328 training return: tensor(396.5374, device='cuda:0')
episode: 329 training return: tensor(393.0769, device='cuda:0')
episode: 330 training return: tensor(324.8720, device='cuda:0')
episode: 331 training return: tensor(376.7094, device='cuda:0')
epoch: 83 test_true_pfm: 3484.939675147461 sim_pfm: 409.49852970302646
episode: 332 training return: tensor(370.3349, device='cuda:0')
episode: 333 training return: tensor(413.9176, device='cuda:0')
episode: 334 training return: tensor(398.8607, device='cuda:0')
episode: 335 training return: tensor(419.1063, device='cuda:0')
epoch: 84 test_true_pfm: 3519.1221581301547 sim_pfm: 422.6304243085421
episode: 336 training return: tensor(393.9388, device='cuda:0')
episode: 337 training return: tensor(375.6672, device='cuda:0')
episode: 338 training return: tensor(385.1401, device='cuda:0')
episode: 339 training return: tensor(387.4391, device='cuda:0')
epoch: 85 test_true_pfm: 3554.0764366567055 sim_pfm: 413.3780600174796
episode: 340 training return: tensor(373.8196, device='cuda:0')
episode: 341 training return: tensor(320.3712, device='cuda:0')
episode: 342 training return: tensor(340.6469, device='cuda:0')
episode: 343 training return: tensor(401.8042, device='cuda:0')
epoch: 86 test_true_pfm: 3434.30991635584 sim_pfm: 384.69348041558015
episode: 344 training return: tensor(356.3640, device='cuda:0')
episode: 345 training return: tensor(362.4818, device='cuda:0')
episode: 346 training return: tensor(362.5130, device='cuda:0')
episode: 347 training return: tensor(394.9359, device='cuda:0')
epoch: 87 test_true_pfm: 3430.654123947424 sim_pfm: 385.0743764764241
episode: 348 training return: tensor(378.4343, device='cuda:0')
episode: 349 training return: tensor(324.8969, device='cuda:0')
episode: 350 training return: tensor(357.7268, device='cuda:0')
episode: 351 training return: tensor(394.9348, device='cuda:0')
epoch: 88 test_true_pfm: 3548.4124631351115 sim_pfm: 414.2370914207616
episode: 352 training return: tensor(369.7495, device='cuda:0')
episode: 353 training return: tensor(383.8017, device='cuda:0')
episode: 354 training return: tensor(388.3125, device='cuda:0')
episode: 355 training return: tensor(315.6864, device='cuda:0')
epoch: 89 test_true_pfm: 3501.413024185224 sim_pfm: 450.26200470425346
episode: 356 training return: tensor(443.8169, device='cuda:0')
episode: 357 training return: tensor(404.7959, device='cuda:0')
episode: 358 training return: tensor(398.4252, device='cuda:0')
episode: 359 training return: tensor(361.2174, device='cuda:0')
epoch: 90 test_true_pfm: 3501.7659021063605 sim_pfm: 400.13318943057675
episode: 360 training return: tensor(374.9720, device='cuda:0')
episode: 361 training return: tensor(388.9571, device='cuda:0')
episode: 362 training return: tensor(396.2499, device='cuda:0')
episode: 363 training return: tensor(382.5778, device='cuda:0')
epoch: 91 test_true_pfm: 3514.3968037813443 sim_pfm: 400.00927667863044
episode: 364 training return: tensor(379.5375, device='cuda:0')
episode: 365 training return: tensor(220.8681, device='cuda:0')
episode: 366 training return: tensor(362.3414, device='cuda:0')
episode: 367 training return: tensor(404.3874, device='cuda:0')
epoch: 92 test_true_pfm: 3475.6076695417228 sim_pfm: 326.56496966031654
episode: 368 training return: tensor(417.3108, device='cuda:0')
episode: 369 training return: tensor(428.4070, device='cuda:0')
episode: 370 training return: tensor(414.1405, device='cuda:0')
episode: 371 training return: tensor(332.6544, device='cuda:0')
epoch: 93 test_true_pfm: 3348.0749743125143 sim_pfm: 357.8654693653031
episode: 372 training return: tensor(407.3531, device='cuda:0')
episode: 373 training return: tensor(412.0877, device='cuda:0')
episode: 374 training return: tensor(369.2740, device='cuda:0')
episode: 375 training return: tensor(405.6194, device='cuda:0')
epoch: 94 test_true_pfm: 3438.389965796008 sim_pfm: 400.8517893291525
episode: 376 training return: tensor(403.3087, device='cuda:0')
episode: 377 training return: tensor(377.6926, device='cuda:0')
episode: 378 training return: tensor(422.1075, device='cuda:0')
episode: 379 training return: tensor(398.3085, device='cuda:0')
epoch: 95 test_true_pfm: 3481.233154711521 sim_pfm: 435.86883051081287
episode: 380 training return: tensor(355.5383, device='cuda:0')
episode: 381 training return: tensor(279.0134, device='cuda:0')
episode: 382 training return: tensor(450.6024, device='cuda:0')
episode: 383 training return: tensor(410.6682, device='cuda:0')
epoch: 96 test_true_pfm: 3480.8463979317767 sim_pfm: 365.2126939134226
episode: 384 training return: tensor(395.5824, device='cuda:0')
episode: 385 training return: tensor(451.8229, device='cuda:0')
episode: 386 training return: tensor(453.7516, device='cuda:0')
episode: 387 training return: tensor(376.3958, device='cuda:0')
epoch: 97 test_true_pfm: 3502.467457962465 sim_pfm: 437.275984440969
episode: 388 training return: tensor(406.5992, device='cuda:0')
episode: 389 training return: tensor(402.6235, device='cuda:0')
episode: 390 training return: tensor(390.5832, device='cuda:0')
episode: 391 training return: tensor(371.8212, device='cuda:0')
epoch: 98 test_true_pfm: 3455.085782957614 sim_pfm: 387.5255367476978
episode: 392 training return: tensor(440.9860, device='cuda:0')
episode: 393 training return: tensor(356.5320, device='cuda:0')
episode: 394 training return: tensor(442.2494, device='cuda:0')
episode: 395 training return: tensor(378.0451, device='cuda:0')
epoch: 99 test_true_pfm: 3475.32652112439 sim_pfm: 415.0512978696982
episode: 396 training return: tensor(401.6537, device='cuda:0')
episode: 397 training return: tensor(438.8224, device='cuda:0')
episode: 398 training return: tensor(389.0882, device='cuda:0')
episode: 399 training return: tensor(267.0003, device='cuda:0')
epoch: 100 test_true_pfm: 3480.556102602133 sim_pfm: 407.5617390759289
episode: 400 training return: tensor(377.9536, device='cuda:0')
episode: 401 training return: tensor(373.1327, device='cuda:0')
episode: 402 training return: tensor(324.7529, device='cuda:0')
episode: 403 training return: tensor(396.5725, device='cuda:0')
epoch: 101 test_true_pfm: 3520.6679415078866 sim_pfm: 418.0891129489755
episode: 404 training return: tensor(378.9402, device='cuda:0')
episode: 405 training return: tensor(362.7180, device='cuda:0')
episode: 406 training return: tensor(359.2306, device='cuda:0')
episode: 407 training return: tensor(394.3416, device='cuda:0')
epoch: 102 test_true_pfm: 3499.1320351003505 sim_pfm: 436.40744572634384
episode: 408 training return: tensor(363.7082, device='cuda:0')
episode: 409 training return: tensor(358.0002, device='cuda:0')
episode: 410 training return: tensor(411.6335, device='cuda:0')
episode: 411 training return: tensor(359.3465, device='cuda:0')
epoch: 103 test_true_pfm: 3483.3653134761153 sim_pfm: 420.85366042043705
episode: 412 training return: tensor(393.5642, device='cuda:0')
episode: 413 training return: tensor(193.3177, device='cuda:0')
episode: 414 training return: tensor(412.3227, device='cuda:0')
episode: 415 training return: tensor(348.2729, device='cuda:0')
epoch: 104 test_true_pfm: 3475.683876428946 sim_pfm: 432.2593718293744
episode: 416 training return: tensor(418.2918, device='cuda:0')
episode: 417 training return: tensor(395.0992, device='cuda:0')
episode: 418 training return: tensor(397.2350, device='cuda:0')
episode: 419 training return: tensor(345.3333, device='cuda:0')
epoch: 105 test_true_pfm: 3525.7713430077547 sim_pfm: 385.90442074586946
episode: 420 training return: tensor(386.2755, device='cuda:0')
episode: 421 training return: tensor(425.8116, device='cuda:0')
episode: 422 training return: tensor(335.6506, device='cuda:0')
episode: 423 training return: tensor(358.4458, device='cuda:0')
epoch: 106 test_true_pfm: 3491.87608268825 sim_pfm: 430.2388293084902
episode: 424 training return: tensor(338.4251, device='cuda:0')
episode: 425 training return: tensor(365.8275, device='cuda:0')
episode: 426 training return: tensor(471.8322, device='cuda:0')
episode: 427 training return: tensor(390.8962, device='cuda:0')
epoch: 107 test_true_pfm: 2871.591174976645 sim_pfm: 444.2835289098148
episode: 428 training return: tensor(379.7059, device='cuda:0')
episode: 429 training return: tensor(387.2544, device='cuda:0')
episode: 430 training return: tensor(385.8515, device='cuda:0')
episode: 431 training return: tensor(382.5062, device='cuda:0')
epoch: 108 test_true_pfm: 3472.0000081416765 sim_pfm: 436.8191412109106
episode: 432 training return: tensor(302.4968, device='cuda:0')
episode: 433 training return: tensor(368.4378, device='cuda:0')
episode: 434 training return: tensor(322.3488, device='cuda:0')
episode: 435 training return: tensor(388.4797, device='cuda:0')
epoch: 109 test_true_pfm: 3523.7838467451343 sim_pfm: 414.8202429965022
episode: 436 training return: tensor(389.0676, device='cuda:0')
episode: 437 training return: tensor(459.7038, device='cuda:0')
episode: 438 training return: tensor(369.0097, device='cuda:0')
episode: 439 training return: tensor(426.7760, device='cuda:0')
epoch: 110 test_true_pfm: 3502.472067153956 sim_pfm: 452.1841176667949
episode: 440 training return: tensor(430.7639, device='cuda:0')
episode: 441 training return: tensor(432.6305, device='cuda:0')
episode: 442 training return: tensor(359.3997, device='cuda:0')
episode: 443 training return: tensor(414.7603, device='cuda:0')
epoch: 111 test_true_pfm: 3530.0223391416253 sim_pfm: 438.1371724558218
episode: 444 training return: tensor(409.6144, device='cuda:0')
episode: 445 training return: tensor(360.0406, device='cuda:0')
episode: 446 training return: tensor(358.1932, device='cuda:0')
episode: 447 training return: tensor(376.6839, device='cuda:0')
epoch: 112 test_true_pfm: 3518.6637665099875 sim_pfm: 422.52030774686136
episode: 448 training return: tensor(451.4532, device='cuda:0')
episode: 449 training return: tensor(393.7610, device='cuda:0')
episode: 450 training return: tensor(319.1144, device='cuda:0')
episode: 451 training return: tensor(305.2354, device='cuda:0')
epoch: 113 test_true_pfm: 3559.5560202622382 sim_pfm: 432.1545253570851
episode: 452 training return: tensor(378.2454, device='cuda:0')
episode: 453 training return: tensor(345.7470, device='cuda:0')
episode: 454 training return: tensor(358.5190, device='cuda:0')
episode: 455 training return: tensor(378.8062, device='cuda:0')
epoch: 114 test_true_pfm: 3458.347668463925 sim_pfm: 436.400698555226
episode: 456 training return: tensor(403.6632, device='cuda:0')
episode: 457 training return: tensor(409.1798, device='cuda:0')
episode: 458 training return: tensor(399.2303, device='cuda:0')
episode: 459 training return: tensor(340.2446, device='cuda:0')
epoch: 115 test_true_pfm: 3511.310687375281 sim_pfm: 403.70546126940945
episode: 460 training return: tensor(414.6794, device='cuda:0')
episode: 461 training return: tensor(184.9554, device='cuda:0')
episode: 462 training return: tensor(406.6674, device='cuda:0')
episode: 463 training return: tensor(355.8655, device='cuda:0')
epoch: 116 test_true_pfm: 3474.440654355469 sim_pfm: 407.02546820028994
episode: 464 training return: tensor(379.5150, device='cuda:0')
episode: 465 training return: tensor(390.9396, device='cuda:0')
episode: 466 training return: tensor(337.0794, device='cuda:0')
episode: 467 training return: tensor(418.5399, device='cuda:0')
epoch: 117 test_true_pfm: 3514.6491045581315 sim_pfm: 413.146268325819
episode: 468 training return: tensor(386.4508, device='cuda:0')
episode: 469 training return: tensor(375.0799, device='cuda:0')
episode: 470 training return: tensor(352.7531, device='cuda:0')
episode: 471 training return: tensor(401.0786, device='cuda:0')
epoch: 118 test_true_pfm: 3544.446867848564 sim_pfm: 417.1563983413119
episode: 472 training return: tensor(399.7398, device='cuda:0')
episode: 473 training return: tensor(407.4023, device='cuda:0')
episode: 474 training return: tensor(323.3219, device='cuda:0')
episode: 475 training return: tensor(394.6562, device='cuda:0')
epoch: 119 test_true_pfm: 3469.861013656098 sim_pfm: 448.9456240299526
episode: 476 training return: tensor(431.9131, device='cuda:0')
episode: 477 training return: tensor(400.0847, device='cuda:0')
episode: 478 training return: tensor(337.3182, device='cuda:0')
episode: 479 training return: tensor(265.0026, device='cuda:0')
epoch: 120 test_true_pfm: 3482.755624427032 sim_pfm: 432.96687974978704
episode: 480 training return: tensor(336.7720, device='cuda:0')
episode: 481 training return: tensor(429.3878, device='cuda:0')
episode: 482 training return: tensor(415.6559, device='cuda:0')
episode: 483 training return: tensor(404.3940, device='cuda:0')
epoch: 121 test_true_pfm: 3557.1671969491285 sim_pfm: 447.98225100695464
episode: 484 training return: tensor(348.8365, device='cuda:0')
episode: 485 training return: tensor(409.2336, device='cuda:0')
episode: 486 training return: tensor(358.0427, device='cuda:0')
episode: 487 training return: tensor(329.4904, device='cuda:0')
epoch: 122 test_true_pfm: 3533.153910141696 sim_pfm: 442.6608836266775
episode: 488 training return: tensor(402.6913, device='cuda:0')
episode: 489 training return: tensor(373.4198, device='cuda:0')
episode: 490 training return: tensor(351.5013, device='cuda:0')
episode: 491 training return: tensor(329.4340, device='cuda:0')
epoch: 123 test_true_pfm: 3542.3794472426757 sim_pfm: 418.7978114549187
episode: 492 training return: tensor(420.0145, device='cuda:0')
episode: 493 training return: tensor(164.0252, device='cuda:0')
episode: 494 training return: tensor(388.0121, device='cuda:0')
episode: 495 training return: tensor(395.1856, device='cuda:0')
epoch: 124 test_true_pfm: 3511.157512157914 sim_pfm: 431.9743410331236
episode: 496 training return: tensor(406.5304, device='cuda:0')
episode: 497 training return: tensor(366.2311, device='cuda:0')
episode: 498 training return: tensor(432.2214, device='cuda:0')
episode: 499 training return: tensor(364.7416, device='cuda:0')
epoch: 125 test_true_pfm: 3537.761828071692 sim_pfm: 463.7510556995597
episode: 500 training return: tensor(380.8567, device='cuda:0')
episode: 501 training return: tensor(347.6749, device='cuda:0')
episode: 502 training return: tensor(427.9383, device='cuda:0')
episode: 503 training return: tensor(340.8016, device='cuda:0')
epoch: 126 test_true_pfm: 3521.028841338992 sim_pfm: 407.31193868709187
episode: 504 training return: tensor(397.8167, device='cuda:0')
episode: 505 training return: tensor(407.2182, device='cuda:0')
episode: 506 training return: tensor(307.1068, device='cuda:0')
episode: 507 training return: tensor(394.4251, device='cuda:0')
epoch: 127 test_true_pfm: 3561.1114585814444 sim_pfm: 469.53010528336745
episode: 508 training return: tensor(394.1035, device='cuda:0')
episode: 509 training return: tensor(412.4498, device='cuda:0')
episode: 510 training return: tensor(428.5379, device='cuda:0')
episode: 511 training return: tensor(471.9273, device='cuda:0')
epoch: 128 test_true_pfm: 3499.0504339868953 sim_pfm: 441.8251063344069
episode: 512 training return: tensor(373.5026, device='cuda:0')
episode: 513 training return: tensor(387.6208, device='cuda:0')
episode: 514 training return: tensor(399.7402, device='cuda:0')
episode: 515 training return: tensor(388.6303, device='cuda:0')
epoch: 129 test_true_pfm: 3481.7523066321387 sim_pfm: 473.9083998357916
episode: 516 training return: tensor(408.6211, device='cuda:0')
episode: 517 training return: tensor(452.0110, device='cuda:0')
episode: 518 training return: tensor(398.7921, device='cuda:0')
episode: 519 training return: tensor(378.0565, device='cuda:0')
epoch: 130 test_true_pfm: 3533.5827507130107 sim_pfm: 439.4137050274294
episode: 520 training return: tensor(439.3705, device='cuda:0')
episode: 521 training return: tensor(429.8877, device='cuda:0')
episode: 522 training return: tensor(405.4219, device='cuda:0')
episode: 523 training return: tensor(327.7874, device='cuda:0')
epoch: 131 test_true_pfm: 3599.8802796585283 sim_pfm: 428.87030819659896
episode: 524 training return: tensor(367.4685, device='cuda:0')
episode: 525 training return: tensor(372.9044, device='cuda:0')
episode: 526 training return: tensor(432.0536, device='cuda:0')
episode: 527 training return: tensor(386.8133, device='cuda:0')
epoch: 132 test_true_pfm: 3510.8463630008846 sim_pfm: 452.74969810656813
episode: 528 training return: tensor(383.3150, device='cuda:0')
episode: 529 training return: tensor(406.9829, device='cuda:0')
episode: 530 training return: tensor(432.1445, device='cuda:0')
episode: 531 training return: tensor(266.9391, device='cuda:0')
epoch: 133 test_true_pfm: 3526.642504833208 sim_pfm: 419.9083165139309
episode: 532 training return: tensor(348.7381, device='cuda:0')
episode: 533 training return: tensor(366.1239, device='cuda:0')
episode: 534 training return: tensor(380.3788, device='cuda:0')
episode: 535 training return: tensor(434.0671, device='cuda:0')
epoch: 134 test_true_pfm: 3500.2849256952527 sim_pfm: 441.38962476099067
episode: 536 training return: tensor(-433.5799, device='cuda:0')
episode: 537 training return: tensor(389.8558, device='cuda:0')
episode: 538 training return: tensor(418.5561, device='cuda:0')
episode: 539 training return: tensor(365.1191, device='cuda:0')
epoch: 135 test_true_pfm: 3487.4925885051994 sim_pfm: 425.01573797535576
episode: 540 training return: tensor(433.7450, device='cuda:0')
episode: 541 training return: tensor(446.0242, device='cuda:0')
episode: 542 training return: tensor(423.9463, device='cuda:0')
episode: 543 training return: tensor(394.8315, device='cuda:0')
epoch: 136 test_true_pfm: 3545.50576715169 sim_pfm: 422.14050199867535
episode: 544 training return: tensor(355.8914, device='cuda:0')
episode: 545 training return: tensor(423.6787, device='cuda:0')
episode: 546 training return: tensor(379.1754, device='cuda:0')
episode: 547 training return: tensor(414.6623, device='cuda:0')
epoch: 137 test_true_pfm: 3475.069929275161 sim_pfm: 424.8961058954592
episode: 548 training return: tensor(411.4097, device='cuda:0')
episode: 549 training return: tensor(431.1286, device='cuda:0')
episode: 550 training return: tensor(406.0774, device='cuda:0')
episode: 551 training return: tensor(384.2836, device='cuda:0')
epoch: 138 test_true_pfm: 3477.1121870833426 sim_pfm: 406.51512075502734
episode: 552 training return: tensor(449.2395, device='cuda:0')
episode: 553 training return: tensor(393.8617, device='cuda:0')
episode: 554 training return: tensor(320.7267, device='cuda:0')
episode: 555 training return: tensor(458.8814, device='cuda:0')
epoch: 139 test_true_pfm: 3550.5786483720635 sim_pfm: 443.1363300137843
episode: 556 training return: tensor(381.8966, device='cuda:0')
episode: 557 training return: tensor(459.0548, device='cuda:0')
episode: 558 training return: tensor(337.2932, device='cuda:0')
episode: 559 training return: tensor(406.4572, device='cuda:0')
epoch: 140 test_true_pfm: 3545.888138968711 sim_pfm: 451.6169833239983
episode: 560 training return: tensor(363.1325, device='cuda:0')
episode: 561 training return: tensor(385.3165, device='cuda:0')
episode: 562 training return: tensor(389.3362, device='cuda:0')
episode: 563 training return: tensor(435.7168, device='cuda:0')
epoch: 141 test_true_pfm: 3477.919438308881 sim_pfm: 425.0843105263775
episode: 564 training return: tensor(397.0295, device='cuda:0')
episode: 565 training return: tensor(432.5287, device='cuda:0')
episode: 566 training return: tensor(320.8531, device='cuda:0')
episode: 567 training return: tensor(467.5968, device='cuda:0')
epoch: 142 test_true_pfm: 3500.945826783869 sim_pfm: 439.35324042783276
episode: 568 training return: tensor(439.6480, device='cuda:0')
episode: 569 training return: tensor(438.7783, device='cuda:0')
episode: 570 training return: tensor(436.8398, device='cuda:0')
episode: 571 training return: tensor(312.7668, device='cuda:0')
epoch: 143 test_true_pfm: 3510.2211705077425 sim_pfm: 466.8794330151092
episode: 572 training return: tensor(396.7702, device='cuda:0')
episode: 573 training return: tensor(456.1508, device='cuda:0')
episode: 574 training return: tensor(291.4531, device='cuda:0')
episode: 575 training return: tensor(346.2034, device='cuda:0')
epoch: 144 test_true_pfm: 3519.484184697154 sim_pfm: 462.868527744043
episode: 576 training return: tensor(381.0446, device='cuda:0')
episode: 577 training return: tensor(431.2968, device='cuda:0')
episode: 578 training return: tensor(405.2372, device='cuda:0')
episode: 579 training return: tensor(419.6114, device='cuda:0')
epoch: 145 test_true_pfm: 3508.4138395201676 sim_pfm: 463.9028692845216
episode: 580 training return: tensor(178.2040, device='cuda:0')
episode: 581 training return: tensor(380.3163, device='cuda:0')
episode: 582 training return: tensor(410.6577, device='cuda:0')
episode: 583 training return: tensor(447.2377, device='cuda:0')
epoch: 146 test_true_pfm: 3526.640908078179 sim_pfm: 439.072101442415
episode: 584 training return: tensor(393.8389, device='cuda:0')
episode: 585 training return: tensor(383.6034, device='cuda:0')
episode: 586 training return: tensor(311.9612, device='cuda:0')
episode: 587 training return: tensor(368.8245, device='cuda:0')
epoch: 147 test_true_pfm: 3498.827913268565 sim_pfm: 436.6786081099417
episode: 588 training return: tensor(331.7605, device='cuda:0')
episode: 589 training return: tensor(387.1914, device='cuda:0')
episode: 590 training return: tensor(311.1969, device='cuda:0')
episode: 591 training return: tensor(447.8777, device='cuda:0')
epoch: 148 test_true_pfm: 3514.701942578078 sim_pfm: 442.4318934374023
episode: 592 training return: tensor(428.1808, device='cuda:0')
episode: 593 training return: tensor(338.0881, device='cuda:0')
episode: 594 training return: tensor(371.7032, device='cuda:0')
episode: 595 training return: tensor(397.8139, device='cuda:0')
epoch: 149 test_true_pfm: 3507.4687070087916 sim_pfm: 445.0188660102431
episode: 596 training return: tensor(414.3812, device='cuda:0')
episode: 597 training return: tensor(395.4377, device='cuda:0')
episode: 598 training return: tensor(283.0543, device='cuda:0')
episode: 599 training return: tensor(392.3000, device='cuda:0')
epoch: 150 test_true_pfm: 3470.166641363154 sim_pfm: 404.4507540477983
