['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.22252286680042743 test_loss: 0.1242306113243103
epoch: 1 training_loss 0.13284232307225466 test_loss: 0.12368992567062378
epoch: 2 training_loss 0.11827225692570209 test_loss: 0.10641705989837646
epoch: 3 training_loss 0.11223231790587306 test_loss: 0.1120657205581665
epoch: 4 training_loss 0.10575162380933761 test_loss: 0.10973941087722779
epoch: 5 training_loss 0.097990441955626 test_loss: 0.09377856254577636
epoch: 6 training_loss 0.10317684888839722 test_loss: 0.10009011030197143
epoch: 7 training_loss 0.08585748435929418 test_loss: 0.09483926296234131
epoch: 8 training_loss 0.09720017328858375 test_loss: 0.09471941590309144
epoch: 9 training_loss 0.09182087073102593 test_loss: 0.10785609483718872
epoch: 10 training_loss 0.0909324594028294 test_loss: 0.10701026916503906
epoch: 11 training_loss 0.09470345765352249 test_loss: 0.10535025596618652
epoch: 12 training_loss 0.09170451056212187 test_loss: 0.0850365161895752
epoch: 13 training_loss 0.09039032440632581 test_loss: 0.08144792914390564
epoch: 14 training_loss 0.08460477225482464 test_loss: 0.0963728666305542
epoch: 15 training_loss 0.0951626855880022 test_loss: 0.07884694337844848
epoch: 16 training_loss 0.08994243431836367 test_loss: 0.1094517707824707
epoch: 17 training_loss 0.09289523942396044 test_loss: 0.09761347770690917
epoch: 18 training_loss 0.08417495279572904 test_loss: 0.07805261611938477
epoch: 19 training_loss 0.09609872260130942 test_loss: 0.09326854348182678
epoch: 20 training_loss 0.0912471241876483 test_loss: 0.0812680184841156
epoch: 21 training_loss 0.08824811992235482 test_loss: 0.08870312571525574
epoch: 22 training_loss 0.09069315245375037 test_loss: 0.09052452445030212
epoch: 23 training_loss 0.08811982663348317 test_loss: 0.09167494177818299
epoch: 24 training_loss 0.09600812185555696 test_loss: 0.09496795535087585
epoch: 25 training_loss 0.08003728810697794 test_loss: 0.10162346363067627
epoch: 26 training_loss 0.08341944672167301 test_loss: 0.1052627444267273
epoch: 27 training_loss 0.08166813360527157 test_loss: 0.07495279908180237
epoch: 28 training_loss 0.08789995092898607 test_loss: 0.09014438390731812
epoch: 29 training_loss 0.09057222930714488 test_loss: 0.083597332239151
epoch: 30 training_loss 0.0798453394509852 test_loss: 0.09522801637649536
epoch: 31 training_loss 0.09176860900595785 test_loss: 0.09967669248580932
epoch: 32 training_loss 0.08403435956686735 test_loss: 0.0776953637599945
epoch: 33 training_loss 0.09056108724325895 test_loss: 0.09124432802200318
epoch: 34 training_loss 0.08456843620166182 test_loss: 0.0852534830570221
epoch: 35 training_loss 0.08442150926217437 test_loss: 0.10995852947235107
epoch: 36 training_loss 0.08081279708072543 test_loss: 0.0776794970035553
epoch: 37 training_loss 0.08721426965668798 test_loss: 0.09430058002471924
epoch: 38 training_loss 0.08627216825261712 test_loss: 0.08901401758193969
epoch: 39 training_loss 0.08202299820259214 test_loss: 0.08801429271697998
epoch: 40 training_loss 0.07903593538329005 test_loss: 0.08490644097328186
epoch: 41 training_loss 0.08812089854851365 test_loss: 0.09846428632736207
epoch: 42 training_loss 0.09197021204978227 test_loss: 0.09061095714569092
epoch: 43 training_loss 0.07885876467451453 test_loss: 0.07735869884490967
epoch: 44 training_loss 0.0843959267437458 test_loss: 0.07617738842964172
epoch: 45 training_loss 0.08648160261102021 test_loss: 0.06874986886978149
epoch: 46 training_loss 0.0773161687143147 test_loss: 0.07724953293800355
epoch: 47 training_loss 0.09168877059593797 test_loss: 0.10508779287338257
epoch: 48 training_loss 0.07976245442405343 test_loss: 0.08378357291221619
epoch: 49 training_loss 0.07629704605787993 test_loss: 0.08615584373474121
epoch: 50 training_loss 0.08071110558696092 test_loss: 0.08128213882446289
epoch: 51 training_loss 0.08525469159707427 test_loss: 0.08877794742584229
epoch: 52 training_loss 0.09109365889802575 test_loss: 0.08065754771232606
epoch: 53 training_loss 0.07491316736675799 test_loss: 0.08077119588851929
epoch: 54 training_loss 0.08241163488477468 test_loss: 0.08035321235656738
epoch: 55 training_loss 0.08449648255482316 test_loss: 0.07629379034042358
epoch: 56 training_loss 0.08335251388140023 test_loss: 0.07360320091247559
epoch: 57 training_loss 0.08044186985120177 test_loss: 0.06738951802253723
epoch: 58 training_loss 0.07673618596047163 test_loss: 0.08761882185935974
epoch: 59 training_loss 0.08424656499177217 test_loss: 0.0857649028301239
epoch: 60 training_loss 0.08038702772930265 test_loss: 0.08102821111679077
epoch: 61 training_loss 0.07940114611759781 test_loss: 0.07854376435279846
epoch: 62 training_loss 0.08245705500245094 test_loss: 0.06711097359657288
epoch: 63 training_loss 0.08380126660689712 test_loss: 0.07762172818183899
epoch: 64 training_loss 0.08182101855054498 test_loss: 0.09260180592536926
epoch: 65 training_loss 0.08426684334874153 test_loss: 0.08739389181137085
epoch: 66 training_loss 0.07746767096221446 test_loss: 0.08294786214828491
epoch: 67 training_loss 0.08030595092102885 test_loss: 0.07398414611816406
epoch: 68 training_loss 0.07998493317514659 test_loss: 0.07641724944114685
epoch: 69 training_loss 0.08364470981061459 test_loss: 0.0717068612575531
epoch: 70 training_loss 0.07567602936178446 test_loss: 0.09551661610603332
epoch: 71 training_loss 0.07607497433200479 test_loss: 0.08577674627304077
epoch: 72 training_loss 0.0792481303587556 test_loss: 0.08384099006652831
epoch: 73 training_loss 0.077999595310539 test_loss: 0.09091822504997253
epoch: 74 training_loss 0.08094741592183709 test_loss: 0.06373958587646485
epoch: 75 training_loss 0.08331004459410905 test_loss: 0.0790367603302002
epoch: 76 training_loss 0.08419223682954907 test_loss: 0.07493400573730469
epoch: 77 training_loss 0.08095309102907777 test_loss: 0.06806994080543519
epoch: 78 training_loss 0.07811968771740795 test_loss: 0.08050124049186706
epoch: 79 training_loss 0.07667026914656162 test_loss: 0.07504735589027405
epoch: 80 training_loss 0.08314402107149363 test_loss: 0.08072224259376526
epoch: 81 training_loss 0.0797813062928617 test_loss: 0.09064449667930603
epoch: 82 training_loss 0.08451928773894907 test_loss: 0.07511077523231506
epoch: 83 training_loss 0.08096798401325941 test_loss: 0.07656424045562744
epoch: 84 training_loss 0.07883289087563754 test_loss: 0.07182795405387879
epoch: 85 training_loss 0.07793825320899486 test_loss: 0.07372941970825195
epoch: 86 training_loss 0.08315113792195916 test_loss: 0.07925018668174744
epoch: 87 training_loss 0.07827670820057392 test_loss: 0.06068901419639587
epoch: 88 training_loss 0.08252478597685695 test_loss: 0.0745608389377594
epoch: 89 training_loss 0.08217345297336578 test_loss: 0.07789109945297241
epoch: 90 training_loss 0.08124316420406102 test_loss: 0.06899254322052002
epoch: 91 training_loss 0.07832681283354759 test_loss: 0.08799371719360352
epoch: 92 training_loss 0.07126000022515655 test_loss: 0.07902697324752808
epoch: 93 training_loss 0.07945846416056156 test_loss: 0.08423276543617249
epoch: 94 training_loss 0.07861453836783766 test_loss: 0.08231752514839172
epoch: 95 training_loss 0.07861123014241457 test_loss: 0.06931645274162293
epoch: 96 training_loss 0.0763409005664289 test_loss: 0.07662808895111084
epoch: 97 training_loss 0.07948886843398213 test_loss: 0.08113696575164794
epoch: 98 training_loss 0.08206945188343524 test_loss: 0.07964557409286499
epoch: 99 training_loss 0.07769489496946334 test_loss: 0.07658917903900146
epoch: 100 training_loss 0.07595571843907237 test_loss: 0.07942138314247131
epoch: 101 training_loss 0.07378856560215354 test_loss: 0.07176719307899475
epoch: 102 training_loss 0.07346588725224137 test_loss: 0.0754666268825531
epoch: 103 training_loss 0.08086930746212602 test_loss: 0.08224878311157227
epoch: 104 training_loss 0.07990645330399275 test_loss: 0.08248798251152038
epoch: 105 training_loss 0.07780190063640476 test_loss: 0.08353230357170105
epoch: 106 training_loss 0.07657885851338506 test_loss: 0.07636228203773499
epoch: 107 training_loss 0.07913765610195696 test_loss: 0.08046196699142456
epoch: 108 training_loss 0.07760464321821928 test_loss: 0.08718858361244201
epoch: 109 training_loss 0.08041812820360064 test_loss: 0.07352332472801208
epoch: 110 training_loss 0.0799370032735169 test_loss: 0.07423489093780518
epoch: 111 training_loss 0.07591893267817795 test_loss: 0.06734958291053772
epoch: 112 training_loss 0.08402359079569578 test_loss: 0.07503105998039246
epoch: 113 training_loss 0.07970405591651797 test_loss: 0.0726594090461731
epoch: 114 training_loss 0.07218081556260586 test_loss: 0.0809797465801239
epoch: 115 training_loss 0.07448946477845311 test_loss: 0.0798047125339508
epoch: 116 training_loss 0.08106952007859945 test_loss: 0.08065784573554993
epoch: 117 training_loss 0.0770819291099906 test_loss: 0.0768689215183258
epoch: 118 training_loss 0.08044530138373375 test_loss: 0.08504549264907837
epoch: 119 training_loss 0.07729843588545919 test_loss: 0.092938894033432
epoch: 120 training_loss 0.08257749535841867 test_loss: 0.08714123368263245
epoch: 121 training_loss 0.07704849271103739 test_loss: 0.07874664068222045
epoch: 122 training_loss 0.08014070343226194 test_loss: 0.07944047451019287
epoch: 123 training_loss 0.07434813246130943 test_loss: 0.06754196286201478
epoch: 124 training_loss 0.07399281674996018 test_loss: 0.09261361956596374
epoch: 125 training_loss 0.07964152664877475 test_loss: 0.0747154712677002
epoch: 126 training_loss 0.08176906326785684 test_loss: 0.06802886128425598
epoch: 127 training_loss 0.07687652420252561 test_loss: 0.08175808191299438
epoch: 128 training_loss 0.07899794951081276 test_loss: 0.07265823483467101
epoch: 129 training_loss 0.07744292296469212 test_loss: 0.08217798471450806
epoch: 130 training_loss 0.07515430792234838 test_loss: 0.09605339169502258
epoch: 131 training_loss 0.07864564472809434 test_loss: 0.08352821469306945
epoch: 132 training_loss 0.07793987819924951 test_loss: 0.07049768567085266
epoch: 133 training_loss 0.08354668844491243 test_loss: 0.08513352870941163
epoch: 134 training_loss 0.07341448019258678 test_loss: 0.07031917572021484
epoch: 135 training_loss 0.07662043403834104 test_loss: 0.08185184001922607
epoch: 136 training_loss 0.07601091168820857 test_loss: 0.07458142638206482
epoch: 137 training_loss 0.06873310818336904 test_loss: 0.08957245349884033
epoch: 138 training_loss 0.0777343244291842 test_loss: 0.07854067683219909
epoch: 139 training_loss 0.07806589026004077 test_loss: 0.08527920246124268
epoch: 140 training_loss 0.07995246773585678 test_loss: 0.07022925615310668
epoch: 141 training_loss 0.07910175956785678 test_loss: 0.0730320930480957
epoch: 142 training_loss 0.07652962259948254 test_loss: 0.07554745078086852
epoch: 143 training_loss 0.07831271052360535 test_loss: 0.08244228959083558
epoch: 144 training_loss 0.08498396830633283 test_loss: 0.08720209002494812
epoch: 145 training_loss 0.0737679261341691 test_loss: 0.08196715116500855
epoch: 146 training_loss 0.0790606102719903 test_loss: 0.07441654205322265
epoch: 147 training_loss 0.07759728649631142 test_loss: 0.09099509119987488
epoch: 148 training_loss 0.07554957525804638 test_loss: 0.07363315820693969
epoch: 149 training_loss 0.0735110691934824 test_loss: 0.06788960099220276
epoch: 0 training_loss 36.88997032165528 test_loss: 20.19744873046875
epoch: 1 training_loss 16.977625961303712 test_loss: 13.840945434570312
epoch: 2 training_loss 12.962803926467895 test_loss: 11.958545684814453
epoch: 3 training_loss 10.96914994239807 test_loss: 10.007156372070312
epoch: 4 training_loss 9.428409152030945 test_loss: 8.963093566894532
epoch: 5 training_loss 8.666924724578857 test_loss: 8.28585433959961
epoch: 6 training_loss 8.065605759620667 test_loss: 7.9630889892578125
epoch: 7 training_loss 7.52289197921753 test_loss: 6.851902770996094
epoch: 8 training_loss 7.146748399734497 test_loss: 6.65965576171875
epoch: 9 training_loss 6.748440899848938 test_loss: 6.618951416015625
epoch: 10 training_loss 6.2610533618927 test_loss: 6.190680313110351
epoch: 11 training_loss 6.072237339019775 test_loss: 5.869385528564453
epoch: 12 training_loss 5.901585936546326 test_loss: 5.638817596435547
epoch: 13 training_loss 5.72431097984314 test_loss: 5.310469055175782
epoch: 14 training_loss 5.655780367851257 test_loss: 5.505027008056641
epoch: 15 training_loss 5.397063403129578 test_loss: 5.398624420166016
epoch: 16 training_loss 5.28158784866333 test_loss: 5.343896484375
epoch: 17 training_loss 5.0698771262168885 test_loss: 5.03808479309082
epoch: 18 training_loss 4.939918949604034 test_loss: 4.675574111938476
epoch: 19 training_loss 4.950215821266174 test_loss: 5.073818969726562
epoch: 20 training_loss 4.787380120754242 test_loss: 4.627991104125977
epoch: 21 training_loss 4.6216079092025755 test_loss: 4.62688980102539
epoch: 22 training_loss 4.675400764942169 test_loss: 4.549489593505859
epoch: 23 training_loss 4.504744353294373 test_loss: 4.6114051818847654
epoch: 24 training_loss 4.488750689029693 test_loss: 4.2037609100341795
epoch: 25 training_loss 4.446749551296234 test_loss: 4.343207931518554
epoch: 26 training_loss 4.383185863494873 test_loss: 4.128580474853516
epoch: 27 training_loss 4.204867997169495 test_loss: 4.151361083984375
epoch: 28 training_loss 4.198940501213074 test_loss: 3.9188411712646483
epoch: 29 training_loss 4.242105789184571 test_loss: 4.164264678955078
epoch: 30 training_loss 4.029627606868744 test_loss: 4.118935012817383
epoch: 31 training_loss 4.086693360805511 test_loss: 3.839629364013672
epoch: 32 training_loss 3.803236665725708 test_loss: 4.113662338256836
epoch: 33 training_loss 3.972243435382843 test_loss: 3.785917282104492
epoch: 34 training_loss 3.9564938592910766 test_loss: 3.8253849029541014
epoch: 35 training_loss 3.799187002182007 test_loss: 3.7342273712158205
epoch: 36 training_loss 3.8614141011238097 test_loss: 3.8679237365722656
epoch: 37 training_loss 3.7937833523750304 test_loss: 3.7996135711669923
epoch: 38 training_loss 3.7334044647216795 test_loss: 3.6431270599365235
epoch: 39 training_loss 3.609028675556183 test_loss: 3.7895889282226562
epoch: 40 training_loss 3.7176791858673095 test_loss: 3.751449966430664
epoch: 41 training_loss 3.6316747760772703 test_loss: 3.6657264709472654
epoch: 42 training_loss 3.702862536907196 test_loss: 3.7217971801757814
epoch: 43 training_loss 3.5320432472229 test_loss: 3.537567901611328
epoch: 44 training_loss 3.518183498382568 test_loss: 3.4037776947021485
epoch: 45 training_loss 3.501829454898834 test_loss: 3.6170494079589846
epoch: 46 training_loss 3.43685572385788 test_loss: 3.5210830688476564
epoch: 47 training_loss 3.341927380561829 test_loss: 3.4731998443603516
epoch: 48 training_loss 3.5421356678009035 test_loss: 3.250321960449219
epoch: 49 training_loss 3.4257094979286196 test_loss: 3.2512706756591796
epoch: 50 training_loss 3.426394374370575 test_loss: 3.5810096740722654
epoch: 51 training_loss 3.4590078592300415 test_loss: 3.42919807434082
epoch: 52 training_loss 3.323664586544037 test_loss: 3.2238807678222656
epoch: 53 training_loss 3.320863902568817 test_loss: 3.364704895019531
epoch: 54 training_loss 3.198022871017456 test_loss: 3.2480022430419924
epoch: 55 training_loss 3.3257236099243164 test_loss: 3.2105758666992186
epoch: 56 training_loss 3.297207839488983 test_loss: 3.1213802337646483
epoch: 57 training_loss 3.300972912311554 test_loss: 3.320332336425781
epoch: 58 training_loss 3.233509542942047 test_loss: 3.2702491760253904
epoch: 59 training_loss 3.1908099460601806 test_loss: 3.1013797760009765
epoch: 60 training_loss 3.2489908218383787 test_loss: 3.0475053787231445
epoch: 61 training_loss 3.0833901453018187 test_loss: 3.3115093231201174
epoch: 62 training_loss 3.25205206155777 test_loss: 3.249312973022461
epoch: 63 training_loss 3.141583430767059 test_loss: 3.072675895690918
epoch: 64 training_loss 3.063730037212372 test_loss: 3.1802148818969727
epoch: 65 training_loss 3.12365686416626 test_loss: 3.143549346923828
epoch: 66 training_loss 3.0886184644699095 test_loss: 3.0913908004760744
epoch: 67 training_loss 3.0899343299865722 test_loss: 2.928226661682129
epoch: 68 training_loss 3.0881369853019716 test_loss: 2.9794519424438475
epoch: 69 training_loss 3.1162630891799927 test_loss: 3.3019832611083983
epoch: 70 training_loss 3.1394509434700013 test_loss: 3.1125463485717773
epoch: 71 training_loss 3.0786932802200315 test_loss: 3.1008718490600584
epoch: 72 training_loss 3.036223032474518 test_loss: 2.980259895324707
epoch: 73 training_loss 3.0087475275993345 test_loss: 3.0200803756713865
epoch: 74 training_loss 2.975934681892395 test_loss: 2.9286882400512697
epoch: 75 training_loss 3.0711578345298767 test_loss: 2.9833532333374024
epoch: 76 training_loss 2.958031439781189 test_loss: 2.8092899322509766
epoch: 77 training_loss 3.072662069797516 test_loss: 2.9895517349243166
epoch: 78 training_loss 2.971140468120575 test_loss: 3.1196767807006838
epoch: 79 training_loss 2.9410430693626406 test_loss: 3.107293701171875
epoch: 80 training_loss 2.8947418975830077 test_loss: 3.0062122344970703
epoch: 81 training_loss 2.8487322902679444 test_loss: 2.8349531173706053
epoch: 82 training_loss 2.9286668562889098 test_loss: 2.806587982177734
epoch: 83 training_loss 2.89766090631485 test_loss: 2.8812028884887697
epoch: 84 training_loss 2.8819064474105835 test_loss: 3.0184268951416016
epoch: 85 training_loss 2.9221361207962038 test_loss: 2.6717323303222655
epoch: 86 training_loss 2.843608977794647 test_loss: 2.7020551681518556
epoch: 87 training_loss 2.809781761169434 test_loss: 2.873882293701172
epoch: 88 training_loss 2.7936310744285584 test_loss: 2.87778263092041
epoch: 89 training_loss 2.783890912532806 test_loss: 2.8470857620239256
epoch: 90 training_loss 2.866847219467163 test_loss: 2.7831262588500976
epoch: 91 training_loss 2.808007712364197 test_loss: 2.6700418472290037
epoch: 92 training_loss 2.81514785528183 test_loss: 2.8378118515014648
epoch: 93 training_loss 2.7364533948898315 test_loss: 2.799629020690918
epoch: 94 training_loss 2.8086177885532377 test_loss: 2.846151351928711
epoch: 95 training_loss 2.7967570066452025 test_loss: 2.9701799392700194
epoch: 96 training_loss 2.7137086892127993 test_loss: 2.8385276794433594
epoch: 97 training_loss 2.7359138560295104 test_loss: 2.8835165023803713
epoch: 98 training_loss 2.682357680797577 test_loss: 2.622837257385254
epoch: 99 training_loss 2.7512850666046145 test_loss: 2.5254848480224608
epoch: 100 training_loss 2.7415104711055758 test_loss: 2.865121841430664
epoch: 101 training_loss 2.6027164459228516 test_loss: 2.784619903564453
epoch: 102 training_loss 2.738487358093262 test_loss: 2.512357711791992
epoch: 103 training_loss 2.774377474784851 test_loss: 2.6636709213256835
epoch: 104 training_loss 2.7057195568084715 test_loss: 2.8294069290161135
epoch: 105 training_loss 2.7669120907783507 test_loss: 2.7036230087280275
epoch: 106 training_loss 2.724542968273163 test_loss: 2.544947624206543
epoch: 107 training_loss 2.6886188590526583 test_loss: 2.614963912963867
epoch: 108 training_loss 2.676871703863144 test_loss: 2.6945869445800783
epoch: 109 training_loss 2.6602188777923583 test_loss: 2.76784725189209
epoch: 110 training_loss 2.7445361948013307 test_loss: 2.809122085571289
epoch: 111 training_loss 2.6435375475883482 test_loss: 2.761393737792969
epoch: 112 training_loss 2.6308267855644227 test_loss: 2.6712003707885743
epoch: 113 training_loss 2.6491393446922302 test_loss: 2.6534656524658202
epoch: 114 training_loss 2.6574342751502993 test_loss: 2.5206912994384765
epoch: 115 training_loss 2.6781773138046265 test_loss: 2.550740051269531
epoch: 116 training_loss 2.6070312893390657 test_loss: 2.5428529739379884
epoch: 117 training_loss 2.611485651731491 test_loss: 2.63482608795166
epoch: 118 training_loss 2.69519823551178 test_loss: 2.753870391845703
epoch: 119 training_loss 2.5929695522785186 test_loss: 2.6152389526367186
epoch: 120 training_loss 2.633521772623062 test_loss: 2.533592414855957
epoch: 121 training_loss 2.582831175327301 test_loss: 2.7510108947753906
epoch: 122 training_loss 2.502101328372955 test_loss: 2.540370750427246
epoch: 123 training_loss 2.6263120222091674 test_loss: 2.4978761672973633
epoch: 124 training_loss 2.636720197200775 test_loss: 2.7239654541015623
epoch: 125 training_loss 2.529407784938812 test_loss: 2.579637145996094
epoch: 126 training_loss 2.5766491985321043 test_loss: 2.5383058547973634
epoch: 127 training_loss 2.6602412939071653 test_loss: 2.4771907806396483
epoch: 128 training_loss 2.6920904433727264 test_loss: 2.5617971420288086
epoch: 129 training_loss 2.5317420780658724 test_loss: 2.4632953643798827
epoch: 130 training_loss 2.5832877206802367 test_loss: 2.3891435623168946
epoch: 131 training_loss 2.548077620267868 test_loss: 2.5241933822631837
epoch: 132 training_loss 2.553747658729553 test_loss: 2.6494029998779296
epoch: 133 training_loss 2.590770719051361 test_loss: 2.6424539566040037
epoch: 134 training_loss 2.508165103197098 test_loss: 2.482841682434082
epoch: 135 training_loss 2.620865001678467 test_loss: 2.6173810958862305
epoch: 136 training_loss 2.5362118220329286 test_loss: 2.5110107421875
epoch: 137 training_loss 2.5244361364841463 test_loss: 2.50408821105957
epoch: 138 training_loss 2.5427462124824523 test_loss: 2.437714767456055
epoch: 139 training_loss 2.4757308745384217 test_loss: 2.3920183181762695
epoch: 140 training_loss 2.602389121055603 test_loss: 2.6561695098876954
epoch: 141 training_loss 2.4338919961452485 test_loss: 2.418216323852539
epoch: 142 training_loss 2.496600513458252 test_loss: 2.5961328506469727
epoch: 143 training_loss 2.4755729520320893 test_loss: 2.5788959503173827
epoch: 144 training_loss 2.4349579560756682 test_loss: 2.5426214218139647
epoch: 145 training_loss 2.5064705348014833 test_loss: 2.4071233749389647
epoch: 146 training_loss 2.5385266649723053 test_loss: 2.4316015243530273
epoch: 147 training_loss 2.4908869862556458 test_loss: 2.4453044891357423
epoch: 148 training_loss 2.5091486704349517 test_loss: 2.53912467956543
epoch: 149 training_loss 2.4408077490329743 test_loss: 2.493897819519043
3724.8644010581797
episode: 0 training return: tensor(143.4015, device='cuda:0')
episode: 1 training return: tensor(-42.0023, device='cuda:0')
episode: 2 training return: tensor(172.2900, device='cuda:0')
episode: 3 training return: tensor(-363.5776, device='cuda:0')
epoch: 1 test_true_pfm: 3608.326867356793 sim_pfm: 158.86540393947507
episode: 4 training return: tensor(208.5614, device='cuda:0')
episode: 5 training return: tensor(205.0497, device='cuda:0')
episode: 6 training return: tensor(258.9721, device='cuda:0')
episode: 7 training return: tensor(-161.0304, device='cuda:0')
epoch: 2 test_true_pfm: 3693.650762841227 sim_pfm: 289.92978665347135
episode: 8 training return: tensor(-579.8441, device='cuda:0')
episode: 9 training return: tensor(247.7233, device='cuda:0')
episode: 10 training return: tensor(94.4299, device='cuda:0')
episode: 11 training return: tensor(83.0137, device='cuda:0')
epoch: 3 test_true_pfm: 3629.0638413870115 sim_pfm: 74.56992757122498
episode: 12 training return: tensor(173.4537, device='cuda:0')
episode: 13 training return: tensor(141.9953, device='cuda:0')
episode: 14 training return: tensor(312.8416, device='cuda:0')
episode: 15 training return: tensor(21.6687, device='cuda:0')
epoch: 4 test_true_pfm: 3558.9282874592755 sim_pfm: 195.71120650029238
episode: 16 training return: tensor(-4.0062, device='cuda:0')
episode: 17 training return: tensor(118.9341, device='cuda:0')
episode: 18 training return: tensor(315.5536, device='cuda:0')
episode: 19 training return: tensor(272.3565, device='cuda:0')
epoch: 5 test_true_pfm: 3759.075999706438 sim_pfm: 259.702901393699
episode: 20 training return: tensor(118.5182, device='cuda:0')
episode: 21 training return: tensor(215.2085, device='cuda:0')
episode: 22 training return: tensor(33.6629, device='cuda:0')
episode: 23 training return: tensor(19.6837, device='cuda:0')
epoch: 6 test_true_pfm: 3559.6521117140123 sim_pfm: 32.56652557490937
episode: 24 training return: tensor(58.8323, device='cuda:0')
episode: 25 training return: tensor(78.3390, device='cuda:0')
episode: 26 training return: tensor(23.6291, device='cuda:0')
episode: 27 training return: tensor(127.5423, device='cuda:0')
epoch: 7 test_true_pfm: 2607.4419504949906 sim_pfm: -399.28450011599733
episode: 28 training return: tensor(81.3374, device='cuda:0')
episode: 29 training return: tensor(249.4186, device='cuda:0')
episode: 30 training return: tensor(26.7256, device='cuda:0')
episode: 31 training return: tensor(105.1237, device='cuda:0')
epoch: 8 test_true_pfm: 3793.7210510534846 sim_pfm: -404.9214765711998
episode: 32 training return: tensor(371.2983, device='cuda:0')
episode: 33 training return: tensor(105.9249, device='cuda:0')
episode: 34 training return: tensor(386.8076, device='cuda:0')
episode: 35 training return: tensor(30.1884, device='cuda:0')
epoch: 9 test_true_pfm: 3926.766859862024 sim_pfm: 172.61480708713256
episode: 36 training return: tensor(101.3857, device='cuda:0')
episode: 37 training return: tensor(10.8924, device='cuda:0')
episode: 38 training return: tensor(220.1680, device='cuda:0')
episode: 39 training return: tensor(232.6206, device='cuda:0')
epoch: 10 test_true_pfm: 2225.1270184580744 sim_pfm: -209.51538400138574
episode: 40 training return: tensor(90.4522, device='cuda:0')
episode: 41 training return: tensor(88.6823, device='cuda:0')
episode: 42 training return: tensor(140.7480, device='cuda:0')
episode: 43 training return: tensor(210.2307, device='cuda:0')
epoch: 11 test_true_pfm: 3998.734800599388 sim_pfm: 440.05779926742736
episode: 44 training return: tensor(249.0864, device='cuda:0')
episode: 45 training return: tensor(31.9167, device='cuda:0')
episode: 46 training return: tensor(372.4704, device='cuda:0')
episode: 47 training return: tensor(129.3035, device='cuda:0')
epoch: 12 test_true_pfm: 3721.5052000484807 sim_pfm: -141.58995835647997
episode: 48 training return: tensor(183.1579, device='cuda:0')
episode: 49 training return: tensor(158.1371, device='cuda:0')
episode: 50 training return: tensor(262.5490, device='cuda:0')
episode: 51 training return: tensor(140.7034, device='cuda:0')
epoch: 13 test_true_pfm: 3862.667566596545 sim_pfm: 289.1599411542993
episode: 52 training return: tensor(39.5561, device='cuda:0')
episode: 53 training return: tensor(-776.9464, device='cuda:0')
episode: 54 training return: tensor(413.9899, device='cuda:0')
episode: 55 training return: tensor(219.8423, device='cuda:0')
epoch: 14 test_true_pfm: 2839.3610458213484 sim_pfm: -22.297419718098052
episode: 56 training return: tensor(205.6698, device='cuda:0')
episode: 57 training return: tensor(412.1219, device='cuda:0')
episode: 58 training return: tensor(65.4812, device='cuda:0')
episode: 59 training return: tensor(228.0354, device='cuda:0')
epoch: 15 test_true_pfm: 4093.2508598193767 sim_pfm: -359.81271839231096
episode: 60 training return: tensor(377.3528, device='cuda:0')
episode: 61 training return: tensor(327.0751, device='cuda:0')
episode: 62 training return: tensor(441.6083, device='cuda:0')
episode: 63 training return: tensor(157.7696, device='cuda:0')
epoch: 16 test_true_pfm: 521.5731012177736 sim_pfm: -414.5224556350343
episode: 64 training return: tensor(425.3747, device='cuda:0')
episode: 65 training return: tensor(442.2013, device='cuda:0')
episode: 66 training return: tensor(135.1107, device='cuda:0')
episode: 67 training return: tensor(144.5085, device='cuda:0')
epoch: 17 test_true_pfm: 2798.5838030870277 sim_pfm: -760.6064902482127
episode: 68 training return: tensor(184.0515, device='cuda:0')
episode: 69 training return: tensor(306.7366, device='cuda:0')
episode: 70 training return: tensor(303.2982, device='cuda:0')
episode: 71 training return: tensor(196.0009, device='cuda:0')
epoch: 18 test_true_pfm: 3899.7009499716623 sim_pfm: 285.5233405629406
episode: 72 training return: tensor(185.0070, device='cuda:0')
episode: 73 training return: tensor(78.1432, device='cuda:0')
episode: 74 training return: tensor(149.6512, device='cuda:0')
episode: 75 training return: tensor(158.7157, device='cuda:0')
epoch: 19 test_true_pfm: 1854.464358205123 sim_pfm: -772.0724531319187
episode: 76 training return: tensor(61.4104, device='cuda:0')
episode: 77 training return: tensor(219.6355, device='cuda:0')
episode: 78 training return: tensor(220.2053, device='cuda:0')
episode: 79 training return: tensor(158.7765, device='cuda:0')
epoch: 20 test_true_pfm: 1895.3602159543946 sim_pfm: -339.337600440175
episode: 80 training return: tensor(328.3269, device='cuda:0')
episode: 81 training return: tensor(240.3140, device='cuda:0')
episode: 82 training return: tensor(414.3235, device='cuda:0')
episode: 83 training return: tensor(431.9058, device='cuda:0')
epoch: 21 test_true_pfm: 4074.85381405801 sim_pfm: -500.0189835263106
episode: 84 training return: tensor(391.8797, device='cuda:0')
episode: 85 training return: tensor(312.7409, device='cuda:0')
episode: 86 training return: tensor(47.7452, device='cuda:0')
episode: 87 training return: tensor(-789.1205, device='cuda:0')
epoch: 22 test_true_pfm: 707.6711740686438 sim_pfm: 92.37651895860715
episode: 88 training return: tensor(405.1765, device='cuda:0')
episode: 89 training return: tensor(169.4137, device='cuda:0')
episode: 90 training return: tensor(429.5888, device='cuda:0')
episode: 91 training return: tensor(180.4907, device='cuda:0')
epoch: 23 test_true_pfm: 2925.613865429967 sim_pfm: 30.032710204812854
episode: 92 training return: tensor(249.0422, device='cuda:0')
episode: 93 training return: tensor(256.9814, device='cuda:0')
episode: 94 training return: tensor(319.0734, device='cuda:0')
episode: 95 training return: tensor(104.1556, device='cuda:0')
epoch: 24 test_true_pfm: 2897.0015105344373 sim_pfm: 0.9755327726403872
episode: 96 training return: tensor(235.5487, device='cuda:0')
episode: 97 training return: tensor(234.4693, device='cuda:0')
episode: 98 training return: tensor(233.0705, device='cuda:0')
episode: 99 training return: tensor(191.8625, device='cuda:0')
epoch: 25 test_true_pfm: 3991.3912332514305 sim_pfm: 56.65822711695606
episode: 100 training return: tensor(362.0545, device='cuda:0')
episode: 101 training return: tensor(296.5704, device='cuda:0')
episode: 102 training return: tensor(298.7069, device='cuda:0')
episode: 103 training return: tensor(207.3928, device='cuda:0')
epoch: 26 test_true_pfm: 3982.433618540272 sim_pfm: 324.37684772515786
episode: 104 training return: tensor(298.4366, device='cuda:0')
episode: 105 training return: tensor(235.4954, device='cuda:0')
episode: 106 training return: tensor(190.4835, device='cuda:0')
episode: 107 training return: tensor(267.9918, device='cuda:0')
epoch: 27 test_true_pfm: 3923.8322589268005 sim_pfm: -111.6052140533963
episode: 108 training return: tensor(393.2289, device='cuda:0')
episode: 109 training return: tensor(199.3732, device='cuda:0')
episode: 110 training return: tensor(352.9850, device='cuda:0')
episode: 111 training return: tensor(196.7520, device='cuda:0')
epoch: 28 test_true_pfm: 3989.78825871516 sim_pfm: 375.0632354497211
episode: 112 training return: tensor(225.1371, device='cuda:0')
episode: 113 training return: tensor(184.0705, device='cuda:0')
episode: 114 training return: tensor(199.0440, device='cuda:0')
episode: 115 training return: tensor(360.0298, device='cuda:0')
epoch: 29 test_true_pfm: 3939.813475764319 sim_pfm: 319.6064494767634
episode: 116 training return: tensor(364.0288, device='cuda:0')
episode: 117 training return: tensor(65.5013, device='cuda:0')
episode: 118 training return: tensor(-773.5091, device='cuda:0')
episode: 119 training return: tensor(277.1527, device='cuda:0')
epoch: 30 test_true_pfm: 2830.267846115567 sim_pfm: 357.1848094118565
episode: 120 training return: tensor(359.7192, device='cuda:0')
episode: 121 training return: tensor(166.0559, device='cuda:0')
episode: 122 training return: tensor(119.1221, device='cuda:0')
episode: 123 training return: tensor(249.3441, device='cuda:0')
epoch: 31 test_true_pfm: 601.844413606287 sim_pfm: -31.220510899845976
episode: 124 training return: tensor(299.7742, device='cuda:0')
episode: 125 training return: tensor(402.2844, device='cuda:0')
episode: 126 training return: tensor(215.2810, device='cuda:0')
episode: 127 training return: tensor(329.5662, device='cuda:0')
epoch: 32 test_true_pfm: 4000.72418297772 sim_pfm: 22.087786626148347
episode: 128 training return: tensor(381.4308, device='cuda:0')
episode: 129 training return: tensor(241.7890, device='cuda:0')
episode: 130 training return: tensor(197.2552, device='cuda:0')
episode: 131 training return: tensor(157.9369, device='cuda:0')
epoch: 33 test_true_pfm: 3858.640002010178 sim_pfm: 300.22539341791224
episode: 132 training return: tensor(345.3334, device='cuda:0')
episode: 133 training return: tensor(328.0831, device='cuda:0')
episode: 134 training return: tensor(283.8893, device='cuda:0')
episode: 135 training return: tensor(242.0471, device='cuda:0')
epoch: 34 test_true_pfm: 3935.8797902252813 sim_pfm: 275.7500676094011
episode: 136 training return: tensor(-718.6852, device='cuda:0')
episode: 137 training return: tensor(358.8449, device='cuda:0')
episode: 138 training return: tensor(226.7011, device='cuda:0')
episode: 139 training return: tensor(372.9999, device='cuda:0')
epoch: 35 test_true_pfm: 2957.9763637388082 sim_pfm: 332.9622500004286
episode: 140 training return: tensor(408.4622, device='cuda:0')
episode: 141 training return: tensor(352.3756, device='cuda:0')
episode: 142 training return: tensor(295.7028, device='cuda:0')
episode: 143 training return: tensor(338.9198, device='cuda:0')
epoch: 36 test_true_pfm: 4052.9080627404105 sim_pfm: 406.61065777618205
episode: 144 training return: tensor(206.8988, device='cuda:0')
episode: 145 training return: tensor(341.9551, device='cuda:0')
episode: 146 training return: tensor(411.7482, device='cuda:0')
episode: 147 training return: tensor(268.3454, device='cuda:0')
epoch: 37 test_true_pfm: 3001.392968688358 sim_pfm: -321.415839497
episode: 148 training return: tensor(312.4379, device='cuda:0')
episode: 149 training return: tensor(379.6837, device='cuda:0')
episode: 150 training return: tensor(229.5466, device='cuda:0')
episode: 151 training return: tensor(272.5797, device='cuda:0')
epoch: 38 test_true_pfm: 4083.231976190549 sim_pfm: 413.5459951118003
episode: 152 training return: tensor(125.1318, device='cuda:0')
episode: 153 training return: tensor(294.8491, device='cuda:0')
episode: 154 training return: tensor(300.5424, device='cuda:0')
episode: 155 training return: tensor(346.9971, device='cuda:0')
epoch: 39 test_true_pfm: 3990.4140296364894 sim_pfm: 394.81739145627944
episode: 156 training return: tensor(437.6186, device='cuda:0')
episode: 157 training return: tensor(316.9584, device='cuda:0')
episode: 158 training return: tensor(423.0944, device='cuda:0')
episode: 159 training return: tensor(311.3271, device='cuda:0')
epoch: 40 test_true_pfm: 1795.9961092484189 sim_pfm: -309.73181215208024
episode: 160 training return: tensor(354.9989, device='cuda:0')
episode: 161 training return: tensor(246.4063, device='cuda:0')
episode: 162 training return: tensor(393.9527, device='cuda:0')
episode: 163 training return: tensor(407.1358, device='cuda:0')
epoch: 41 test_true_pfm: 3950.2978503372974 sim_pfm: 322.3203665445714
episode: 164 training return: tensor(225.7040, device='cuda:0')
episode: 165 training return: tensor(-737.1701, device='cuda:0')
episode: 166 training return: tensor(265.4062, device='cuda:0')
episode: 167 training return: tensor(425.1955, device='cuda:0')
epoch: 42 test_true_pfm: 4061.0915031042146 sim_pfm: 18.012916008320946
episode: 168 training return: tensor(218.2823, device='cuda:0')
episode: 169 training return: tensor(329.1970, device='cuda:0')
episode: 170 training return: tensor(-757.4344, device='cuda:0')
episode: 171 training return: tensor(-775.4644, device='cuda:0')
epoch: 43 test_true_pfm: 507.9091781441877 sim_pfm: -306.5796800554381
episode: 172 training return: tensor(274.9549, device='cuda:0')
episode: 173 training return: tensor(277.4022, device='cuda:0')
episode: 174 training return: tensor(252.9846, device='cuda:0')
episode: 175 training return: tensor(340.1205, device='cuda:0')
epoch: 44 test_true_pfm: 3904.3030211780497 sim_pfm: 300.20176662157365
episode: 176 training return: tensor(425.7933, device='cuda:0')
episode: 177 training return: tensor(343.6530, device='cuda:0')
episode: 178 training return: tensor(433.8200, device='cuda:0')
episode: 179 training return: tensor(325.7613, device='cuda:0')
epoch: 45 test_true_pfm: 1721.0891277280125 sim_pfm: -802.8034267461238
episode: 180 training return: tensor(412.2929, device='cuda:0')
episode: 181 training return: tensor(-756.1410, device='cuda:0')
episode: 182 training return: tensor(191.5925, device='cuda:0')
episode: 183 training return: tensor(258.7064, device='cuda:0')
epoch: 46 test_true_pfm: 3866.0269149538813 sim_pfm: 304.03175401464495
episode: 184 training return: tensor(418.8227, device='cuda:0')
episode: 185 training return: tensor(203.3044, device='cuda:0')
episode: 186 training return: tensor(323.8629, device='cuda:0')
episode: 187 training return: tensor(273.2158, device='cuda:0')
epoch: 47 test_true_pfm: 2821.4240648819955 sim_pfm: 385.4720801174117
episode: 188 training return: tensor(68.7756, device='cuda:0')
episode: 189 training return: tensor(247.3390, device='cuda:0')
episode: 190 training return: tensor(324.0230, device='cuda:0')
episode: 191 training return: tensor(298.6795, device='cuda:0')
epoch: 48 test_true_pfm: 2198.2679245612944 sim_pfm: 1.0028729012216597
episode: 192 training return: tensor(325.5682, device='cuda:0')
episode: 193 training return: tensor(157.7463, device='cuda:0')
episode: 194 training return: tensor(415.6778, device='cuda:0')
episode: 195 training return: tensor(388.1628, device='cuda:0')
epoch: 49 test_true_pfm: 3948.831858213331 sim_pfm: 346.15506922490505
episode: 196 training return: tensor(370.4135, device='cuda:0')
episode: 197 training return: tensor(358.0085, device='cuda:0')
episode: 198 training return: tensor(96.6476, device='cuda:0')
episode: 199 training return: tensor(327.7702, device='cuda:0')
epoch: 50 test_true_pfm: 1849.005348082367 sim_pfm: -387.2549270724994
episode: 200 training return: tensor(281.7423, device='cuda:0')
episode: 201 training return: tensor(317.9301, device='cuda:0')
episode: 202 training return: tensor(-729.0176, device='cuda:0')
episode: 203 training return: tensor(198.1376, device='cuda:0')
epoch: 51 test_true_pfm: 4033.3185461583103 sim_pfm: 385.4248087442247
episode: 204 training return: tensor(231.3662, device='cuda:0')
episode: 205 training return: tensor(383.6535, device='cuda:0')
episode: 206 training return: tensor(342.8849, device='cuda:0')
episode: 207 training return: tensor(204.6453, device='cuda:0')
epoch: 52 test_true_pfm: 3915.8050728499006 sim_pfm: 265.2302690068609
episode: 208 training return: tensor(279.3809, device='cuda:0')
episode: 209 training return: tensor(373.6566, device='cuda:0')
episode: 210 training return: tensor(-773.0858, device='cuda:0')
episode: 211 training return: tensor(249.4510, device='cuda:0')
epoch: 53 test_true_pfm: 3839.1917815755055 sim_pfm: 308.71858618121286
episode: 212 training return: tensor(274.2510, device='cuda:0')
episode: 213 training return: tensor(303.8349, device='cuda:0')
episode: 214 training return: tensor(390.8872, device='cuda:0')
episode: 215 training return: tensor(459.5879, device='cuda:0')
epoch: 54 test_true_pfm: 3327.4570521807254 sim_pfm: 26.96054846913709
episode: 216 training return: tensor(332.9035, device='cuda:0')
episode: 217 training return: tensor(347.7937, device='cuda:0')
episode: 218 training return: tensor(421.8418, device='cuda:0')
episode: 219 training return: tensor(254.6342, device='cuda:0')
epoch: 55 test_true_pfm: 3877.9903949758523 sim_pfm: 354.625170611757
episode: 220 training return: tensor(290.0241, device='cuda:0')
episode: 221 training return: tensor(277.6287, device='cuda:0')
episode: 222 training return: tensor(216.1467, device='cuda:0')
episode: 223 training return: tensor(250.1270, device='cuda:0')
epoch: 56 test_true_pfm: 2918.0181969275495 sim_pfm: 40.175014556686314
episode: 224 training return: tensor(229.8150, device='cuda:0')
episode: 225 training return: tensor(230.0025, device='cuda:0')
episode: 226 training return: tensor(273.7886, device='cuda:0')
episode: 227 training return: tensor(261.8463, device='cuda:0')
epoch: 57 test_true_pfm: 1759.8357130698344 sim_pfm: -15.07863159570843
episode: 228 training return: tensor(425.8310, device='cuda:0')
episode: 229 training return: tensor(409.5506, device='cuda:0')
episode: 230 training return: tensor(226.8716, device='cuda:0')
episode: 231 training return: tensor(-712.6152, device='cuda:0')
epoch: 58 test_true_pfm: 1872.3423817382682 sim_pfm: 11.681635674535451
episode: 232 training return: tensor(427.1051, device='cuda:0')
episode: 233 training return: tensor(-656.3595, device='cuda:0')
episode: 234 training return: tensor(268.3430, device='cuda:0')
episode: 235 training return: tensor(357.7330, device='cuda:0')
epoch: 59 test_true_pfm: 2143.2501672450035 sim_pfm: 407.26107619330287
episode: 236 training return: tensor(330.8967, device='cuda:0')
episode: 237 training return: tensor(234.2943, device='cuda:0')
episode: 238 training return: tensor(420.4141, device='cuda:0')
episode: 239 training return: tensor(449.1448, device='cuda:0')
epoch: 60 test_true_pfm: 2763.0769600460067 sim_pfm: -436.5907672637938
episode: 240 training return: tensor(-815.6503, device='cuda:0')
episode: 241 training return: tensor(262.4716, device='cuda:0')
episode: 242 training return: tensor(296.9014, device='cuda:0')
episode: 243 training return: tensor(236.9919, device='cuda:0')
epoch: 61 test_true_pfm: 1790.4793747784877 sim_pfm: -24.46597771452313
episode: 244 training return: tensor(149.8931, device='cuda:0')
episode: 245 training return: tensor(469.5983, device='cuda:0')
episode: 246 training return: tensor(286.8496, device='cuda:0')
episode: 247 training return: tensor(180.5388, device='cuda:0')
epoch: 62 test_true_pfm: 3815.6208445727957 sim_pfm: -132.7454125972096
episode: 248 training return: tensor(343.2867, device='cuda:0')
episode: 249 training return: tensor(442.6606, device='cuda:0')
episode: 250 training return: tensor(334.5900, device='cuda:0')
episode: 251 training return: tensor(355.1929, device='cuda:0')
epoch: 63 test_true_pfm: 2752.4406701423736 sim_pfm: 37.49831547902431
episode: 252 training return: tensor(325.9291, device='cuda:0')
episode: 253 training return: tensor(454.0874, device='cuda:0')
episode: 254 training return: tensor(291.1158, device='cuda:0')
episode: 255 training return: tensor(439.0530, device='cuda:0')
epoch: 64 test_true_pfm: 2937.573815937407 sim_pfm: -25.017023090079118
episode: 256 training return: tensor(365.9022, device='cuda:0')
episode: 257 training return: tensor(275.5964, device='cuda:0')
episode: 258 training return: tensor(414.1364, device='cuda:0')
episode: 259 training return: tensor(369.7831, device='cuda:0')
epoch: 65 test_true_pfm: 536.8149549335632 sim_pfm: -354.75605933131493
episode: 260 training return: tensor(234.2518, device='cuda:0')
episode: 261 training return: tensor(207.7527, device='cuda:0')
episode: 262 training return: tensor(-828.0259, device='cuda:0')
episode: 263 training return: tensor(183.9588, device='cuda:0')
epoch: 66 test_true_pfm: 4016.940122866839 sim_pfm: 379.62255540292244
episode: 264 training return: tensor(243.5000, device='cuda:0')
episode: 265 training return: tensor(387.5996, device='cuda:0')
episode: 266 training return: tensor(222.9540, device='cuda:0')
episode: 267 training return: tensor(338.7075, device='cuda:0')
epoch: 67 test_true_pfm: 4009.3597181090495 sim_pfm: 415.7682275585442
episode: 268 training return: tensor(165.2521, device='cuda:0')
episode: 269 training return: tensor(219.0164, device='cuda:0')
episode: 270 training return: tensor(350.2655, device='cuda:0')
episode: 271 training return: tensor(191.7061, device='cuda:0')
epoch: 68 test_true_pfm: 3995.2551059194266 sim_pfm: 376.8889570538304
episode: 272 training return: tensor(295.5794, device='cuda:0')
episode: 273 training return: tensor(354.2239, device='cuda:0')
episode: 274 training return: tensor(350.7878, device='cuda:0')
episode: 275 training return: tensor(378.9008, device='cuda:0')
epoch: 69 test_true_pfm: 3956.4597348770026 sim_pfm: 387.71809661737643
episode: 276 training return: tensor(380.6360, device='cuda:0')
episode: 277 training return: tensor(376.2327, device='cuda:0')
episode: 278 training return: tensor(388.7929, device='cuda:0')
episode: 279 training return: tensor(466.9969, device='cuda:0')
epoch: 70 test_true_pfm: 1778.8059952083738 sim_pfm: -394.54030824028695
episode: 280 training return: tensor(260.8312, device='cuda:0')
episode: 281 training return: tensor(-763.7542, device='cuda:0')
episode: 282 training return: tensor(254.0520, device='cuda:0')
episode: 283 training return: tensor(432.7856, device='cuda:0')
epoch: 71 test_true_pfm: 1908.775793300346 sim_pfm: -15.214707424204486
episode: 284 training return: tensor(301.2377, device='cuda:0')
episode: 285 training return: tensor(327.9798, device='cuda:0')
episode: 286 training return: tensor(329.8267, device='cuda:0')
episode: 287 training return: tensor(208.1194, device='cuda:0')
epoch: 72 test_true_pfm: 3985.663463900695 sim_pfm: -444.6274727560619
episode: 288 training return: tensor(-500.2910, device='cuda:0')
episode: 289 training return: tensor(326.7436, device='cuda:0')
episode: 290 training return: tensor(240.9702, device='cuda:0')
episode: 291 training return: tensor(436.5562, device='cuda:0')
epoch: 73 test_true_pfm: 3955.904972118768 sim_pfm: -54.168975694405766
episode: 292 training return: tensor(250.5780, device='cuda:0')
episode: 293 training return: tensor(274.1535, device='cuda:0')
episode: 294 training return: tensor(346.2188, device='cuda:0')
episode: 295 training return: tensor(233.4393, device='cuda:0')
epoch: 74 test_true_pfm: 3866.4474948007905 sim_pfm: 353.39299897719565
episode: 296 training return: tensor(369.0237, device='cuda:0')
episode: 297 training return: tensor(365.4487, device='cuda:0')
episode: 298 training return: tensor(289.4388, device='cuda:0')
episode: 299 training return: tensor(251.1140, device='cuda:0')
epoch: 75 test_true_pfm: 3905.352059522287 sim_pfm: 424.16284587482613
episode: 300 training return: tensor(221.9042, device='cuda:0')
episode: 301 training return: tensor(339.2921, device='cuda:0')
episode: 302 training return: tensor(350.7291, device='cuda:0')
episode: 303 training return: tensor(360.0820, device='cuda:0')
epoch: 76 test_true_pfm: 3529.4525629052428 sim_pfm: 319.0541564688513
episode: 304 training return: tensor(344.0820, device='cuda:0')
episode: 305 training return: tensor(358.2018, device='cuda:0')
episode: 306 training return: tensor(301.6848, device='cuda:0')
episode: 307 training return: tensor(420.6222, device='cuda:0')
epoch: 77 test_true_pfm: 634.0398028730381 sim_pfm: -731.4004631491649
episode: 308 training return: tensor(383.9034, device='cuda:0')
episode: 309 training return: tensor(348.6213, device='cuda:0')
episode: 310 training return: tensor(-778.4822, device='cuda:0')
episode: 311 training return: tensor(305.9579, device='cuda:0')
epoch: 78 test_true_pfm: 3887.63313245199 sim_pfm: 395.83570489931543
episode: 312 training return: tensor(401.9570, device='cuda:0')
episode: 313 training return: tensor(258.4629, device='cuda:0')
episode: 314 training return: tensor(386.9854, device='cuda:0')
episode: 315 training return: tensor(208.0777, device='cuda:0')
epoch: 79 test_true_pfm: 3932.642281516894 sim_pfm: 345.91502409001504
episode: 316 training return: tensor(276.4325, device='cuda:0')
episode: 317 training return: tensor(201.1125, device='cuda:0')
episode: 318 training return: tensor(221.2991, device='cuda:0')
episode: 319 training return: tensor(395.2126, device='cuda:0')
epoch: 80 test_true_pfm: 3820.6578643438284 sim_pfm: 328.1663224052754
episode: 320 training return: tensor(330.0048, device='cuda:0')
episode: 321 training return: tensor(-806.4581, device='cuda:0')
episode: 322 training return: tensor(194.5708, device='cuda:0')
episode: 323 training return: tensor(225.3466, device='cuda:0')
epoch: 81 test_true_pfm: 611.722607206437 sim_pfm: -339.17033608427545
episode: 324 training return: tensor(218.5526, device='cuda:0')
episode: 325 training return: tensor(-636.7452, device='cuda:0')
episode: 326 training return: tensor(190.4334, device='cuda:0')
episode: 327 training return: tensor(376.1680, device='cuda:0')
epoch: 82 test_true_pfm: 3871.4017267660142 sim_pfm: 300.99272053531604
episode: 328 training return: tensor(-607.1442, device='cuda:0')
episode: 329 training return: tensor(354.0003, device='cuda:0')
episode: 330 training return: tensor(420.1000, device='cuda:0')
episode: 331 training return: tensor(380.6992, device='cuda:0')
epoch: 83 test_true_pfm: 4084.5889671206364 sim_pfm: 356.31111319423263
episode: 332 training return: tensor(386.2019, device='cuda:0')
episode: 333 training return: tensor(192.2722, device='cuda:0')
episode: 334 training return: tensor(231.6986, device='cuda:0')
episode: 335 training return: tensor(322.1536, device='cuda:0')
epoch: 84 test_true_pfm: 2860.1312874830123 sim_pfm: -333.51715995772975
episode: 336 training return: tensor(410.7135, device='cuda:0')
episode: 337 training return: tensor(269.7466, device='cuda:0')
episode: 338 training return: tensor(289.0898, device='cuda:0')
episode: 339 training return: tensor(410.4917, device='cuda:0')
epoch: 85 test_true_pfm: 2757.5841327628586 sim_pfm: -427.0000209355688
episode: 340 training return: tensor(479.4033, device='cuda:0')
episode: 341 training return: tensor(-722.7782, device='cuda:0')
episode: 342 training return: tensor(231.0546, device='cuda:0')
episode: 343 training return: tensor(381.6856, device='cuda:0')
epoch: 86 test_true_pfm: 1640.7719996406452 sim_pfm: -27.826083721554216
episode: 344 training return: tensor(310.2620, device='cuda:0')
episode: 345 training return: tensor(324.0399, device='cuda:0')
episode: 346 training return: tensor(354.1442, device='cuda:0')
episode: 347 training return: tensor(405.1072, device='cuda:0')
epoch: 87 test_true_pfm: 3986.080200715503 sim_pfm: 317.71341648115776
episode: 348 training return: tensor(505.1451, device='cuda:0')
episode: 349 training return: tensor(208.3210, device='cuda:0')
episode: 350 training return: tensor(204.2325, device='cuda:0')
episode: 351 training return: tensor(319.0790, device='cuda:0')
epoch: 88 test_true_pfm: 4028.7939071181027 sim_pfm: 387.7636420225899
episode: 352 training return: tensor(261.7747, device='cuda:0')
episode: 353 training return: tensor(255.2700, device='cuda:0')
episode: 354 training return: tensor(317.8175, device='cuda:0')
episode: 355 training return: tensor(-764.1672, device='cuda:0')
epoch: 89 test_true_pfm: 4020.0339115087986 sim_pfm: 36.56376505305525
episode: 356 training return: tensor(109.6639, device='cuda:0')
episode: 357 training return: tensor(344.4190, device='cuda:0')
episode: 358 training return: tensor(318.5764, device='cuda:0')
episode: 359 training return: tensor(377.5581, device='cuda:0')
epoch: 90 test_true_pfm: 4037.603864791258 sim_pfm: 323.4831721750864
episode: 360 training return: tensor(263.0398, device='cuda:0')
episode: 361 training return: tensor(324.7332, device='cuda:0')
episode: 362 training return: tensor(283.9488, device='cuda:0')
episode: 363 training return: tensor(-765.8464, device='cuda:0')
epoch: 91 test_true_pfm: 4012.112818758062 sim_pfm: 359.6684843524029
episode: 364 training return: tensor(386.3387, device='cuda:0')
episode: 365 training return: tensor(267.7704, device='cuda:0')
episode: 366 training return: tensor(268.5502, device='cuda:0')
episode: 367 training return: tensor(308.0540, device='cuda:0')
epoch: 92 test_true_pfm: 3936.925320813218 sim_pfm: 330.0935950287385
episode: 368 training return: tensor(227.3223, device='cuda:0')
episode: 369 training return: tensor(431.9563, device='cuda:0')
episode: 370 training return: tensor(393.8891, device='cuda:0')
episode: 371 training return: tensor(264.4932, device='cuda:0')
epoch: 93 test_true_pfm: 3937.717062277523 sim_pfm: 302.6621685973175
episode: 372 training return: tensor(313.0801, device='cuda:0')
episode: 373 training return: tensor(396.9788, device='cuda:0')
episode: 374 training return: tensor(204.2416, device='cuda:0')
episode: 375 training return: tensor(274.6681, device='cuda:0')
epoch: 94 test_true_pfm: 3995.197543621849 sim_pfm: 394.7661539468293
episode: 376 training return: tensor(293.7978, device='cuda:0')
episode: 377 training return: tensor(321.4786, device='cuda:0')
episode: 378 training return: tensor(381.1111, device='cuda:0')
episode: 379 training return: tensor(277.8210, device='cuda:0')
epoch: 95 test_true_pfm: 4032.675604564129 sim_pfm: 327.3358697471558
episode: 380 training return: tensor(379.6492, device='cuda:0')
episode: 381 training return: tensor(365.8581, device='cuda:0')
episode: 382 training return: tensor(233.8209, device='cuda:0')
episode: 383 training return: tensor(280.1218, device='cuda:0')
epoch: 96 test_true_pfm: 3799.5940064390234 sim_pfm: 394.92744535013725
episode: 384 training return: tensor(317.4524, device='cuda:0')
episode: 385 training return: tensor(315.0188, device='cuda:0')
episode: 386 training return: tensor(271.6573, device='cuda:0')
episode: 387 training return: tensor(243.7244, device='cuda:0')
epoch: 97 test_true_pfm: 2899.891755553034 sim_pfm: -10.060891625122167
episode: 388 training return: tensor(447.1944, device='cuda:0')
episode: 389 training return: tensor(398.3679, device='cuda:0')
episode: 390 training return: tensor(333.1012, device='cuda:0')
episode: 391 training return: tensor(376.0618, device='cuda:0')
epoch: 98 test_true_pfm: 3987.4553922384875 sim_pfm: 431.80646825902903
episode: 392 training return: tensor(355.5777, device='cuda:0')
episode: 393 training return: tensor(270.4795, device='cuda:0')
episode: 394 training return: tensor(354.0048, device='cuda:0')
episode: 395 training return: tensor(335.6561, device='cuda:0')
epoch: 99 test_true_pfm: 3939.4325805142967 sim_pfm: 325.2590923778965
episode: 396 training return: tensor(195.2688, device='cuda:0')
episode: 397 training return: tensor(-596.2456, device='cuda:0')
episode: 398 training return: tensor(364.6295, device='cuda:0')
episode: 399 training return: tensor(231.0707, device='cuda:0')
epoch: 100 test_true_pfm: 3939.967784269707 sim_pfm: 340.8662873098704
episode: 400 training return: tensor(190.5558, device='cuda:0')
episode: 401 training return: tensor(299.1492, device='cuda:0')
episode: 402 training return: tensor(307.3527, device='cuda:0')
episode: 403 training return: tensor(251.0291, device='cuda:0')
epoch: 101 test_true_pfm: 1661.4937367329312 sim_pfm: 344.42217859375523
episode: 404 training return: tensor(273.1772, device='cuda:0')
episode: 405 training return: tensor(427.2360, device='cuda:0')
episode: 406 training return: tensor(208.4817, device='cuda:0')
episode: 407 training return: tensor(234.9411, device='cuda:0')
epoch: 102 test_true_pfm: 3961.621454058007 sim_pfm: 290.5360077854032
episode: 408 training return: tensor(268.6871, device='cuda:0')
episode: 409 training return: tensor(326.8276, device='cuda:0')
episode: 410 training return: tensor(309.5225, device='cuda:0')
episode: 411 training return: tensor(333.1788, device='cuda:0')
epoch: 103 test_true_pfm: 3001.0853142444535 sim_pfm: 32.09673959591115
episode: 412 training return: tensor(342.7413, device='cuda:0')
episode: 413 training return: tensor(468.3561, device='cuda:0')
episode: 414 training return: tensor(343.6252, device='cuda:0')
episode: 415 training return: tensor(217.2837, device='cuda:0')
epoch: 104 test_true_pfm: 3978.8533650980353 sim_pfm: 290.77178870046436
episode: 416 training return: tensor(221.8157, device='cuda:0')
episode: 417 training return: tensor(244.3819, device='cuda:0')
episode: 418 training return: tensor(337.3490, device='cuda:0')
episode: 419 training return: tensor(319.9766, device='cuda:0')
epoch: 105 test_true_pfm: 3969.0074367033253 sim_pfm: 388.4109154997471
episode: 420 training return: tensor(320.7034, device='cuda:0')
episode: 421 training return: tensor(383.1159, device='cuda:0')
episode: 422 training return: tensor(413.5839, device='cuda:0')
episode: 423 training return: tensor(359.5299, device='cuda:0')
epoch: 106 test_true_pfm: 4018.351639976149 sim_pfm: 355.25655962500605
episode: 424 training return: tensor(311.4676, device='cuda:0')
episode: 425 training return: tensor(113.9438, device='cuda:0')
episode: 426 training return: tensor(442.7490, device='cuda:0')
episode: 427 training return: tensor(174.9904, device='cuda:0')
epoch: 107 test_true_pfm: 4083.5499958403684 sim_pfm: 83.0495449915373
episode: 428 training return: tensor(287.9156, device='cuda:0')
episode: 429 training return: tensor(401.2596, device='cuda:0')
episode: 430 training return: tensor(194.0432, device='cuda:0')
episode: 431 training return: tensor(434.8529, device='cuda:0')
epoch: 108 test_true_pfm: 3851.2821299849347 sim_pfm: 254.84454038063026
episode: 432 training return: tensor(265.5766, device='cuda:0')
episode: 433 training return: tensor(276.1029, device='cuda:0')
episode: 434 training return: tensor(219.1578, device='cuda:0')
episode: 435 training return: tensor(461.6622, device='cuda:0')
epoch: 109 test_true_pfm: 4050.139405813181 sim_pfm: 412.63848450420966
episode: 436 training return: tensor(360.2489, device='cuda:0')
episode: 437 training return: tensor(418.6490, device='cuda:0')
episode: 438 training return: tensor(300.9456, device='cuda:0')
episode: 439 training return: tensor(356.4631, device='cuda:0')
epoch: 110 test_true_pfm: 3937.7502920663005 sim_pfm: 304.8120326748079
episode: 440 training return: tensor(256.1058, device='cuda:0')
episode: 441 training return: tensor(319.4847, device='cuda:0')
episode: 442 training return: tensor(399.9526, device='cuda:0')
episode: 443 training return: tensor(390.4485, device='cuda:0')
epoch: 111 test_true_pfm: 4061.185573277049 sim_pfm: 413.60193174296484
episode: 444 training return: tensor(394.9526, device='cuda:0')
episode: 445 training return: tensor(215.1257, device='cuda:0')
episode: 446 training return: tensor(414.2001, device='cuda:0')
episode: 447 training return: tensor(417.5107, device='cuda:0')
epoch: 112 test_true_pfm: 4009.137730112225 sim_pfm: 366.7146264361897
episode: 448 training return: tensor(345.2440, device='cuda:0')
episode: 449 training return: tensor(254.6806, device='cuda:0')
episode: 450 training return: tensor(361.5497, device='cuda:0')
episode: 451 training return: tensor(406.2877, device='cuda:0')
epoch: 113 test_true_pfm: 3964.9451998389864 sim_pfm: 338.17848281839787
episode: 452 training return: tensor(268.8900, device='cuda:0')
episode: 453 training return: tensor(400.0266, device='cuda:0')
episode: 454 training return: tensor(344.9682, device='cuda:0')
episode: 455 training return: tensor(438.1982, device='cuda:0')
epoch: 114 test_true_pfm: 4089.6676764938106 sim_pfm: 339.0725650236903
episode: 456 training return: tensor(445.8255, device='cuda:0')
episode: 457 training return: tensor(258.3234, device='cuda:0')
episode: 458 training return: tensor(259.7234, device='cuda:0')
episode: 459 training return: tensor(251.2737, device='cuda:0')
epoch: 115 test_true_pfm: 4045.287154818316 sim_pfm: 404.37340000787907
episode: 460 training return: tensor(417.7814, device='cuda:0')
episode: 461 training return: tensor(327.3289, device='cuda:0')
episode: 462 training return: tensor(228.4723, device='cuda:0')
episode: 463 training return: tensor(251.2694, device='cuda:0')
epoch: 116 test_true_pfm: 3931.904552133907 sim_pfm: 362.2306534073335
episode: 464 training return: tensor(372.7336, device='cuda:0')
episode: 465 training return: tensor(356.0712, device='cuda:0')
episode: 466 training return: tensor(358.6928, device='cuda:0')
episode: 467 training return: tensor(391.1223, device='cuda:0')
epoch: 117 test_true_pfm: 3834.4698831394776 sim_pfm: 241.94716880024257
episode: 468 training return: tensor(378.2277, device='cuda:0')
episode: 469 training return: tensor(417.3562, device='cuda:0')
episode: 470 training return: tensor(451.0485, device='cuda:0')
episode: 471 training return: tensor(440.7981, device='cuda:0')
epoch: 118 test_true_pfm: 4038.3300028474428 sim_pfm: 405.9920774726973
episode: 472 training return: tensor(419.2823, device='cuda:0')
episode: 473 training return: tensor(220.9352, device='cuda:0')
episode: 474 training return: tensor(319.7202, device='cuda:0')
episode: 475 training return: tensor(272.6540, device='cuda:0')
epoch: 119 test_true_pfm: 4067.2736107134388 sim_pfm: 356.76178535753087
episode: 476 training return: tensor(421.4485, device='cuda:0')
episode: 477 training return: tensor(355.5928, device='cuda:0')
episode: 478 training return: tensor(136.3625, device='cuda:0')
episode: 479 training return: tensor(267.8598, device='cuda:0')
epoch: 120 test_true_pfm: 4096.29678644031 sim_pfm: 341.6168596225519
episode: 480 training return: tensor(236.1597, device='cuda:0')
episode: 481 training return: tensor(459.8813, device='cuda:0')
episode: 482 training return: tensor(346.8380, device='cuda:0')
episode: 483 training return: tensor(284.5294, device='cuda:0')
epoch: 121 test_true_pfm: 3966.748606964504 sim_pfm: 315.7208878969056
episode: 484 training return: tensor(284.0155, device='cuda:0')
episode: 485 training return: tensor(433.2625, device='cuda:0')
episode: 486 training return: tensor(265.5894, device='cuda:0')
episode: 487 training return: tensor(365.6584, device='cuda:0')
epoch: 122 test_true_pfm: 584.7541079443731 sim_pfm: -767.6085979600903
episode: 488 training return: tensor(292.8304, device='cuda:0')
episode: 489 training return: tensor(292.4715, device='cuda:0')
episode: 490 training return: tensor(278.9657, device='cuda:0')
episode: 491 training return: tensor(187.3954, device='cuda:0')
epoch: 123 test_true_pfm: 3951.141570610895 sim_pfm: 373.37349192862166
episode: 492 training return: tensor(332.5945, device='cuda:0')
episode: 493 training return: tensor(291.4838, device='cuda:0')
episode: 494 training return: tensor(-739.7863, device='cuda:0')
episode: 495 training return: tensor(287.3085, device='cuda:0')
epoch: 124 test_true_pfm: 2996.312411856687 sim_pfm: -356.7796688735252
episode: 496 training return: tensor(343.5994, device='cuda:0')
episode: 497 training return: tensor(354.9513, device='cuda:0')
episode: 498 training return: tensor(299.3477, device='cuda:0')
episode: 499 training return: tensor(358.7547, device='cuda:0')
epoch: 125 test_true_pfm: 4095.4503932898538 sim_pfm: 322.1847432958505
episode: 500 training return: tensor(377.4531, device='cuda:0')
episode: 501 training return: tensor(283.7437, device='cuda:0')
episode: 502 training return: tensor(163.1994, device='cuda:0')
episode: 503 training return: tensor(352.9129, device='cuda:0')
epoch: 126 test_true_pfm: 3997.848586824032 sim_pfm: 357.5778359627972
episode: 504 training return: tensor(198.4233, device='cuda:0')
episode: 505 training return: tensor(188.9547, device='cuda:0')
episode: 506 training return: tensor(331.5285, device='cuda:0')
episode: 507 training return: tensor(390.0206, device='cuda:0')
epoch: 127 test_true_pfm: 4006.5051732664087 sim_pfm: 417.7633546704504
episode: 508 training return: tensor(271.4884, device='cuda:0')
episode: 509 training return: tensor(-834.0771, device='cuda:0')
episode: 510 training return: tensor(370.2487, device='cuda:0')
episode: 511 training return: tensor(249.5864, device='cuda:0')
epoch: 128 test_true_pfm: 3957.4302879964357 sim_pfm: 283.31889206380583
episode: 512 training return: tensor(309.5118, device='cuda:0')
episode: 513 training return: tensor(381.4491, device='cuda:0')
episode: 514 training return: tensor(218.1860, device='cuda:0')
episode: 515 training return: tensor(396.1399, device='cuda:0')
epoch: 129 test_true_pfm: 3980.4857795228386 sim_pfm: 337.6761165811719
episode: 516 training return: tensor(271.4203, device='cuda:0')
episode: 517 training return: tensor(182.3184, device='cuda:0')
episode: 518 training return: tensor(324.3651, device='cuda:0')
episode: 519 training return: tensor(411.6409, device='cuda:0')
epoch: 130 test_true_pfm: 3904.9686909250863 sim_pfm: 257.4763140467597
episode: 520 training return: tensor(411.0024, device='cuda:0')
episode: 521 training return: tensor(377.4887, device='cuda:0')
episode: 522 training return: tensor(327.5775, device='cuda:0')
episode: 523 training return: tensor(365.5164, device='cuda:0')
epoch: 131 test_true_pfm: 3976.8372421307736 sim_pfm: 349.54110044269083
episode: 524 training return: tensor(271.1895, device='cuda:0')
episode: 525 training return: tensor(174.6265, device='cuda:0')
episode: 526 training return: tensor(-829.6569, device='cuda:0')
episode: 527 training return: tensor(260.5179, device='cuda:0')
epoch: 132 test_true_pfm: 2672.966371242746 sim_pfm: 375.1393838691777
episode: 528 training return: tensor(389.6887, device='cuda:0')
episode: 529 training return: tensor(163.0674, device='cuda:0')
episode: 530 training return: tensor(498.9632, device='cuda:0')
episode: 531 training return: tensor(119.8470, device='cuda:0')
epoch: 133 test_true_pfm: 3817.583260307763 sim_pfm: 316.22997657739324
episode: 532 training return: tensor(347.3099, device='cuda:0')
episode: 533 training return: tensor(219.3815, device='cuda:0')
episode: 534 training return: tensor(287.2597, device='cuda:0')
episode: 535 training return: tensor(275.5389, device='cuda:0')
epoch: 134 test_true_pfm: 4052.3432330034134 sim_pfm: 385.32513238491566
episode: 536 training return: tensor(-849.8566, device='cuda:0')
episode: 537 training return: tensor(342.1147, device='cuda:0')
episode: 538 training return: tensor(324.5607, device='cuda:0')
episode: 539 training return: tensor(364.7971, device='cuda:0')
epoch: 135 test_true_pfm: 3859.9974022414212 sim_pfm: 348.54500526437187
episode: 540 training return: tensor(354.1598, device='cuda:0')
episode: 541 training return: tensor(356.9788, device='cuda:0')
episode: 542 training return: tensor(347.8837, device='cuda:0')
episode: 543 training return: tensor(279.0871, device='cuda:0')
epoch: 136 test_true_pfm: 3962.036530318616 sim_pfm: 384.823554629945
episode: 544 training return: tensor(333.1340, device='cuda:0')
episode: 545 training return: tensor(221.8185, device='cuda:0')
episode: 546 training return: tensor(211.5495, device='cuda:0')
episode: 547 training return: tensor(354.3802, device='cuda:0')
epoch: 137 test_true_pfm: 3825.178857052442 sim_pfm: 251.05462339590304
episode: 548 training return: tensor(336.8347, device='cuda:0')
episode: 549 training return: tensor(354.1318, device='cuda:0')
episode: 550 training return: tensor(240.9200, device='cuda:0')
episode: 551 training return: tensor(390.1519, device='cuda:0')
epoch: 138 test_true_pfm: 3983.388858183958 sim_pfm: 355.72948633226525
episode: 552 training return: tensor(258.5299, device='cuda:0')
episode: 553 training return: tensor(312.8298, device='cuda:0')
episode: 554 training return: tensor(89.6998, device='cuda:0')
episode: 555 training return: tensor(278.6075, device='cuda:0')
epoch: 139 test_true_pfm: 3998.6978323188305 sim_pfm: 349.41752167309943
episode: 556 training return: tensor(362.0941, device='cuda:0')
episode: 557 training return: tensor(196.8467, device='cuda:0')
episode: 558 training return: tensor(383.0556, device='cuda:0')
episode: 559 training return: tensor(381.6819, device='cuda:0')
epoch: 140 test_true_pfm: 4012.2035617712195 sim_pfm: 373.7963190478428
episode: 560 training return: tensor(413.0808, device='cuda:0')
episode: 561 training return: tensor(301.9946, device='cuda:0')
episode: 562 training return: tensor(309.6420, device='cuda:0')
episode: 563 training return: tensor(429.3209, device='cuda:0')
epoch: 141 test_true_pfm: 3962.6631475071463 sim_pfm: 378.35582185158273
episode: 564 training return: tensor(373.6874, device='cuda:0')
episode: 565 training return: tensor(368.5330, device='cuda:0')
episode: 566 training return: tensor(330.6326, device='cuda:0')
episode: 567 training return: tensor(380.2262, device='cuda:0')
epoch: 142 test_true_pfm: 4011.367597900136 sim_pfm: 403.77904949116055
episode: 568 training return: tensor(366.2180, device='cuda:0')
episode: 569 training return: tensor(229.9261, device='cuda:0')
episode: 570 training return: tensor(332.0703, device='cuda:0')
episode: 571 training return: tensor(288.2313, device='cuda:0')
epoch: 143 test_true_pfm: 1730.0440189973144 sim_pfm: 24.05646168172825
episode: 572 training return: tensor(275.5520, device='cuda:0')
episode: 573 training return: tensor(416.8391, device='cuda:0')
episode: 574 training return: tensor(382.2389, device='cuda:0')
episode: 575 training return: tensor(425.3700, device='cuda:0')
epoch: 144 test_true_pfm: 3926.1831636794645 sim_pfm: 394.07573270592064
episode: 576 training return: tensor(424.3026, device='cuda:0')
episode: 577 training return: tensor(315.1264, device='cuda:0')
episode: 578 training return: tensor(453.7373, device='cuda:0')
episode: 579 training return: tensor(382.0345, device='cuda:0')
epoch: 145 test_true_pfm: 3955.308148584592 sim_pfm: 403.51003298124607
episode: 580 training return: tensor(351.8647, device='cuda:0')
episode: 581 training return: tensor(321.3564, device='cuda:0')
episode: 582 training return: tensor(398.1594, device='cuda:0')
episode: 583 training return: tensor(301.5614, device='cuda:0')
epoch: 146 test_true_pfm: 4038.842604805868 sim_pfm: 366.67228292429354
episode: 584 training return: tensor(295.6978, device='cuda:0')
episode: 585 training return: tensor(295.7102, device='cuda:0')
episode: 586 training return: tensor(319.6419, device='cuda:0')
episode: 587 training return: tensor(425.7285, device='cuda:0')
epoch: 147 test_true_pfm: 3960.496788279221 sim_pfm: 413.2753657882956
episode: 588 training return: tensor(355.3269, device='cuda:0')
episode: 589 training return: tensor(337.8676, device='cuda:0')
episode: 590 training return: tensor(379.2675, device='cuda:0')
episode: 591 training return: tensor(339.1046, device='cuda:0')
epoch: 148 test_true_pfm: 3942.610622069349 sim_pfm: 344.8691244212969
episode: 592 training return: tensor(309.0570, device='cuda:0')
episode: 593 training return: tensor(370.7344, device='cuda:0')
episode: 594 training return: tensor(361.2265, device='cuda:0')
episode: 595 training return: tensor(360.4396, device='cuda:0')
epoch: 149 test_true_pfm: 4013.603120435188 sim_pfm: 410.47143773839343
episode: 596 training return: tensor(411.0336, device='cuda:0')
episode: 597 training return: tensor(466.4893, device='cuda:0')
episode: 598 training return: tensor(424.9180, device='cuda:0')
episode: 599 training return: tensor(379.0714, device='cuda:0')
epoch: 150 test_true_pfm: 4006.380145059224 sim_pfm: 248.31195586617105
