['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '4', '--data', '30000']
epoch: 0 training_loss 0.237241620272398 test_loss: 0.21595439910888672
epoch: 1 training_loss 0.19951894484460353 test_loss: 0.20125465393066405
epoch: 2 training_loss 0.20066890709102153 test_loss: 0.2039850950241089
epoch: 3 training_loss 0.19748651050031185 test_loss: 0.18961495161056519
epoch: 4 training_loss 0.19499108508229257 test_loss: 0.17748327255249025
epoch: 5 training_loss 0.19241891980171202 test_loss: 0.1823416233062744
epoch: 6 training_loss 0.1920819231122732 test_loss: 0.17376420497894288
epoch: 7 training_loss 0.19166871167719365 test_loss: 0.19318677186965943
epoch: 8 training_loss 0.17664149433374404 test_loss: 0.18901156187057494
epoch: 9 training_loss 0.18924415729939936 test_loss: 0.18299572467803954
epoch: 10 training_loss 0.18849668137729167 test_loss: 0.18054287433624266
epoch: 11 training_loss 0.19368491396307946 test_loss: 0.18824011087417603
epoch: 12 training_loss 0.18261355228722095 test_loss: 0.18024402856826782
epoch: 13 training_loss 0.1845921950787306 test_loss: 0.17619378566741944
epoch: 14 training_loss 0.18585941188037394 test_loss: 0.17224262952804564
epoch: 15 training_loss 0.1735509517043829 test_loss: 0.19800095558166503
epoch: 16 training_loss 0.1740288570523262 test_loss: 0.18062700033187867
epoch: 17 training_loss 0.18972475349903106 test_loss: 0.17760756015777587
epoch: 18 training_loss 0.17536923229694368 test_loss: 0.19737988710403442
epoch: 19 training_loss 0.180805524289608 test_loss: 0.16817523241043092
epoch: 20 training_loss 0.18914667420089246 test_loss: 0.17028820514678955
epoch: 21 training_loss 0.1882957600802183 test_loss: 0.18318530321121215
epoch: 22 training_loss 0.18098159857094287 test_loss: 0.18551465272903442
epoch: 23 training_loss 0.18549295172095298 test_loss: 0.16920759677886962
epoch: 24 training_loss 0.18566273108124734 test_loss: 0.18178467750549315
epoch: 25 training_loss 0.18090701833367348 test_loss: 0.17705105543136596
epoch: 26 training_loss 0.1738132644444704 test_loss: 0.18040602207183837
epoch: 27 training_loss 0.1775873750448227 test_loss: 0.16625117063522338
epoch: 28 training_loss 0.18147239550948144 test_loss: 0.16855610609054567
epoch: 29 training_loss 0.1873466520756483 test_loss: 0.18046501874923707
epoch: 30 training_loss 0.1783382596820593 test_loss: 0.17347131967544555
epoch: 31 training_loss 0.18236660540103913 test_loss: 0.19086434841156005
epoch: 32 training_loss 0.18600425645709037 test_loss: 0.17364927530288696
epoch: 33 training_loss 0.1807622716575861 test_loss: 0.16890835762023926
epoch: 34 training_loss 0.18437675818800925 test_loss: 0.18728762865066528
epoch: 35 training_loss 0.18109260112047196 test_loss: 0.16738225221633912
epoch: 36 training_loss 0.1799090924859047 test_loss: 0.19208749532699584
epoch: 37 training_loss 0.1821987023949623 test_loss: 0.1762021780014038
epoch: 38 training_loss 0.17318956583738326 test_loss: 0.17838134765625
epoch: 39 training_loss 0.17924073874950408 test_loss: 0.1825260877609253
epoch: 40 training_loss 0.17703790210187434 test_loss: 0.18988230228424072
epoch: 41 training_loss 0.17965555489063262 test_loss: 0.19125345945358277
epoch: 42 training_loss 0.1774984996765852 test_loss: 0.17396783828735352
epoch: 43 training_loss 0.17724148526787759 test_loss: 0.17479130029678344
epoch: 44 training_loss 0.1820144610852003 test_loss: 0.18407292366027833
epoch: 45 training_loss 0.17816801972687243 test_loss: 0.16296875476837158
epoch: 46 training_loss 0.18254814367741345 test_loss: 0.18552603721618652
epoch: 47 training_loss 0.1701447144150734 test_loss: 0.1798700213432312
epoch: 48 training_loss 0.18514309145510197 test_loss: 0.1751653790473938
epoch: 49 training_loss 0.18170687183737755 test_loss: 0.17823032140731812
epoch: 50 training_loss 0.17615219920873643 test_loss: 0.16650351285934448
epoch: 51 training_loss 0.17829649671912193 test_loss: 0.16304898262023926
epoch: 52 training_loss 0.17781176425516607 test_loss: 0.16876071691513062
epoch: 53 training_loss 0.1777754870057106 test_loss: 0.17142300605773925
epoch: 54 training_loss 0.17711645640432835 test_loss: 0.17609494924545288
epoch: 55 training_loss 0.17727841943502426 test_loss: 0.1877621293067932
epoch: 56 training_loss 0.1799947053939104 test_loss: 0.17506638765335084
epoch: 57 training_loss 0.1864731579273939 test_loss: 0.19017839431762695
epoch: 58 training_loss 0.17886302199214696 test_loss: 0.17010121345520018
epoch: 59 training_loss 0.18404626786708833 test_loss: 0.17938153743743895
epoch: 60 training_loss 0.18378489792346955 test_loss: 0.181515896320343
epoch: 61 training_loss 0.17645625568926335 test_loss: 0.1823704481124878
epoch: 62 training_loss 0.18170476742088795 test_loss: 0.18005883693695068
epoch: 63 training_loss 0.18002947695553304 test_loss: 0.18895530700683594
epoch: 64 training_loss 0.1860665152221918 test_loss: 0.16478687524795532
epoch: 65 training_loss 0.18887358985841274 test_loss: 0.16394222974777223
epoch: 66 training_loss 0.17603895522654056 test_loss: 0.18945573568344115
epoch: 67 training_loss 0.17975328281521796 test_loss: 0.16738157272338866
epoch: 68 training_loss 0.17910801827907563 test_loss: 0.1700378656387329
epoch: 69 training_loss 0.17141931392252446 test_loss: 0.18898234367370606
epoch: 70 training_loss 0.167707464620471 test_loss: 0.18945003747940065
epoch: 71 training_loss 0.18370748318731786 test_loss: 0.1856036067008972
epoch: 72 training_loss 0.17824348993599415 test_loss: 0.1721442699432373
epoch: 73 training_loss 0.17448909692466258 test_loss: 0.1715104103088379
epoch: 74 training_loss 0.17984927624464034 test_loss: 0.16675511598587037
epoch: 75 training_loss 0.176075282394886 test_loss: 0.1920090436935425
epoch: 76 training_loss 0.17674832351505756 test_loss: 0.17858103513717652
epoch: 77 training_loss 0.17483624830842018 test_loss: 0.1661431074142456
epoch: 78 training_loss 0.17939791589975357 test_loss: 0.1630123734474182
epoch: 79 training_loss 0.18048940122127533 test_loss: 0.1871604084968567
epoch: 80 training_loss 0.17749597035348416 test_loss: 0.15225809812545776
epoch: 81 training_loss 0.1831501829624176 test_loss: 0.16263967752456665
epoch: 82 training_loss 0.18047706812620162 test_loss: 0.18483647108078002
epoch: 83 training_loss 0.1695219173282385 test_loss: 0.16808011531829833
epoch: 84 training_loss 0.18141339987516403 test_loss: 0.17343930006027222
epoch: 85 training_loss 0.16960611037909984 test_loss: 0.1747766137123108
epoch: 86 training_loss 0.18165980875492097 test_loss: 0.1564418077468872
epoch: 87 training_loss 0.18497571848332883 test_loss: 0.16644591093063354
epoch: 88 training_loss 0.17617541685700416 test_loss: 0.16974934339523315
epoch: 89 training_loss 0.17653155848383903 test_loss: 0.16147127151489257
epoch: 90 training_loss 0.176947984918952 test_loss: 0.17667338848114014
epoch: 91 training_loss 0.17301328107714653 test_loss: 0.1697592854499817
epoch: 92 training_loss 0.1756334000825882 test_loss: 0.17806953191757202
epoch: 93 training_loss 0.17951140075922012 test_loss: 0.1866573691368103
epoch: 94 training_loss 0.1756042993813753 test_loss: 0.17690304517745972
epoch: 95 training_loss 0.1813659880310297 test_loss: 0.17256358861923218
epoch: 96 training_loss 0.1834597823023796 test_loss: 0.18849002122879027
epoch: 97 training_loss 0.18688464112579822 test_loss: 0.17108802795410155
epoch: 98 training_loss 0.17124763414263725 test_loss: 0.16239919662475585
epoch: 99 training_loss 0.1802734335884452 test_loss: 0.16273030042648315
epoch: 100 training_loss 0.1794400654733181 test_loss: 0.16037517786026
epoch: 101 training_loss 0.17254998832941054 test_loss: 0.1642765760421753
epoch: 102 training_loss 0.1790701825171709 test_loss: 0.17188433408737183
epoch: 103 training_loss 0.17771599918603898 test_loss: 0.1597437858581543
epoch: 104 training_loss 0.1838546358793974 test_loss: 0.17850526571273803
epoch: 105 training_loss 0.17457993932068347 test_loss: 0.1812907099723816
epoch: 106 training_loss 0.180364945307374 test_loss: 0.17207435369491578
epoch: 107 training_loss 0.18043219588696957 test_loss: 0.17479774951934815
epoch: 108 training_loss 0.17881179228425026 test_loss: 0.18401103019714354
epoch: 109 training_loss 0.17071871381253004 test_loss: 0.17358598709106446
epoch: 110 training_loss 0.1753318754583597 test_loss: 0.17012015581130982
epoch: 111 training_loss 0.18182772725820542 test_loss: 0.16996665000915528
epoch: 112 training_loss 0.18273814767599106 test_loss: 0.16817357540130615
epoch: 113 training_loss 0.181304262727499 test_loss: 0.16606990098953248
epoch: 114 training_loss 0.17703415781259538 test_loss: 0.17855477333068848
epoch: 115 training_loss 0.17654913187026977 test_loss: 0.1731106162071228
epoch: 116 training_loss 0.17747953698039054 test_loss: 0.18520350456237794
epoch: 117 training_loss 0.1866917596757412 test_loss: 0.1715061068534851
epoch: 118 training_loss 0.1734691084548831 test_loss: 0.18903907537460327
epoch: 119 training_loss 0.18167312264442445 test_loss: 0.17057846784591674
epoch: 120 training_loss 0.16927099522203207 test_loss: 0.18264139890670777
epoch: 121 training_loss 0.17792966820299624 test_loss: 0.17099521160125733
epoch: 122 training_loss 0.17359017960727216 test_loss: 0.18630410432815553
epoch: 123 training_loss 0.17766102142632006 test_loss: 0.18548059463500977
epoch: 124 training_loss 0.16717568382620812 test_loss: 0.16438472270965576
epoch: 125 training_loss 0.17882230922579764 test_loss: 0.17469356060028077
epoch: 126 training_loss 0.17155601039528848 test_loss: 0.17930929660797118
epoch: 127 training_loss 0.1808431427925825 test_loss: 0.16902488470077515
epoch: 128 training_loss 0.18222520634531975 test_loss: 0.17097307443618776
epoch: 129 training_loss 0.17915385589003563 test_loss: 0.1722835898399353
epoch: 130 training_loss 0.17746560737490655 test_loss: 0.17242162227630614
epoch: 131 training_loss 0.17952805563807486 test_loss: 0.16651449203491211
epoch: 132 training_loss 0.1791955304890871 test_loss: 0.18822025060653685
epoch: 133 training_loss 0.16845319129526615 test_loss: 0.17175806760787965
epoch: 134 training_loss 0.16973694495856761 test_loss: 0.16895596981048583
epoch: 135 training_loss 0.17402768410742284 test_loss: 0.17165290117263793
epoch: 136 training_loss 0.17106374584138392 test_loss: 0.1783847212791443
epoch: 137 training_loss 0.18105791680514813 test_loss: 0.16507177352905272
epoch: 138 training_loss 0.17168184749782087 test_loss: 0.170828914642334
epoch: 139 training_loss 0.17373970732092858 test_loss: 0.16256723403930665
epoch: 140 training_loss 0.1751213052123785 test_loss: 0.18389852046966554
epoch: 141 training_loss 0.17354241847991944 test_loss: 0.1646593451499939
epoch: 142 training_loss 0.1723599746078253 test_loss: 0.18161102533340454
epoch: 143 training_loss 0.17459299273788928 test_loss: 0.1762125849723816
epoch: 144 training_loss 0.16402187794446946 test_loss: 0.17735869884490968
epoch: 145 training_loss 0.1746734355390072 test_loss: 0.1750168800354004
epoch: 146 training_loss 0.17232677765190602 test_loss: 0.180217969417572
epoch: 147 training_loss 0.17358139369636774 test_loss: 0.18656636476516725
epoch: 148 training_loss 0.1741047827899456 test_loss: 0.17069363594055176
epoch: 149 training_loss 0.17129908487200737 test_loss: 0.16773717403411864
epoch: 0 training_loss 7.9883406400680546 test_loss: 5.089273071289062
epoch: 1 training_loss 3.942942340373993 test_loss: 3.322915267944336
epoch: 2 training_loss 2.8396743869781496 test_loss: 2.4906797409057617
epoch: 3 training_loss 2.2894551479816436 test_loss: 2.102774238586426
epoch: 4 training_loss 1.9483944416046142 test_loss: 1.8156204223632812
epoch: 5 training_loss 1.754065259695053 test_loss: 1.6240100860595703
epoch: 6 training_loss 1.5908280789852143 test_loss: 1.5241288185119628
epoch: 7 training_loss 1.4889119505882262 test_loss: 1.4361858367919922
epoch: 8 training_loss 1.3900484621524811 test_loss: 1.4458574295043944
epoch: 9 training_loss 1.3387670564651488 test_loss: 1.3532341957092284
epoch: 10 training_loss 1.2621665859222413 test_loss: 1.303409957885742
epoch: 11 training_loss 1.2015576791763305 test_loss: 1.152705192565918
epoch: 12 training_loss 1.1489771670103073 test_loss: 1.1635678291320801
epoch: 13 training_loss 1.1309343761205672 test_loss: 1.1363758087158202
epoch: 14 training_loss 1.078582963347435 test_loss: 1.0830673217773437
epoch: 15 training_loss 1.0435540109872818 test_loss: 1.0817926406860352
epoch: 16 training_loss 1.0220840185880662 test_loss: 1.026677417755127
epoch: 17 training_loss 1.0018403202295303 test_loss: 1.0016679763793945
epoch: 18 training_loss 0.9590855181217194 test_loss: 0.9677695274353028
epoch: 19 training_loss 0.9618096041679383 test_loss: 0.9517305374145508
epoch: 20 training_loss 0.9298012006282806 test_loss: 0.9382074356079102
epoch: 21 training_loss 0.9347151052951813 test_loss: 0.9004404067993164
epoch: 22 training_loss 0.9276176816225052 test_loss: 0.8876706123352051
epoch: 23 training_loss 0.8812559133768082 test_loss: 0.8687650680541992
epoch: 24 training_loss 0.8722616666555405 test_loss: 0.8995107650756836
epoch: 25 training_loss 0.8566070407629013 test_loss: 0.8798370361328125
epoch: 26 training_loss 0.8432465207576751 test_loss: 0.8423810005187988
epoch: 27 training_loss 0.8449244034290314 test_loss: 0.8253783226013184
epoch: 28 training_loss 0.8314445549249649 test_loss: 0.8247726440429688
epoch: 29 training_loss 0.798398454785347 test_loss: 0.7886434078216553
epoch: 30 training_loss 0.7840897870063782 test_loss: 0.8041099548339844
epoch: 31 training_loss 0.7903739410638809 test_loss: 0.8148600578308105
epoch: 32 training_loss 0.7785435003042221 test_loss: 0.7692063331604004
epoch: 33 training_loss 0.7715261524915695 test_loss: 0.7711139678955078
epoch: 34 training_loss 0.7472029513120652 test_loss: 0.782551383972168
epoch: 35 training_loss 0.7583523547649383 test_loss: 0.7604018211364746
epoch: 36 training_loss 0.7458400696516037 test_loss: 0.7467848777770996
epoch: 37 training_loss 0.740338152050972 test_loss: 0.757906723022461
epoch: 38 training_loss 0.7372248601913453 test_loss: 0.7580224514007569
epoch: 39 training_loss 0.7327724063396454 test_loss: 0.7260436058044434
epoch: 40 training_loss 0.7268856304883957 test_loss: 0.7314497947692871
epoch: 41 training_loss 0.7338617414236068 test_loss: 0.6950171947479248
epoch: 42 training_loss 0.7124123847484589 test_loss: 0.7323519706726074
epoch: 43 training_loss 0.7060991394519806 test_loss: 0.7079563140869141
epoch: 44 training_loss 0.6989334040880203 test_loss: 0.6952355861663818
epoch: 45 training_loss 0.7001545971632004 test_loss: 0.7216249465942383
epoch: 46 training_loss 0.6833829808235169 test_loss: 0.6727821826934814
epoch: 47 training_loss 0.6826688259840011 test_loss: 0.7031366348266601
epoch: 48 training_loss 0.6797756659984588 test_loss: 0.6722657203674316
epoch: 49 training_loss 0.6676918941736222 test_loss: 0.7162869453430176
epoch: 50 training_loss 0.6660692101716995 test_loss: 0.6807862758636475
epoch: 51 training_loss 0.6641870939731598 test_loss: 0.6688501358032226
epoch: 52 training_loss 0.6604948276281357 test_loss: 0.6645825862884521
epoch: 53 training_loss 0.6717665076255799 test_loss: 0.6892754077911377
epoch: 54 training_loss 0.6645229077339172 test_loss: 0.6603995323181152
epoch: 55 training_loss 0.6532305067777634 test_loss: 0.6571091175079345
epoch: 56 training_loss 0.6450138324499131 test_loss: 0.6588071346282959
epoch: 57 training_loss 0.6434074568748475 test_loss: 0.6395199775695801
epoch: 58 training_loss 0.6344720143079757 test_loss: 0.6365833282470703
epoch: 59 training_loss 0.6379047161340714 test_loss: 0.645102596282959
epoch: 60 training_loss 0.632011485695839 test_loss: 0.6537134170532226
epoch: 61 training_loss 0.6372544759511948 test_loss: 0.6378902435302735
epoch: 62 training_loss 0.6225194996595382 test_loss: 0.659425687789917
epoch: 63 training_loss 0.6229633295536041 test_loss: 0.6145167350769043
epoch: 64 training_loss 0.6214042609930038 test_loss: 0.6338933944702149
epoch: 65 training_loss 0.6253711605072021 test_loss: 0.628547477722168
epoch: 66 training_loss 0.6157881510257721 test_loss: 0.6146433353424072
epoch: 67 training_loss 0.6198088282346725 test_loss: 0.6517283916473389
epoch: 68 training_loss 0.6128988891839982 test_loss: 0.6162878513336182
epoch: 69 training_loss 0.6036955553293228 test_loss: 0.6037110328674317
epoch: 70 training_loss 0.6101883631944657 test_loss: 0.594481372833252
epoch: 71 training_loss 0.6108574593067169 test_loss: 0.6345779895782471
epoch: 72 training_loss 0.6141035908460617 test_loss: 0.6240466594696045
epoch: 73 training_loss 0.6035210019350052 test_loss: 0.5995388031005859
epoch: 74 training_loss 0.5870622622966767 test_loss: 0.611696720123291
epoch: 75 training_loss 0.6248625439405441 test_loss: 0.602426815032959
epoch: 76 training_loss 0.5853271096944809 test_loss: 0.5926392078399658
epoch: 77 training_loss 0.5921952033042908 test_loss: 0.5839146137237549
epoch: 78 training_loss 0.5924398601055145 test_loss: 0.6329504966735839
epoch: 79 training_loss 0.5893844276666641 test_loss: 0.5947409629821777
epoch: 80 training_loss 0.6004235762357711 test_loss: 0.6063086509704589
epoch: 81 training_loss 0.5820082437992096 test_loss: 0.5820430755615235
epoch: 82 training_loss 0.5884387189149857 test_loss: 0.6022334575653077
epoch: 83 training_loss 0.5824921971559525 test_loss: 0.5968676567077636
epoch: 84 training_loss 0.582728943824768 test_loss: 0.5753772258758545
epoch: 85 training_loss 0.5731768900156021 test_loss: 0.6076177597045899
epoch: 86 training_loss 0.5790264397859574 test_loss: 0.5810805320739746
epoch: 87 training_loss 0.5771803230047226 test_loss: 0.5608830928802491
epoch: 88 training_loss 0.5709363180398941 test_loss: 0.568522596359253
epoch: 89 training_loss 0.578154765367508 test_loss: 0.5509552478790283
epoch: 90 training_loss 0.5656175589561463 test_loss: 0.5793214797973633
epoch: 91 training_loss 0.5723173356056214 test_loss: 0.5707494735717773
epoch: 92 training_loss 0.5632548555731773 test_loss: 0.5564056873321533
epoch: 93 training_loss 0.5680858865380287 test_loss: 0.573175048828125
epoch: 94 training_loss 0.5515732276439667 test_loss: 0.5562263011932373
epoch: 95 training_loss 0.5617859509587287 test_loss: 0.5665807723999023
epoch: 96 training_loss 0.5642715692520142 test_loss: 0.5670502662658692
epoch: 97 training_loss 0.5667757457494735 test_loss: 0.5459495544433594
epoch: 98 training_loss 0.5814048933982849 test_loss: 0.584510326385498
epoch: 99 training_loss 0.5670997536182404 test_loss: 0.5479970455169678
epoch: 100 training_loss 0.5503189960122108 test_loss: 0.5562784194946289
epoch: 101 training_loss 0.568866523206234 test_loss: 0.6211309432983398
epoch: 102 training_loss 0.5538486546278 test_loss: 0.5660029888153076
epoch: 103 training_loss 0.5569087675213814 test_loss: 0.5414648532867432
epoch: 104 training_loss 0.5703440845012665 test_loss: 0.5744986534118652
epoch: 105 training_loss 0.5458829537034035 test_loss: 0.5561639308929444
epoch: 106 training_loss 0.5473694092035294 test_loss: 0.5372246265411377
epoch: 107 training_loss 0.5510929489135742 test_loss: 0.5502076148986816
epoch: 108 training_loss 0.5435230737924576 test_loss: 0.5586550235748291
epoch: 109 training_loss 0.5525934833288193 test_loss: 0.5399235248565674
epoch: 110 training_loss 0.5459699946641922 test_loss: 0.5365365505218506
epoch: 111 training_loss 0.5367659804224968 test_loss: 0.5530742168426513
epoch: 112 training_loss 0.5453254878520966 test_loss: 0.5403122901916504
epoch: 113 training_loss 0.5302448809146881 test_loss: 0.555417823791504
epoch: 114 training_loss 0.538412806391716 test_loss: 0.5737821102142334
epoch: 115 training_loss 0.5418816983699799 test_loss: 0.545716667175293
epoch: 116 training_loss 0.5397937044501304 test_loss: 0.554867935180664
epoch: 117 training_loss 0.542911893427372 test_loss: 0.5437232971191406
epoch: 118 training_loss 0.542537747323513 test_loss: 0.6021613597869873
epoch: 119 training_loss 0.5392455878853798 test_loss: 0.5579326629638672
epoch: 120 training_loss 0.5321758040785789 test_loss: 0.5628720283508301
epoch: 121 training_loss 0.5434077998995781 test_loss: 0.5700396537780762
epoch: 122 training_loss 0.5317537316679954 test_loss: 0.5385828495025635
epoch: 123 training_loss 0.5388515359163284 test_loss: 0.5431861400604248
epoch: 124 training_loss 0.5334066557884216 test_loss: 0.5212310791015625
epoch: 125 training_loss 0.5357738965749741 test_loss: 0.5218456268310547
epoch: 126 training_loss 0.5251793071627617 test_loss: 0.5305351257324219
epoch: 127 training_loss 0.5288322800397873 test_loss: 0.5409749984741211
epoch: 128 training_loss 0.53601378262043 test_loss: 0.525178050994873
epoch: 129 training_loss 0.5299960780143738 test_loss: 0.5249204635620117
epoch: 130 training_loss 0.5414828231930733 test_loss: 0.5361607074737549
epoch: 131 training_loss 0.5121075981855392 test_loss: 0.5163411140441895
epoch: 132 training_loss 0.5173889929056168 test_loss: 0.5160831928253173
epoch: 133 training_loss 0.5234759446978569 test_loss: 0.52037353515625
epoch: 134 training_loss 0.5220779317617417 test_loss: 0.5093668937683106
epoch: 135 training_loss 0.5294332778453827 test_loss: 0.5682503700256347
epoch: 136 training_loss 0.523947479724884 test_loss: 0.5210318088531494
epoch: 137 training_loss 0.5310620310902595 test_loss: 0.5818434238433838
epoch: 138 training_loss 0.5193897491693497 test_loss: 0.53353853225708
epoch: 139 training_loss 0.5118848469853401 test_loss: 0.5617849826812744
epoch: 140 training_loss 0.5181281721591949 test_loss: 0.511497688293457
epoch: 141 training_loss 0.5159532925486565 test_loss: 0.5221670150756836
epoch: 142 training_loss 0.5123064044117928 test_loss: 0.513323163986206
epoch: 143 training_loss 0.5168359786272049 test_loss: 0.5200934886932373
epoch: 144 training_loss 0.5139922422170639 test_loss: 0.5419250011444092
epoch: 145 training_loss 0.5166007736325264 test_loss: 0.525673770904541
epoch: 146 training_loss 0.5104730877280236 test_loss: 0.513543176651001
epoch: 147 training_loss 0.5189741095900535 test_loss: 0.5505001068115234
epoch: 148 training_loss 0.5145507189631462 test_loss: 0.5067418098449707
epoch: 149 training_loss 0.5111118364334106 test_loss: 0.5285768985748291
1458.0329783685024
episode: 0 training return: tensor(-192.1377, device='cuda:0')
episode: 1 training return: tensor(-216.9029, device='cuda:0')
episode: 2 training return: tensor(-319.0824, device='cuda:0')
episode: 3 training return: tensor(-238.8743, device='cuda:0')
epoch: 1 test_true_pfm: 1982.1073165514952 sim_pfm: -290.49536029730615
episode: 4 training return: tensor(-179.5414, device='cuda:0')
episode: 5 training return: tensor(-240.8323, device='cuda:0')
episode: 6 training return: tensor(-317.5755, device='cuda:0')
episode: 7 training return: tensor(-320.2561, device='cuda:0')
epoch: 2 test_true_pfm: 1994.252671982874 sim_pfm: -335.80520511091646
episode: 8 training return: tensor(-320.6271, device='cuda:0')
episode: 9 training return: tensor(-287.3005, device='cuda:0')
episode: 10 training return: tensor(-124.3538, device='cuda:0')
episode: 11 training return: tensor(-331.8384, device='cuda:0')
epoch: 3 test_true_pfm: 2224.9261192540957 sim_pfm: -286.3136949626108
episode: 12 training return: tensor(-287.4200, device='cuda:0')
episode: 13 training return: tensor(386.0001, device='cuda:0')
episode: 14 training return: tensor(-209.7612, device='cuda:0')
episode: 15 training return: tensor(-279.2207, device='cuda:0')
epoch: 4 test_true_pfm: 1444.0259692866548 sim_pfm: -265.44815087749157
episode: 16 training return: tensor(57.5937, device='cuda:0')
episode: 17 training return: tensor(-173.8006, device='cuda:0')
episode: 18 training return: tensor(-186.6781, device='cuda:0')
episode: 19 training return: tensor(-230.4077, device='cuda:0')
epoch: 5 test_true_pfm: 1277.4025725398428 sim_pfm: -342.250613631603
episode: 20 training return: tensor(180.5780, device='cuda:0')
episode: 21 training return: tensor(-354.5985, device='cuda:0')
episode: 22 training return: tensor(13.2730, device='cuda:0')
episode: 23 training return: tensor(296.6234, device='cuda:0')
epoch: 6 test_true_pfm: 2715.7915775748224 sim_pfm: -152.27426805785703
episode: 24 training return: tensor(-170.0190, device='cuda:0')
episode: 25 training return: tensor(184.4351, device='cuda:0')
episode: 26 training return: tensor(362.2458, device='cuda:0')
episode: 27 training return: tensor(-230.6797, device='cuda:0')
epoch: 7 test_true_pfm: 1578.0807995040625 sim_pfm: -301.26451370242285
episode: 28 training return: tensor(-376.9936, device='cuda:0')
episode: 29 training return: tensor(-132.1029, device='cuda:0')
episode: 30 training return: tensor(-397.6083, device='cuda:0')
episode: 31 training return: tensor(-349.5559, device='cuda:0')
epoch: 8 test_true_pfm: 1357.8087844348793 sim_pfm: -349.71386908747564
episode: 32 training return: tensor(-372.0046, device='cuda:0')
episode: 33 training return: tensor(207.8392, device='cuda:0')
episode: 34 training return: tensor(-294.7578, device='cuda:0')
episode: 35 training return: tensor(-337.4622, device='cuda:0')
epoch: 9 test_true_pfm: 1437.8375081543115 sim_pfm: -313.921923519345
episode: 36 training return: tensor(-313.5533, device='cuda:0')
episode: 37 training return: tensor(-321.3432, device='cuda:0')
episode: 38 training return: tensor(-161.5077, device='cuda:0')
episode: 39 training return: tensor(-354.2282, device='cuda:0')
epoch: 10 test_true_pfm: 2126.905198369637 sim_pfm: -336.0944928779888
episode: 40 training return: tensor(196.8575, device='cuda:0')
episode: 41 training return: tensor(-307.4434, device='cuda:0')
episode: 42 training return: tensor(-314.4449, device='cuda:0')
episode: 43 training return: tensor(253.7837, device='cuda:0')
epoch: 11 test_true_pfm: 1364.0187655386414 sim_pfm: -344.7055075343621
episode: 44 training return: tensor(-367.2150, device='cuda:0')
episode: 45 training return: tensor(22.7627, device='cuda:0')
episode: 46 training return: tensor(-180.6390, device='cuda:0')
episode: 47 training return: tensor(-357.8175, device='cuda:0')
epoch: 12 test_true_pfm: 1590.5969329941347 sim_pfm: -243.66565622012908
episode: 48 training return: tensor(-222.9757, device='cuda:0')
episode: 49 training return: tensor(-363.4949, device='cuda:0')
episode: 50 training return: tensor(-195.6149, device='cuda:0')
episode: 51 training return: tensor(68.4052, device='cuda:0')
epoch: 13 test_true_pfm: 1394.9919456311516 sim_pfm: -299.9818536155702
episode: 52 training return: tensor(268.8193, device='cuda:0')
episode: 53 training return: tensor(-317.0425, device='cuda:0')
episode: 54 training return: tensor(-327.5614, device='cuda:0')
episode: 55 training return: tensor(400.5402, device='cuda:0')
epoch: 14 test_true_pfm: 1561.5760707093775 sim_pfm: -258.845206186641
episode: 56 training return: tensor(-261.2215, device='cuda:0')
episode: 57 training return: tensor(-310.9291, device='cuda:0')
episode: 58 training return: tensor(-317.2208, device='cuda:0')
episode: 59 training return: tensor(-98.7870, device='cuda:0')
epoch: 15 test_true_pfm: 1629.913132977593 sim_pfm: -234.83315299722986
episode: 60 training return: tensor(101.2236, device='cuda:0')
episode: 61 training return: tensor(-329.0749, device='cuda:0')
episode: 62 training return: tensor(-223.7195, device='cuda:0')
episode: 63 training return: tensor(224.0695, device='cuda:0')
epoch: 16 test_true_pfm: 1541.3760005018094 sim_pfm: -237.92323638165058
episode: 64 training return: tensor(-323.2837, device='cuda:0')
episode: 65 training return: tensor(-85.4871, device='cuda:0')
episode: 66 training return: tensor(211.3007, device='cuda:0')
episode: 67 training return: tensor(-309.6074, device='cuda:0')
epoch: 17 test_true_pfm: 2331.6650317678545 sim_pfm: -233.89600416371832
episode: 68 training return: tensor(-273.8701, device='cuda:0')
episode: 69 training return: tensor(-244.4509, device='cuda:0')
episode: 70 training return: tensor(-309.6396, device='cuda:0')
episode: 71 training return: tensor(-296.1661, device='cuda:0')
epoch: 18 test_true_pfm: 2147.8948756604673 sim_pfm: -135.23417826912677
episode: 72 training return: tensor(-370.2491, device='cuda:0')
episode: 73 training return: tensor(369.1990, device='cuda:0')
episode: 74 training return: tensor(-313.3889, device='cuda:0')
episode: 75 training return: tensor(-287.0208, device='cuda:0')
epoch: 19 test_true_pfm: 2812.5748467781127 sim_pfm: -2.2180894994332143
episode: 76 training return: tensor(-209.7068, device='cuda:0')
episode: 77 training return: tensor(-305.5065, device='cuda:0')
episode: 78 training return: tensor(-252.5235, device='cuda:0')
episode: 79 training return: tensor(-311.9452, device='cuda:0')
epoch: 20 test_true_pfm: 1895.021282615536 sim_pfm: -218.5855364948511
episode: 80 training return: tensor(0.1466, device='cuda:0')
episode: 81 training return: tensor(-138.1279, device='cuda:0')
episode: 82 training return: tensor(-280.8351, device='cuda:0')
episode: 83 training return: tensor(-338.1589, device='cuda:0')
epoch: 21 test_true_pfm: 2442.7733058351314 sim_pfm: -156.96425843048686
episode: 84 training return: tensor(-329.0555, device='cuda:0')
episode: 85 training return: tensor(-305.7376, device='cuda:0')
episode: 86 training return: tensor(-247.0094, device='cuda:0')
episode: 87 training return: tensor(-304.1558, device='cuda:0')
epoch: 22 test_true_pfm: 2240.082902013751 sim_pfm: -188.54477385105565
episode: 88 training return: tensor(-188.4425, device='cuda:0')
episode: 89 training return: tensor(-191.4178, device='cuda:0')
episode: 90 training return: tensor(-312.0507, device='cuda:0')
episode: 91 training return: tensor(43.6848, device='cuda:0')
epoch: 23 test_true_pfm: 1902.9494999593578 sim_pfm: -241.58046868008873
episode: 92 training return: tensor(-377.7021, device='cuda:0')
episode: 93 training return: tensor(-280.3912, device='cuda:0')
episode: 94 training return: tensor(-148.0712, device='cuda:0')
episode: 95 training return: tensor(-323.1629, device='cuda:0')
epoch: 24 test_true_pfm: 1617.1197973202106 sim_pfm: -35.24093933670277
episode: 96 training return: tensor(123.7458, device='cuda:0')
episode: 97 training return: tensor(-319.0410, device='cuda:0')
episode: 98 training return: tensor(-266.0450, device='cuda:0')
episode: 99 training return: tensor(-238.2276, device='cuda:0')
epoch: 25 test_true_pfm: 1949.417632169766 sim_pfm: -127.13140024423289
episode: 100 training return: tensor(-199.1422, device='cuda:0')
episode: 101 training return: tensor(-364.1649, device='cuda:0')
episode: 102 training return: tensor(-14.2271, device='cuda:0')
episode: 103 training return: tensor(-88.7788, device='cuda:0')
epoch: 26 test_true_pfm: 1929.3307743883063 sim_pfm: 27.551891627799098
episode: 104 training return: tensor(259.3304, device='cuda:0')
episode: 105 training return: tensor(-237.2686, device='cuda:0')
episode: 106 training return: tensor(-307.7425, device='cuda:0')
episode: 107 training return: tensor(-291.9813, device='cuda:0')
epoch: 27 test_true_pfm: 1941.3097204329417 sim_pfm: -227.2603333412359
episode: 108 training return: tensor(-183.4221, device='cuda:0')
episode: 109 training return: tensor(-213.1246, device='cuda:0')
episode: 110 training return: tensor(-315.5519, device='cuda:0')
episode: 111 training return: tensor(-359.1515, device='cuda:0')
epoch: 28 test_true_pfm: 1780.9414847286316 sim_pfm: -233.2766274456711
episode: 112 training return: tensor(-331.8206, device='cuda:0')
episode: 113 training return: tensor(-331.7300, device='cuda:0')
episode: 114 training return: tensor(-312.5565, device='cuda:0')
episode: 115 training return: tensor(346.6660, device='cuda:0')
epoch: 29 test_true_pfm: 1732.0395340371908 sim_pfm: -254.1539761212965
episode: 116 training return: tensor(-284.9613, device='cuda:0')
episode: 117 training return: tensor(-312.7887, device='cuda:0')
episode: 118 training return: tensor(-221.4724, device='cuda:0')
episode: 119 training return: tensor(-361.1946, device='cuda:0')
epoch: 30 test_true_pfm: 1556.0335882105603 sim_pfm: -157.0814869615715
episode: 120 training return: tensor(-83.0889, device='cuda:0')
episode: 121 training return: tensor(-326.1402, device='cuda:0')
episode: 122 training return: tensor(-175.9114, device='cuda:0')
episode: 123 training return: tensor(-292.2750, device='cuda:0')
epoch: 31 test_true_pfm: 2196.9161191519256 sim_pfm: 288.85853796090424
episode: 124 training return: tensor(-65.4163, device='cuda:0')
episode: 125 training return: tensor(361.1169, device='cuda:0')
episode: 126 training return: tensor(-364.6797, device='cuda:0')
episode: 127 training return: tensor(-223.3141, device='cuda:0')
epoch: 32 test_true_pfm: 1881.929982677524 sim_pfm: -105.42639854734686
episode: 128 training return: tensor(-134.0444, device='cuda:0')
episode: 129 training return: tensor(-304.1515, device='cuda:0')
episode: 130 training return: tensor(319.2229, device='cuda:0')
episode: 131 training return: tensor(-263.4450, device='cuda:0')
epoch: 33 test_true_pfm: 2648.1472493207725 sim_pfm: -129.06600499750735
episode: 132 training return: tensor(-332.0736, device='cuda:0')
episode: 133 training return: tensor(-323.0995, device='cuda:0')
episode: 134 training return: tensor(379.2492, device='cuda:0')
episode: 135 training return: tensor(137.7864, device='cuda:0')
epoch: 34 test_true_pfm: 1590.5321953561652 sim_pfm: 80.9692388145583
episode: 136 training return: tensor(-359.3239, device='cuda:0')
episode: 137 training return: tensor(-300.4155, device='cuda:0')
episode: 138 training return: tensor(-340.0104, device='cuda:0')
episode: 139 training return: tensor(-94.2382, device='cuda:0')
epoch: 35 test_true_pfm: 1716.6443443581854 sim_pfm: -208.59422249591444
episode: 140 training return: tensor(-203.0523, device='cuda:0')
episode: 141 training return: tensor(28.1557, device='cuda:0')
episode: 142 training return: tensor(-399.9808, device='cuda:0')
episode: 143 training return: tensor(-63.9296, device='cuda:0')
epoch: 36 test_true_pfm: 1857.4783897038353 sim_pfm: -94.52890485575578
episode: 144 training return: tensor(-300.8692, device='cuda:0')
episode: 145 training return: tensor(87.2366, device='cuda:0')
episode: 146 training return: tensor(-229.1295, device='cuda:0')
episode: 147 training return: tensor(-262.4593, device='cuda:0')
epoch: 37 test_true_pfm: 1805.749414373289 sim_pfm: 5.6922982388641685
episode: 148 training return: tensor(-245.2506, device='cuda:0')
episode: 149 training return: tensor(-360.9105, device='cuda:0')
episode: 150 training return: tensor(349.7092, device='cuda:0')
episode: 151 training return: tensor(-273.6544, device='cuda:0')
epoch: 38 test_true_pfm: 1646.8469660637838 sim_pfm: 89.70959308512586
episode: 152 training return: tensor(-309.3087, device='cuda:0')
episode: 153 training return: tensor(-265.4865, device='cuda:0')
episode: 154 training return: tensor(277.5334, device='cuda:0')
episode: 155 training return: tensor(-362.5519, device='cuda:0')
epoch: 39 test_true_pfm: 2105.055986375613 sim_pfm: -233.7451413635669
episode: 156 training return: tensor(90.8165, device='cuda:0')
episode: 157 training return: tensor(325.1794, device='cuda:0')
episode: 158 training return: tensor(-253.0650, device='cuda:0')
episode: 159 training return: tensor(-14.3648, device='cuda:0')
epoch: 40 test_true_pfm: 2144.697746936223 sim_pfm: -280.9178771823451
episode: 160 training return: tensor(-189.6664, device='cuda:0')
episode: 161 training return: tensor(-116.1654, device='cuda:0')
episode: 162 training return: tensor(-113.4897, device='cuda:0')
episode: 163 training return: tensor(-227.2980, device='cuda:0')
epoch: 41 test_true_pfm: 1550.1929402273854 sim_pfm: -197.60135662400475
episode: 164 training return: tensor(-261.4880, device='cuda:0')
episode: 165 training return: tensor(-79.4480, device='cuda:0')
episode: 166 training return: tensor(-320.0439, device='cuda:0')
episode: 167 training return: tensor(56.8896, device='cuda:0')
epoch: 42 test_true_pfm: 2498.0551591777266 sim_pfm: -157.35232851774586
episode: 168 training return: tensor(-133.6558, device='cuda:0')
episode: 169 training return: tensor(-330.9221, device='cuda:0')
episode: 170 training return: tensor(58.0260, device='cuda:0')
episode: 171 training return: tensor(-23.8637, device='cuda:0')
epoch: 43 test_true_pfm: 1950.6882448371525 sim_pfm: -89.33380412782815
episode: 172 training return: tensor(-241.5716, device='cuda:0')
episode: 173 training return: tensor(-279.3861, device='cuda:0')
episode: 174 training return: tensor(242.8579, device='cuda:0')
episode: 175 training return: tensor(-364.1596, device='cuda:0')
epoch: 44 test_true_pfm: 1664.280288207489 sim_pfm: -272.8910808212434
episode: 176 training return: tensor(-197.5892, device='cuda:0')
episode: 177 training return: tensor(-189.3642, device='cuda:0')
episode: 178 training return: tensor(-294.1570, device='cuda:0')
episode: 179 training return: tensor(-318.4811, device='cuda:0')
epoch: 45 test_true_pfm: 1925.2244959617262 sim_pfm: -42.992919360714346
episode: 180 training return: tensor(81.1408, device='cuda:0')
episode: 181 training return: tensor(-10.6086, device='cuda:0')
episode: 182 training return: tensor(77.5257, device='cuda:0')
episode: 183 training return: tensor(-285.6588, device='cuda:0')
epoch: 46 test_true_pfm: 1923.8295707244433 sim_pfm: -167.4314997366746
episode: 184 training return: tensor(-257.6489, device='cuda:0')
episode: 185 training return: tensor(-360.5199, device='cuda:0')
episode: 186 training return: tensor(-329.7154, device='cuda:0')
episode: 187 training return: tensor(-342.4422, device='cuda:0')
epoch: 47 test_true_pfm: 1732.9747405859846 sim_pfm: -222.67367963966294
episode: 188 training return: tensor(-343.8081, device='cuda:0')
episode: 189 training return: tensor(-215.6240, device='cuda:0')
episode: 190 training return: tensor(-224.8758, device='cuda:0')
episode: 191 training return: tensor(-245.7545, device='cuda:0')
epoch: 48 test_true_pfm: 1784.6067658540317 sim_pfm: -310.20458878049976
episode: 192 training return: tensor(-327.5165, device='cuda:0')
episode: 193 training return: tensor(-186.3050, device='cuda:0')
episode: 194 training return: tensor(-284.5374, device='cuda:0')
episode: 195 training return: tensor(-354.9742, device='cuda:0')
epoch: 49 test_true_pfm: 1685.1848299004894 sim_pfm: -40.22296856165243
episode: 196 training return: tensor(-298.3024, device='cuda:0')
episode: 197 training return: tensor(-305.5994, device='cuda:0')
episode: 198 training return: tensor(-351.5697, device='cuda:0')
episode: 199 training return: tensor(-362.1039, device='cuda:0')
epoch: 50 test_true_pfm: 2176.710270230964 sim_pfm: -228.83010032384968
episode: 200 training return: tensor(-198.0697, device='cuda:0')
episode: 201 training return: tensor(-226.5022, device='cuda:0')
episode: 202 training return: tensor(114.5812, device='cuda:0')
episode: 203 training return: tensor(315.1589, device='cuda:0')
epoch: 51 test_true_pfm: 2229.8024993985177 sim_pfm: 60.917478240822675
episode: 204 training return: tensor(-233.0758, device='cuda:0')
episode: 205 training return: tensor(-216.2051, device='cuda:0')
episode: 206 training return: tensor(-223.0827, device='cuda:0')
episode: 207 training return: tensor(-357.8662, device='cuda:0')
epoch: 52 test_true_pfm: 1662.3112558622088 sim_pfm: -180.23113035728844
episode: 208 training return: tensor(-175.5587, device='cuda:0')
episode: 209 training return: tensor(-206.2189, device='cuda:0')
episode: 210 training return: tensor(-209.9489, device='cuda:0')
episode: 211 training return: tensor(-235.3170, device='cuda:0')
epoch: 53 test_true_pfm: 2122.3391377066196 sim_pfm: -105.62823258774976
episode: 212 training return: tensor(381.3023, device='cuda:0')
episode: 213 training return: tensor(-229.2639, device='cuda:0')
episode: 214 training return: tensor(397.1536, device='cuda:0')
episode: 215 training return: tensor(-336.5821, device='cuda:0')
epoch: 54 test_true_pfm: 1645.771188496429 sim_pfm: -39.78663147910265
episode: 216 training return: tensor(-240.3917, device='cuda:0')
episode: 217 training return: tensor(-354.8230, device='cuda:0')
episode: 218 training return: tensor(-321.9495, device='cuda:0')
episode: 219 training return: tensor(368.1396, device='cuda:0')
epoch: 55 test_true_pfm: 1816.5191836602617 sim_pfm: -187.60664584483797
episode: 220 training return: tensor(-325.7365, device='cuda:0')
episode: 221 training return: tensor(-308.9564, device='cuda:0')
episode: 222 training return: tensor(-144.8242, device='cuda:0')
episode: 223 training return: tensor(-344.9672, device='cuda:0')
epoch: 56 test_true_pfm: 2407.5504641846446 sim_pfm: -110.89817018034712
episode: 224 training return: tensor(-243.7308, device='cuda:0')
episode: 225 training return: tensor(181.3394, device='cuda:0')
episode: 226 training return: tensor(-35.9437, device='cuda:0')
episode: 227 training return: tensor(-312.7191, device='cuda:0')
epoch: 57 test_true_pfm: 1712.9346964950391 sim_pfm: -235.73699754442592
episode: 228 training return: tensor(107.6212, device='cuda:0')
episode: 229 training return: tensor(-331.1323, device='cuda:0')
episode: 230 training return: tensor(-323.5650, device='cuda:0')
episode: 231 training return: tensor(-228.5506, device='cuda:0')
epoch: 58 test_true_pfm: 1747.7325837455535 sim_pfm: -82.3807340040997
episode: 232 training return: tensor(-263.2950, device='cuda:0')
episode: 233 training return: tensor(-290.6219, device='cuda:0')
episode: 234 training return: tensor(277.6555, device='cuda:0')
episode: 235 training return: tensor(-93.4119, device='cuda:0')
epoch: 59 test_true_pfm: 1793.3099767830288 sim_pfm: -203.4918382235725
episode: 236 training return: tensor(-40.4156, device='cuda:0')
episode: 237 training return: tensor(-342.5504, device='cuda:0')
episode: 238 training return: tensor(-117.8612, device='cuda:0')
episode: 239 training return: tensor(-19.4948, device='cuda:0')
epoch: 60 test_true_pfm: 1922.3273576565916 sim_pfm: -273.59778527684585
episode: 240 training return: tensor(-327.8717, device='cuda:0')
episode: 241 training return: tensor(-335.9292, device='cuda:0')
episode: 242 training return: tensor(178.6200, device='cuda:0')
episode: 243 training return: tensor(-352.1956, device='cuda:0')
epoch: 61 test_true_pfm: 2492.60171527204 sim_pfm: -15.56785892584594
episode: 244 training return: tensor(-293.1351, device='cuda:0')
episode: 245 training return: tensor(-243.4535, device='cuda:0')
episode: 246 training return: tensor(-276.2967, device='cuda:0')
episode: 247 training return: tensor(188.4693, device='cuda:0')
epoch: 62 test_true_pfm: 2533.437681619071 sim_pfm: -236.88828266939768
episode: 248 training return: tensor(-302.3379, device='cuda:0')
episode: 249 training return: tensor(-186.5985, device='cuda:0')
episode: 250 training return: tensor(244.2656, device='cuda:0')
episode: 251 training return: tensor(386.8156, device='cuda:0')
epoch: 63 test_true_pfm: 2252.9298961527966 sim_pfm: -54.75043688233321
episode: 252 training return: tensor(-225.0372, device='cuda:0')
episode: 253 training return: tensor(-123.6819, device='cuda:0')
episode: 254 training return: tensor(-224.3126, device='cuda:0')
episode: 255 training return: tensor(-327.3036, device='cuda:0')
epoch: 64 test_true_pfm: 1582.705709160213 sim_pfm: -41.582104058480276
episode: 256 training return: tensor(-293.2852, device='cuda:0')
episode: 257 training return: tensor(-327.2668, device='cuda:0')
episode: 258 training return: tensor(-83.6965, device='cuda:0')
episode: 259 training return: tensor(-169.1810, device='cuda:0')
epoch: 65 test_true_pfm: 2640.806388939871 sim_pfm: -6.80990940076299
episode: 260 training return: tensor(-343.6805, device='cuda:0')
episode: 261 training return: tensor(-209.6967, device='cuda:0')
episode: 262 training return: tensor(-278.8401, device='cuda:0')
episode: 263 training return: tensor(-267.6174, device='cuda:0')
epoch: 66 test_true_pfm: 1516.0249217281905 sim_pfm: -126.34201442021488
episode: 264 training return: tensor(-311.2384, device='cuda:0')
episode: 265 training return: tensor(-112.4694, device='cuda:0')
episode: 266 training return: tensor(211.9751, device='cuda:0')
episode: 267 training return: tensor(389.9551, device='cuda:0')
epoch: 67 test_true_pfm: 2198.7376459760394 sim_pfm: 44.749036247842014
episode: 268 training return: tensor(-341.0339, device='cuda:0')
episode: 269 training return: tensor(-235.2387, device='cuda:0')
episode: 270 training return: tensor(-286.7255, device='cuda:0')
episode: 271 training return: tensor(-273.7526, device='cuda:0')
epoch: 68 test_true_pfm: 1823.0571989754299 sim_pfm: -23.412419854314066
episode: 272 training return: tensor(12.4776, device='cuda:0')
episode: 273 training return: tensor(-337.2324, device='cuda:0')
episode: 274 training return: tensor(302.5509, device='cuda:0')
episode: 275 training return: tensor(168.9397, device='cuda:0')
epoch: 69 test_true_pfm: 1420.5311455314034 sim_pfm: -180.86000116867945
episode: 276 training return: tensor(-125.5095, device='cuda:0')
episode: 277 training return: tensor(-115.9871, device='cuda:0')
episode: 278 training return: tensor(-331.0626, device='cuda:0')
episode: 279 training return: tensor(2.2632, device='cuda:0')
epoch: 70 test_true_pfm: 2693.211349434731 sim_pfm: -56.895465141220484
episode: 280 training return: tensor(24.9447, device='cuda:0')
episode: 281 training return: tensor(-76.8297, device='cuda:0')
episode: 282 training return: tensor(-324.9839, device='cuda:0')
episode: 283 training return: tensor(-319.2827, device='cuda:0')
epoch: 71 test_true_pfm: 1883.6515855847392 sim_pfm: -176.67055622426173
episode: 284 training return: tensor(-198.9739, device='cuda:0')
episode: 285 training return: tensor(-359.0885, device='cuda:0')
episode: 286 training return: tensor(-246.0052, device='cuda:0')
episode: 287 training return: tensor(-180.3679, device='cuda:0')
epoch: 72 test_true_pfm: 1571.3129233523796 sim_pfm: -43.86369730731045
episode: 288 training return: tensor(-194.1883, device='cuda:0')
episode: 289 training return: tensor(-283.1076, device='cuda:0')
episode: 290 training return: tensor(-189.8042, device='cuda:0')
episode: 291 training return: tensor(-331.2734, device='cuda:0')
epoch: 73 test_true_pfm: 2034.1042964979915 sim_pfm: -267.354133517559
episode: 292 training return: tensor(-196.2138, device='cuda:0')
episode: 293 training return: tensor(-143.3844, device='cuda:0')
episode: 294 training return: tensor(398.7768, device='cuda:0')
episode: 295 training return: tensor(42.0831, device='cuda:0')
epoch: 74 test_true_pfm: 2469.684236688616 sim_pfm: -147.84318561417362
episode: 296 training return: tensor(357.4001, device='cuda:0')
episode: 297 training return: tensor(-322.1440, device='cuda:0')
episode: 298 training return: tensor(-7.1229, device='cuda:0')
episode: 299 training return: tensor(364.2140, device='cuda:0')
epoch: 75 test_true_pfm: 1963.2422998518393 sim_pfm: -185.77816773186592
episode: 300 training return: tensor(-255.0533, device='cuda:0')
episode: 301 training return: tensor(-96.4369, device='cuda:0')
episode: 302 training return: tensor(-368.9771, device='cuda:0')
episode: 303 training return: tensor(-312.4522, device='cuda:0')
epoch: 76 test_true_pfm: 1826.3737510590606 sim_pfm: -32.118750969762914
episode: 304 training return: tensor(-369.4507, device='cuda:0')
episode: 305 training return: tensor(215.5538, device='cuda:0')
episode: 306 training return: tensor(-36.5864, device='cuda:0')
episode: 307 training return: tensor(56.4124, device='cuda:0')
epoch: 77 test_true_pfm: 1708.7435413863423 sim_pfm: -13.64751551952213
episode: 308 training return: tensor(59.7295, device='cuda:0')
episode: 309 training return: tensor(275.0251, device='cuda:0')
episode: 310 training return: tensor(0.5903, device='cuda:0')
episode: 311 training return: tensor(-230.7968, device='cuda:0')
epoch: 78 test_true_pfm: 1823.5138999733297 sim_pfm: -115.36846861697268
episode: 312 training return: tensor(-217.8479, device='cuda:0')
episode: 313 training return: tensor(-211.4227, device='cuda:0')
episode: 314 training return: tensor(-265.4256, device='cuda:0')
episode: 315 training return: tensor(-330.6048, device='cuda:0')
epoch: 79 test_true_pfm: 1765.6354301709632 sim_pfm: 140.53414516321695
episode: 316 training return: tensor(-259.2242, device='cuda:0')
episode: 317 training return: tensor(-143.3130, device='cuda:0')
episode: 318 training return: tensor(132.7183, device='cuda:0')
episode: 319 training return: tensor(-236.3041, device='cuda:0')
epoch: 80 test_true_pfm: 1863.0578011991092 sim_pfm: -56.970470919836465
episode: 320 training return: tensor(-309.4130, device='cuda:0')
episode: 321 training return: tensor(-276.7215, device='cuda:0')
episode: 322 training return: tensor(-181.8041, device='cuda:0')
episode: 323 training return: tensor(-88.7244, device='cuda:0')
epoch: 81 test_true_pfm: 2326.9789603229015 sim_pfm: -35.76154346721402
episode: 324 training return: tensor(-277.4540, device='cuda:0')
episode: 325 training return: tensor(-111.8871, device='cuda:0')
episode: 326 training return: tensor(-266.2154, device='cuda:0')
episode: 327 training return: tensor(-117.6876, device='cuda:0')
epoch: 82 test_true_pfm: 1534.2553160582593 sim_pfm: -67.25847267499194
episode: 328 training return: tensor(-297.3599, device='cuda:0')
episode: 329 training return: tensor(-40.2430, device='cuda:0')
episode: 330 training return: tensor(-318.1204, device='cuda:0')
episode: 331 training return: tensor(-310.5644, device='cuda:0')
epoch: 83 test_true_pfm: 2180.7563253310295 sim_pfm: -213.2748374985434
episode: 332 training return: tensor(-275.5568, device='cuda:0')
episode: 333 training return: tensor(135.3596, device='cuda:0')
episode: 334 training return: tensor(-347.1727, device='cuda:0')
episode: 335 training return: tensor(-307.4404, device='cuda:0')
epoch: 84 test_true_pfm: 2143.479911488945 sim_pfm: -129.1309001641736
episode: 336 training return: tensor(377.1104, device='cuda:0')
episode: 337 training return: tensor(-273.7003, device='cuda:0')
episode: 338 training return: tensor(-116.7301, device='cuda:0')
episode: 339 training return: tensor(-258.5298, device='cuda:0')
epoch: 85 test_true_pfm: 2351.2130613482295 sim_pfm: -1.493563175201416
episode: 340 training return: tensor(-29.5759, device='cuda:0')
episode: 341 training return: tensor(-336.8599, device='cuda:0')
episode: 342 training return: tensor(-123.7684, device='cuda:0')
episode: 343 training return: tensor(-195.9360, device='cuda:0')
epoch: 86 test_true_pfm: 1941.7824714000974 sim_pfm: -39.657347835222026
episode: 344 training return: tensor(373.1959, device='cuda:0')
episode: 345 training return: tensor(-365.0571, device='cuda:0')
episode: 346 training return: tensor(-225.7697, device='cuda:0')
episode: 347 training return: tensor(275.2519, device='cuda:0')
epoch: 87 test_true_pfm: 1569.9298765896835 sim_pfm: -183.18330137899224
episode: 348 training return: tensor(-235.9315, device='cuda:0')
episode: 349 training return: tensor(-127.6223, device='cuda:0')
episode: 350 training return: tensor(2.5018, device='cuda:0')
episode: 351 training return: tensor(-260.3234, device='cuda:0')
epoch: 88 test_true_pfm: 2008.9208403627742 sim_pfm: -13.188417198873745
episode: 352 training return: tensor(197.2785, device='cuda:0')
episode: 353 training return: tensor(-105.9294, device='cuda:0')
episode: 354 training return: tensor(-45.1624, device='cuda:0')
episode: 355 training return: tensor(132.4722, device='cuda:0')
epoch: 89 test_true_pfm: 1435.455055107578 sim_pfm: -212.4960799612454
episode: 356 training return: tensor(374.3265, device='cuda:0')
episode: 357 training return: tensor(-357.0467, device='cuda:0')
episode: 358 training return: tensor(-368.9215, device='cuda:0')
episode: 359 training return: tensor(-211.0644, device='cuda:0')
epoch: 90 test_true_pfm: 1357.6851679138188 sim_pfm: -230.80261305657527
episode: 360 training return: tensor(356.6460, device='cuda:0')
episode: 361 training return: tensor(-363.2587, device='cuda:0')
episode: 362 training return: tensor(-215.7226, device='cuda:0')
episode: 363 training return: tensor(-335.7798, device='cuda:0')
epoch: 91 test_true_pfm: 1507.7178107901036 sim_pfm: -184.73192365987538
episode: 364 training return: tensor(-213.3041, device='cuda:0')
episode: 365 training return: tensor(-229.0809, device='cuda:0')
episode: 366 training return: tensor(-322.7212, device='cuda:0')
episode: 367 training return: tensor(-148.1100, device='cuda:0')
epoch: 92 test_true_pfm: 1770.1868926427608 sim_pfm: -261.7354202842495
episode: 368 training return: tensor(-268.0314, device='cuda:0')
episode: 369 training return: tensor(191.6939, device='cuda:0')
episode: 370 training return: tensor(-328.0100, device='cuda:0')
episode: 371 training return: tensor(-268.9720, device='cuda:0')
epoch: 93 test_true_pfm: 2806.246309175329 sim_pfm: -46.35470480025591
episode: 372 training return: tensor(-314.8289, device='cuda:0')
episode: 373 training return: tensor(-309.8406, device='cuda:0')
episode: 374 training return: tensor(404.5178, device='cuda:0')
episode: 375 training return: tensor(-186.7347, device='cuda:0')
epoch: 94 test_true_pfm: 1920.41059939941 sim_pfm: -252.87115113895075
episode: 376 training return: tensor(-114.6921, device='cuda:0')
episode: 377 training return: tensor(-346.0318, device='cuda:0')
episode: 378 training return: tensor(-320.3555, device='cuda:0')
episode: 379 training return: tensor(-104.6362, device='cuda:0')
epoch: 95 test_true_pfm: 1531.6317424723147 sim_pfm: -180.39555091683482
episode: 380 training return: tensor(-212.3767, device='cuda:0')
episode: 381 training return: tensor(-313.6511, device='cuda:0')
episode: 382 training return: tensor(43.0019, device='cuda:0')
episode: 383 training return: tensor(99.3634, device='cuda:0')
epoch: 96 test_true_pfm: 1582.60856446098 sim_pfm: -201.06443141105896
episode: 384 training return: tensor(-361.2925, device='cuda:0')
episode: 385 training return: tensor(-112.5165, device='cuda:0')
episode: 386 training return: tensor(-220.6334, device='cuda:0')
episode: 387 training return: tensor(41.3391, device='cuda:0')
epoch: 97 test_true_pfm: 1682.3524576283878 sim_pfm: -227.81601176434197
episode: 388 training return: tensor(4.8806, device='cuda:0')
episode: 389 training return: tensor(50.3733, device='cuda:0')
episode: 390 training return: tensor(-278.1170, device='cuda:0')
episode: 391 training return: tensor(158.5792, device='cuda:0')
epoch: 98 test_true_pfm: 1806.4956035098887 sim_pfm: -193.7244771924646
episode: 392 training return: tensor(363.2013, device='cuda:0')
episode: 393 training return: tensor(-261.3085, device='cuda:0')
episode: 394 training return: tensor(-232.1382, device='cuda:0')
episode: 395 training return: tensor(154.9544, device='cuda:0')
epoch: 99 test_true_pfm: 1725.6563125218884 sim_pfm: -288.3379440686549
episode: 396 training return: tensor(-354.9333, device='cuda:0')
episode: 397 training return: tensor(-358.6783, device='cuda:0')
episode: 398 training return: tensor(-352.7856, device='cuda:0')
episode: 399 training return: tensor(-336.9143, device='cuda:0')
epoch: 100 test_true_pfm: 1483.3701806254242 sim_pfm: 167.7483851737537
episode: 400 training return: tensor(-315.8723, device='cuda:0')
episode: 401 training return: tensor(127.1018, device='cuda:0')
episode: 402 training return: tensor(-337.6818, device='cuda:0')
episode: 403 training return: tensor(-106.8004, device='cuda:0')
epoch: 101 test_true_pfm: 1589.634738920874 sim_pfm: -111.01357007630092
episode: 404 training return: tensor(-211.6176, device='cuda:0')
episode: 405 training return: tensor(281.5142, device='cuda:0')
episode: 406 training return: tensor(-51.1954, device='cuda:0')
episode: 407 training return: tensor(-85.2450, device='cuda:0')
epoch: 102 test_true_pfm: 2116.285756261958 sim_pfm: -97.91170274691346
episode: 408 training return: tensor(-56.2601, device='cuda:0')
episode: 409 training return: tensor(53.3692, device='cuda:0')
episode: 410 training return: tensor(-308.8080, device='cuda:0')
episode: 411 training return: tensor(-195.1768, device='cuda:0')
epoch: 103 test_true_pfm: 2354.1346993579814 sim_pfm: -186.38380175231336
episode: 412 training return: tensor(274.0446, device='cuda:0')
episode: 413 training return: tensor(12.7681, device='cuda:0')
episode: 414 training return: tensor(-13.5675, device='cuda:0')
episode: 415 training return: tensor(108.4382, device='cuda:0')
epoch: 104 test_true_pfm: 2380.640807796745 sim_pfm: 14.30337766049585
episode: 416 training return: tensor(-308.3475, device='cuda:0')
episode: 417 training return: tensor(-346.1615, device='cuda:0')
episode: 418 training return: tensor(-230.5657, device='cuda:0')
episode: 419 training return: tensor(-316.6067, device='cuda:0')
epoch: 105 test_true_pfm: 1817.6686256240118 sim_pfm: -209.40488340713395
episode: 420 training return: tensor(-125.9841, device='cuda:0')
episode: 421 training return: tensor(-101.6105, device='cuda:0')
episode: 422 training return: tensor(-212.6366, device='cuda:0')
episode: 423 training return: tensor(387.7272, device='cuda:0')
epoch: 106 test_true_pfm: 2049.8170531726532 sim_pfm: -84.9567839113103
episode: 424 training return: tensor(-351.1540, device='cuda:0')
episode: 425 training return: tensor(-49.3298, device='cuda:0')
episode: 426 training return: tensor(-294.9154, device='cuda:0')
episode: 427 training return: tensor(-295.1534, device='cuda:0')
epoch: 107 test_true_pfm: 1913.1396285169883 sim_pfm: -146.06865714816377
episode: 428 training return: tensor(-119.1893, device='cuda:0')
episode: 429 training return: tensor(122.4302, device='cuda:0')
episode: 430 training return: tensor(-103.9090, device='cuda:0')
episode: 431 training return: tensor(-83.3034, device='cuda:0')
epoch: 108 test_true_pfm: 1436.7582905137251 sim_pfm: -287.13652886655956
episode: 432 training return: tensor(-255.1097, device='cuda:0')
episode: 433 training return: tensor(-237.0109, device='cuda:0')
episode: 434 training return: tensor(-314.7459, device='cuda:0')
episode: 435 training return: tensor(72.6400, device='cuda:0')
epoch: 109 test_true_pfm: 2098.0718912167636 sim_pfm: -68.975932525044
episode: 436 training return: tensor(-266.0261, device='cuda:0')
episode: 437 training return: tensor(-0.6621, device='cuda:0')
episode: 438 training return: tensor(-89.8742, device='cuda:0')
episode: 439 training return: tensor(-76.4972, device='cuda:0')
epoch: 110 test_true_pfm: 2048.276100920219 sim_pfm: -210.5287657349448
episode: 440 training return: tensor(-48.0296, device='cuda:0')
episode: 441 training return: tensor(-375.9276, device='cuda:0')
episode: 442 training return: tensor(-349.4569, device='cuda:0')
episode: 443 training return: tensor(4.3098, device='cuda:0')
epoch: 111 test_true_pfm: 1492.457755823263 sim_pfm: -147.28890954413995
episode: 444 training return: tensor(374.7828, device='cuda:0')
episode: 445 training return: tensor(64.7928, device='cuda:0')
episode: 446 training return: tensor(225.3700, device='cuda:0')
episode: 447 training return: tensor(-269.4541, device='cuda:0')
epoch: 112 test_true_pfm: 2135.692630544649 sim_pfm: -197.07912385607293
episode: 448 training return: tensor(-303.0577, device='cuda:0')
episode: 449 training return: tensor(-305.0520, device='cuda:0')
episode: 450 training return: tensor(-113.1339, device='cuda:0')
episode: 451 training return: tensor(135.8206, device='cuda:0')
epoch: 113 test_true_pfm: 1602.6406936349595 sim_pfm: -141.86900617721645
episode: 452 training return: tensor(-124.1410, device='cuda:0')
episode: 453 training return: tensor(-87.3415, device='cuda:0')
episode: 454 training return: tensor(-271.6210, device='cuda:0')
episode: 455 training return: tensor(-314.3755, device='cuda:0')
epoch: 114 test_true_pfm: 1987.6933216273408 sim_pfm: -248.4395221802018
episode: 456 training return: tensor(-190.2397, device='cuda:0')
episode: 457 training return: tensor(-177.4554, device='cuda:0')
episode: 458 training return: tensor(179.4507, device='cuda:0')
episode: 459 training return: tensor(-117.0833, device='cuda:0')
epoch: 115 test_true_pfm: 1788.7480932133303 sim_pfm: 193.07016543190306
episode: 460 training return: tensor(-226.3718, device='cuda:0')
episode: 461 training return: tensor(-271.0955, device='cuda:0')
episode: 462 training return: tensor(-286.9115, device='cuda:0')
episode: 463 training return: tensor(-214.8043, device='cuda:0')
epoch: 116 test_true_pfm: 2121.428943223265 sim_pfm: -175.5820497655853
episode: 464 training return: tensor(-285.4892, device='cuda:0')
episode: 465 training return: tensor(403.3069, device='cuda:0')
episode: 466 training return: tensor(173.4395, device='cuda:0')
episode: 467 training return: tensor(390.4646, device='cuda:0')
epoch: 117 test_true_pfm: 1440.7196048037367 sim_pfm: -233.96922532934695
episode: 468 training return: tensor(-92.1366, device='cuda:0')
episode: 469 training return: tensor(-107.7053, device='cuda:0')
episode: 470 training return: tensor(-217.1103, device='cuda:0')
episode: 471 training return: tensor(-216.0137, device='cuda:0')
epoch: 118 test_true_pfm: 1585.8690179846847 sim_pfm: -92.20425563801352
episode: 472 training return: tensor(-285.4789, device='cuda:0')
episode: 473 training return: tensor(401.7295, device='cuda:0')
episode: 474 training return: tensor(-99.7558, device='cuda:0')
episode: 475 training return: tensor(-330.9295, device='cuda:0')
epoch: 119 test_true_pfm: 1417.7828764301673 sim_pfm: -146.2943964280033
episode: 476 training return: tensor(-150.3806, device='cuda:0')
episode: 477 training return: tensor(229.2996, device='cuda:0')
episode: 478 training return: tensor(357.5835, device='cuda:0')
episode: 479 training return: tensor(-363.2954, device='cuda:0')
epoch: 120 test_true_pfm: 2021.609365666455 sim_pfm: -38.47492430657925
episode: 480 training return: tensor(-364.7404, device='cuda:0')
episode: 481 training return: tensor(-106.6062, device='cuda:0')
episode: 482 training return: tensor(-287.3600, device='cuda:0')
episode: 483 training return: tensor(-274.7603, device='cuda:0')
epoch: 121 test_true_pfm: 1425.1448960262762 sim_pfm: -248.0590589604884
episode: 484 training return: tensor(-367.6054, device='cuda:0')
episode: 485 training return: tensor(-223.6876, device='cuda:0')
episode: 486 training return: tensor(274.1713, device='cuda:0')
episode: 487 training return: tensor(-358.1778, device='cuda:0')
epoch: 122 test_true_pfm: 1510.7068706929404 sim_pfm: -214.25237003643997
episode: 488 training return: tensor(-110.2604, device='cuda:0')
episode: 489 training return: tensor(167.2350, device='cuda:0')
episode: 490 training return: tensor(-99.6581, device='cuda:0')
episode: 491 training return: tensor(270.2357, device='cuda:0')
epoch: 123 test_true_pfm: 1541.9467165874669 sim_pfm: -283.7393590764647
episode: 492 training return: tensor(-333.1533, device='cuda:0')
episode: 493 training return: tensor(269.3811, device='cuda:0')
episode: 494 training return: tensor(360.2816, device='cuda:0')
episode: 495 training return: tensor(-338.4530, device='cuda:0')
epoch: 124 test_true_pfm: 1599.2747370668978 sim_pfm: -222.32255448012924
episode: 496 training return: tensor(287.7274, device='cuda:0')
episode: 497 training return: tensor(-154.9384, device='cuda:0')
episode: 498 training return: tensor(-369.5601, device='cuda:0')
episode: 499 training return: tensor(-19.2799, device='cuda:0')
epoch: 125 test_true_pfm: 1494.2463215383113 sim_pfm: -249.33459966874216
episode: 500 training return: tensor(-309.4675, device='cuda:0')
episode: 501 training return: tensor(-355.2854, device='cuda:0')
episode: 502 training return: tensor(87.1400, device='cuda:0')
episode: 503 training return: tensor(165.7107, device='cuda:0')
epoch: 126 test_true_pfm: 2074.9257403629217 sim_pfm: -128.0241276403734
episode: 504 training return: tensor(-158.5734, device='cuda:0')
episode: 505 training return: tensor(-335.9774, device='cuda:0')
episode: 506 training return: tensor(-168.5004, device='cuda:0')
episode: 507 training return: tensor(-107.4602, device='cuda:0')
epoch: 127 test_true_pfm: 2314.7365507233985 sim_pfm: -71.84007661080493
episode: 508 training return: tensor(-193.2232, device='cuda:0')
episode: 509 training return: tensor(-230.0989, device='cuda:0')
episode: 510 training return: tensor(-303.4543, device='cuda:0')
episode: 511 training return: tensor(-241.0106, device='cuda:0')
epoch: 128 test_true_pfm: 2051.386685251447 sim_pfm: -302.6059668790937
episode: 512 training return: tensor(-229.8355, device='cuda:0')
episode: 513 training return: tensor(-212.6587, device='cuda:0')
episode: 514 training return: tensor(-43.0816, device='cuda:0')
episode: 515 training return: tensor(-224.6257, device='cuda:0')
epoch: 129 test_true_pfm: 1931.9628214083984 sim_pfm: -236.67261301726103
episode: 516 training return: tensor(-60.7887, device='cuda:0')
episode: 517 training return: tensor(-103.4081, device='cuda:0')
episode: 518 training return: tensor(-9.1494, device='cuda:0')
episode: 519 training return: tensor(356.0825, device='cuda:0')
epoch: 130 test_true_pfm: 2036.4585622712696 sim_pfm: -92.32443701305117
episode: 520 training return: tensor(-255.8020, device='cuda:0')
episode: 521 training return: tensor(201.0493, device='cuda:0')
episode: 522 training return: tensor(-130.7200, device='cuda:0')
episode: 523 training return: tensor(-289.8651, device='cuda:0')
epoch: 131 test_true_pfm: 1621.9032358311295 sim_pfm: -266.6105087597389
episode: 524 training return: tensor(-305.3941, device='cuda:0')
episode: 525 training return: tensor(73.1046, device='cuda:0')
episode: 526 training return: tensor(-360.0028, device='cuda:0')
episode: 527 training return: tensor(-233.9087, device='cuda:0')
epoch: 132 test_true_pfm: 1379.038792987151 sim_pfm: -179.58913625924228
episode: 528 training return: tensor(-312.7319, device='cuda:0')
episode: 529 training return: tensor(136.8884, device='cuda:0')
episode: 530 training return: tensor(-95.1126, device='cuda:0')
episode: 531 training return: tensor(-225.3849, device='cuda:0')
epoch: 133 test_true_pfm: 2204.2258994714925 sim_pfm: -187.49707634886727
episode: 532 training return: tensor(-101.5251, device='cuda:0')
episode: 533 training return: tensor(-322.4823, device='cuda:0')
episode: 534 training return: tensor(-116.8547, device='cuda:0')
episode: 535 training return: tensor(409.8828, device='cuda:0')
epoch: 134 test_true_pfm: 1687.8125054922075 sim_pfm: -190.3048364314891
episode: 536 training return: tensor(-253.5575, device='cuda:0')
episode: 537 training return: tensor(-257.9923, device='cuda:0')
episode: 538 training return: tensor(-326.7138, device='cuda:0')
episode: 539 training return: tensor(-330.7478, device='cuda:0')
epoch: 135 test_true_pfm: 1552.3356265892282 sim_pfm: -87.003514083568
episode: 540 training return: tensor(22.0081, device='cuda:0')
episode: 541 training return: tensor(366.4642, device='cuda:0')
episode: 542 training return: tensor(-343.2796, device='cuda:0')
episode: 543 training return: tensor(-267.3654, device='cuda:0')
epoch: 136 test_true_pfm: 1846.022374836101 sim_pfm: 101.4220693220462
episode: 544 training return: tensor(-297.0048, device='cuda:0')
episode: 545 training return: tensor(-315.6296, device='cuda:0')
episode: 546 training return: tensor(-266.2998, device='cuda:0')
episode: 547 training return: tensor(-114.6105, device='cuda:0')
epoch: 137 test_true_pfm: 1581.768436866508 sim_pfm: -21.288584735186305
episode: 548 training return: tensor(98.8946, device='cuda:0')
episode: 549 training return: tensor(169.4399, device='cuda:0')
episode: 550 training return: tensor(-339.5682, device='cuda:0')
episode: 551 training return: tensor(402.1237, device='cuda:0')
epoch: 138 test_true_pfm: 1626.9690419514038 sim_pfm: -165.17378957026327
episode: 552 training return: tensor(-296.8241, device='cuda:0')
episode: 553 training return: tensor(419.5915, device='cuda:0')
episode: 554 training return: tensor(-343.3528, device='cuda:0')
episode: 555 training return: tensor(374.2867, device='cuda:0')
epoch: 139 test_true_pfm: 2034.9266983664677 sim_pfm: 14.385519641567953
episode: 556 training return: tensor(-262.8609, device='cuda:0')
episode: 557 training return: tensor(197.6082, device='cuda:0')
episode: 558 training return: tensor(-115.4200, device='cuda:0')
episode: 559 training return: tensor(93.8649, device='cuda:0')
epoch: 140 test_true_pfm: 1317.7724575759314 sim_pfm: -245.1723226723067
episode: 560 training return: tensor(362.8788, device='cuda:0')
episode: 561 training return: tensor(270.7234, device='cuda:0')
episode: 562 training return: tensor(-217.3784, device='cuda:0')
episode: 563 training return: tensor(-269.2035, device='cuda:0')
epoch: 141 test_true_pfm: 1446.960295024157 sim_pfm: -279.81163819677505
episode: 564 training return: tensor(-354.1135, device='cuda:0')
episode: 565 training return: tensor(388.3389, device='cuda:0')
episode: 566 training return: tensor(-199.9364, device='cuda:0')
episode: 567 training return: tensor(267.5323, device='cuda:0')
epoch: 142 test_true_pfm: 1728.4084311435806 sim_pfm: -291.73786653073813
episode: 568 training return: tensor(-349.5554, device='cuda:0')
episode: 569 training return: tensor(15.1461, device='cuda:0')
episode: 570 training return: tensor(-102.1906, device='cuda:0')
episode: 571 training return: tensor(-210.2658, device='cuda:0')
epoch: 143 test_true_pfm: 1378.6777592836652 sim_pfm: -242.59619330898082
episode: 572 training return: tensor(-356.3488, device='cuda:0')
episode: 573 training return: tensor(-183.1466, device='cuda:0')
episode: 574 training return: tensor(-331.3774, device='cuda:0')
episode: 575 training return: tensor(-302.5311, device='cuda:0')
epoch: 144 test_true_pfm: 1415.5667648118488 sim_pfm: 116.43889488993834
episode: 576 training return: tensor(-228.5114, device='cuda:0')
episode: 577 training return: tensor(-90.5483, device='cuda:0')
episode: 578 training return: tensor(200.8252, device='cuda:0')
episode: 579 training return: tensor(-232.4278, device='cuda:0')
epoch: 145 test_true_pfm: 1897.4642646384912 sim_pfm: -259.0723899601144
episode: 580 training return: tensor(-281.6040, device='cuda:0')
episode: 581 training return: tensor(-255.7586, device='cuda:0')
episode: 582 training return: tensor(171.9274, device='cuda:0')
episode: 583 training return: tensor(-235.3236, device='cuda:0')
epoch: 146 test_true_pfm: 1617.2677948497992 sim_pfm: -97.82283766536663
episode: 584 training return: tensor(-316.2068, device='cuda:0')
episode: 585 training return: tensor(-276.6900, device='cuda:0')
episode: 586 training return: tensor(-247.3685, device='cuda:0')
episode: 587 training return: tensor(-321.5954, device='cuda:0')
epoch: 147 test_true_pfm: 1662.4806418345545 sim_pfm: -255.96977796506448
episode: 588 training return: tensor(-259.8097, device='cuda:0')
episode: 589 training return: tensor(390.3838, device='cuda:0')
episode: 590 training return: tensor(-174.6295, device='cuda:0')
episode: 591 training return: tensor(206.2196, device='cuda:0')
epoch: 148 test_true_pfm: 2137.0359528216845 sim_pfm: -132.85287481199097
episode: 592 training return: tensor(-113.4767, device='cuda:0')
episode: 593 training return: tensor(-299.9207, device='cuda:0')
episode: 594 training return: tensor(-261.1676, device='cuda:0')
episode: 595 training return: tensor(-306.9742, device='cuda:0')
epoch: 149 test_true_pfm: 1345.472921656343 sim_pfm: -198.65452774852747
episode: 596 training return: tensor(-62.0432, device='cuda:0')
episode: 597 training return: tensor(-115.9191, device='cuda:0')
episode: 598 training return: tensor(48.4807, device='cuda:0')
episode: 599 training return: tensor(365.3045, device='cuda:0')
epoch: 150 test_true_pfm: 1443.2126939320767 sim_pfm: 47.6886962702265
