['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '10000']
epoch: 0 training_loss 0.24545229487121106 test_loss: 0.20614326000213623
epoch: 1 training_loss 0.19195316813886165 test_loss: 0.20202929973602296
epoch: 2 training_loss 0.1886258275061846 test_loss: 0.21272428035736085
epoch: 3 training_loss 0.19920119687914847 test_loss: 0.20859124660491943
epoch: 4 training_loss 0.19441667705774307 test_loss: 0.20633161067962646
epoch: 5 training_loss 0.19128655783832074 test_loss: 0.20549569129943848
epoch: 6 training_loss 0.18339201137423516 test_loss: 0.20343692302703859
epoch: 7 training_loss 0.18121401004493237 test_loss: 0.21337006092071534
epoch: 8 training_loss 0.18105785809457303 test_loss: 0.20026960372924804
epoch: 9 training_loss 0.18115597918629647 test_loss: 0.1963166356086731
epoch: 10 training_loss 0.18976485431194307 test_loss: 0.2000272512435913
epoch: 11 training_loss 0.18049417912960053 test_loss: 0.2063446283340454
epoch: 12 training_loss 0.1774800293892622 test_loss: 0.1935748815536499
epoch: 13 training_loss 0.1775947842001915 test_loss: 0.19420163631439208
epoch: 14 training_loss 0.188649090975523 test_loss: 0.20169787406921386
epoch: 15 training_loss 0.17063844196498393 test_loss: 0.19931453466415405
epoch: 16 training_loss 0.17564363077282905 test_loss: 0.19431122541427612
epoch: 17 training_loss 0.17798456810414792 test_loss: 0.1994118332862854
epoch: 18 training_loss 0.17952698409557344 test_loss: 0.19099005460739135
epoch: 19 training_loss 0.1780531644821167 test_loss: 0.2013939142227173
epoch: 20 training_loss 0.17432002499699592 test_loss: 0.19217754602432252
epoch: 21 training_loss 0.16920263566076754 test_loss: 0.18953824043273926
epoch: 22 training_loss 0.17798048935830593 test_loss: 0.19553213119506835
epoch: 23 training_loss 0.17649078890681266 test_loss: 0.1958193898200989
epoch: 24 training_loss 0.17471499055624007 test_loss: 0.19659359455108644
epoch: 25 training_loss 0.18333262249827384 test_loss: 0.19089890718460084
epoch: 26 training_loss 0.16912631548941134 test_loss: 0.20869836807250977
epoch: 27 training_loss 0.16812141455709934 test_loss: 0.1912568688392639
epoch: 28 training_loss 0.17950856924057007 test_loss: 0.2028820753097534
epoch: 29 training_loss 0.17402295589447023 test_loss: 0.20529572963714598
epoch: 30 training_loss 0.17942654177546502 test_loss: 0.2032421588897705
epoch: 31 training_loss 0.17862764582037927 test_loss: 0.1950138807296753
epoch: 32 training_loss 0.17456177085638047 test_loss: 0.21507618427276612
epoch: 33 training_loss 0.17593244731426239 test_loss: 0.20407941341400146
epoch: 34 training_loss 0.1791233955323696 test_loss: 0.19413844347000123
epoch: 35 training_loss 0.171071365326643 test_loss: 0.2135849952697754
epoch: 36 training_loss 0.17005436874926091 test_loss: 0.20450363159179688
epoch: 37 training_loss 0.17629624426364898 test_loss: 0.2248157262802124
epoch: 38 training_loss 0.17738532580435276 test_loss: 0.19827160835266114
epoch: 39 training_loss 0.17744853898882865 test_loss: 0.19428895711898803
epoch: 40 training_loss 0.1730893798917532 test_loss: 0.20972816944122313
epoch: 41 training_loss 0.17212716974318026 test_loss: 0.21404428482055665
epoch: 42 training_loss 0.17084455505013466 test_loss: 0.20117697715759278
epoch: 43 training_loss 0.174749576151371 test_loss: 0.20268721580505372
epoch: 44 training_loss 0.17234830789268016 test_loss: 0.1986111044883728
epoch: 45 training_loss 0.17338349871337413 test_loss: 0.2122708559036255
epoch: 46 training_loss 0.17653818890452386 test_loss: 0.20399787425994872
epoch: 47 training_loss 0.17146934941411018 test_loss: 0.1893852710723877
epoch: 48 training_loss 0.1702251447737217 test_loss: 0.20925934314727784
epoch: 49 training_loss 0.16818088479340076 test_loss: 0.20447771549224852
epoch: 50 training_loss 0.1674617411196232 test_loss: 0.1945682406425476
epoch: 51 training_loss 0.17628982849419117 test_loss: 0.2011958360671997
epoch: 52 training_loss 0.172046158015728 test_loss: 0.18723185062408448
epoch: 53 training_loss 0.1732025421410799 test_loss: 0.1968082904815674
epoch: 54 training_loss 0.16334224421530963 test_loss: 0.20193252563476563
epoch: 55 training_loss 0.1721835132688284 test_loss: 0.20335533618927001
epoch: 56 training_loss 0.16804467007517815 test_loss: 0.18295189142227172
epoch: 57 training_loss 0.17220357343554496 test_loss: 0.1965328574180603
epoch: 58 training_loss 0.16994120441377164 test_loss: 0.19696371555328368
epoch: 59 training_loss 0.17072915986180306 test_loss: 0.18914470672607422
epoch: 60 training_loss 0.1632320275530219 test_loss: 0.1852584719657898
epoch: 61 training_loss 0.16723786406219004 test_loss: 0.2039799451828003
epoch: 62 training_loss 0.1664180938154459 test_loss: 0.2199694871902466
epoch: 63 training_loss 0.1724751617014408 test_loss: 0.20522887706756593
epoch: 64 training_loss 0.1777101031690836 test_loss: 0.19049075841903687
epoch: 65 training_loss 0.16349843442440032 test_loss: 0.20111842155456544
epoch: 66 training_loss 0.16706343196332454 test_loss: 0.18776988983154297
epoch: 67 training_loss 0.1652837086468935 test_loss: 0.2000774621963501
epoch: 68 training_loss 0.16779526725411414 test_loss: 0.1920316219329834
epoch: 69 training_loss 0.16433951850980522 test_loss: 0.19626065492630004
epoch: 70 training_loss 0.16548716925084592 test_loss: 0.21340584754943848
epoch: 71 training_loss 0.1721973180770874 test_loss: 0.20044827461242676
epoch: 72 training_loss 0.1628514514118433 test_loss: 0.19032838344573974
epoch: 73 training_loss 0.16706933006644248 test_loss: 0.19067976474761963
epoch: 74 training_loss 0.17177504613995553 test_loss: 0.19895268678665162
epoch: 75 training_loss 0.17384895436465742 test_loss: 0.1948147177696228
epoch: 76 training_loss 0.17021592050790788 test_loss: 0.20833449363708495
epoch: 77 training_loss 0.1691877591237426 test_loss: 0.19239954948425292
epoch: 78 training_loss 0.16419490210711957 test_loss: 0.1966231107711792
epoch: 79 training_loss 0.16953718706965445 test_loss: 0.1981894850730896
epoch: 80 training_loss 0.17037831358611583 test_loss: 0.20392487049102784
epoch: 81 training_loss 0.174391111060977 test_loss: 0.19524810314178467
epoch: 82 training_loss 0.16512650415301322 test_loss: 0.20980219841003417
epoch: 83 training_loss 0.17070781648159028 test_loss: 0.19580031633377076
epoch: 84 training_loss 0.16831656213849783 test_loss: 0.2034883975982666
epoch: 85 training_loss 0.16842921152710916 test_loss: 0.19272770881652831
epoch: 86 training_loss 0.168070877417922 test_loss: 0.19095945358276367
epoch: 87 training_loss 0.16915768764913083 test_loss: 0.20436110496520996
epoch: 88 training_loss 0.16604924477636815 test_loss: 0.20203373432159424
epoch: 89 training_loss 0.16886532105505467 test_loss: 0.1790832757949829
epoch: 90 training_loss 0.16905909210443495 test_loss: 0.19340083599090577
epoch: 91 training_loss 0.16111557625234127 test_loss: 0.1964900851249695
epoch: 92 training_loss 0.16360684469342232 test_loss: 0.1981012463569641
epoch: 93 training_loss 0.16683021001517773 test_loss: 0.20213873386383058
epoch: 94 training_loss 0.17153119303286077 test_loss: 0.19036340713500977
epoch: 95 training_loss 0.1734603864699602 test_loss: 0.20512106418609619
epoch: 96 training_loss 0.16457027927041054 test_loss: 0.19602197408676147
epoch: 97 training_loss 0.1680049815773964 test_loss: 0.18488422632217408
epoch: 98 training_loss 0.1645616851747036 test_loss: 0.187025785446167
epoch: 99 training_loss 0.16836434219032526 test_loss: 0.1845933675765991
epoch: 100 training_loss 0.16028973057866097 test_loss: 0.1934533953666687
epoch: 101 training_loss 0.16776088386774063 test_loss: 0.19295787811279297
epoch: 102 training_loss 0.1687122020870447 test_loss: 0.20840697288513182
epoch: 103 training_loss 0.17058837845921515 test_loss: 0.20974578857421874
epoch: 104 training_loss 0.1710793237760663 test_loss: 0.19540925025939943
epoch: 105 training_loss 0.16267353542149066 test_loss: 0.2073225975036621
epoch: 106 training_loss 0.16432603612542152 test_loss: 0.2050611972808838
epoch: 107 training_loss 0.15475801169872283 test_loss: 0.2119922161102295
epoch: 108 training_loss 0.16870929762721062 test_loss: 0.19598363637924193
epoch: 109 training_loss 0.17066097989678383 test_loss: 0.19719293117523193
epoch: 110 training_loss 0.17167710937559605 test_loss: 0.19714490175247193
epoch: 111 training_loss 0.1650823836773634 test_loss: 0.19341485500335692
epoch: 112 training_loss 0.17308973297476768 test_loss: 0.18664003610610963
epoch: 113 training_loss 0.1573468628525734 test_loss: 0.19173817634582518
epoch: 114 training_loss 0.16643741190433503 test_loss: 0.1931280255317688
epoch: 115 training_loss 0.15957317546010016 test_loss: 0.2083808183670044
epoch: 116 training_loss 0.1703332420438528 test_loss: 0.19913067817687988
epoch: 117 training_loss 0.17093043223023416 test_loss: 0.20739660263061524
epoch: 118 training_loss 0.1626005021110177 test_loss: 0.1927950143814087
epoch: 119 training_loss 0.16685414746403693 test_loss: 0.1864706039428711
epoch: 120 training_loss 0.16467959493398665 test_loss: 0.20255019664764404
epoch: 121 training_loss 0.16547568544745445 test_loss: 0.19300252199172974
epoch: 122 training_loss 0.1699300245195627 test_loss: 0.2070467472076416
epoch: 123 training_loss 0.16609076224267483 test_loss: 0.19584362506866454
epoch: 124 training_loss 0.1583924788981676 test_loss: 0.19737870693206788
epoch: 125 training_loss 0.1570444220677018 test_loss: 0.19796061515808105
epoch: 126 training_loss 0.1631582786887884 test_loss: 0.19450485706329346
epoch: 127 training_loss 0.1666392721235752 test_loss: 0.18711894750595093
epoch: 128 training_loss 0.1585807003080845 test_loss: 0.21023304462432862
epoch: 129 training_loss 0.16605131156742572 test_loss: 0.18789887428283691
epoch: 130 training_loss 0.15682917669415475 test_loss: 0.19319918155670165
epoch: 131 training_loss 0.16493116177618503 test_loss: 0.2029733657836914
epoch: 132 training_loss 0.16703882068395615 test_loss: 0.20177819728851318
epoch: 133 training_loss 0.1668954899162054 test_loss: 0.18435771465301515
epoch: 134 training_loss 0.15914163380861282 test_loss: 0.20832304954528807
epoch: 135 training_loss 0.16170143142342566 test_loss: 0.19267410039901733
epoch: 136 training_loss 0.15949811711907386 test_loss: 0.1956334352493286
epoch: 137 training_loss 0.15728844188153743 test_loss: 0.18796979188919066
epoch: 138 training_loss 0.16175148375332354 test_loss: 0.1891847848892212
epoch: 139 training_loss 0.16377796940505504 test_loss: 0.18741551637649537
epoch: 140 training_loss 0.15732557781040668 test_loss: 0.1954893708229065
epoch: 141 training_loss 0.15786985874176027 test_loss: 0.2133345603942871
epoch: 142 training_loss 0.16041186973452567 test_loss: 0.19025422334671022
epoch: 143 training_loss 0.16620399981737136 test_loss: 0.2078690528869629
epoch: 144 training_loss 0.15623109094798565 test_loss: 0.20747919082641603
epoch: 145 training_loss 0.1621164947003126 test_loss: 0.1959166407585144
epoch: 146 training_loss 0.16748143829405307 test_loss: 0.20155601501464843
epoch: 147 training_loss 0.1522054486349225 test_loss: 0.208732008934021
epoch: 148 training_loss 0.1586672405153513 test_loss: 0.1989396333694458
epoch: 149 training_loss 0.16513507895171642 test_loss: 0.20043549537658692
epoch: 0 training_loss 8.180643334388733 test_loss: 4.699826049804687
epoch: 1 training_loss 3.8124246549606324 test_loss: 3.0611820220947266
epoch: 2 training_loss 2.6974765610694886 test_loss: 2.270892333984375
epoch: 3 training_loss 2.121871296167374 test_loss: 1.9223646163940429
epoch: 4 training_loss 1.7881766653060913 test_loss: 1.6853242874145509
epoch: 5 training_loss 1.6297162699699401 test_loss: 1.5299348831176758
epoch: 6 training_loss 1.5121020889282226 test_loss: 1.4100900650024415
epoch: 7 training_loss 1.3905401253700256 test_loss: 1.3369417190551758
epoch: 8 training_loss 1.307960135936737 test_loss: 1.2403363227844237
epoch: 9 training_loss 1.2069942331314087 test_loss: 1.193779754638672
epoch: 10 training_loss 1.1792080354690553 test_loss: 1.1395299911499024
epoch: 11 training_loss 1.1174036997556687 test_loss: 1.0903520584106445
epoch: 12 training_loss 1.091983396410942 test_loss: 1.0610936164855957
epoch: 13 training_loss 1.0540990555286407 test_loss: 1.0343976020812988
epoch: 14 training_loss 1.007906324863434 test_loss: 0.9598573684692383
epoch: 15 training_loss 0.9692203933000565 test_loss: 0.9480223655700684
epoch: 16 training_loss 0.9462639498710632 test_loss: 0.9288087844848633
epoch: 17 training_loss 0.9307338833808899 test_loss: 0.9208863258361817
epoch: 18 training_loss 0.9042080354690551 test_loss: 0.8702538490295411
epoch: 19 training_loss 0.887960109114647 test_loss: 0.8959749221801758
epoch: 20 training_loss 0.8792298865318299 test_loss: 0.8545113563537597
epoch: 21 training_loss 0.8487503910064698 test_loss: 0.8616250038146973
epoch: 22 training_loss 0.8442913204431534 test_loss: 0.8631529808044434
epoch: 23 training_loss 0.8116546922922134 test_loss: 0.8419024467468261
epoch: 24 training_loss 0.8278859746456146 test_loss: 0.8158727645874023
epoch: 25 training_loss 0.8118861925601959 test_loss: 0.8150571823120117
epoch: 26 training_loss 0.7900387066602707 test_loss: 0.7999277114868164
epoch: 27 training_loss 0.7874263507127762 test_loss: 0.7674352169036865
epoch: 28 training_loss 0.773687151670456 test_loss: 0.7550107002258301
epoch: 29 training_loss 0.7712455493211746 test_loss: 0.7599972248077392
epoch: 30 training_loss 0.7665624380111694 test_loss: 0.7634906768798828
epoch: 31 training_loss 0.7716084790229797 test_loss: 0.7556713104248047
epoch: 32 training_loss 0.7507547831535339 test_loss: 0.7329480171203613
epoch: 33 training_loss 0.7345474237203597 test_loss: 0.7224589347839355
epoch: 34 training_loss 0.7407443326711655 test_loss: 0.7177520275115967
epoch: 35 training_loss 0.7205810028314591 test_loss: 0.7117564678192139
epoch: 36 training_loss 0.7183582240343094 test_loss: 0.7146289825439454
epoch: 37 training_loss 0.709388962984085 test_loss: 0.6957438468933106
epoch: 38 training_loss 0.698008873462677 test_loss: 0.6882108688354492
epoch: 39 training_loss 0.7061716932058334 test_loss: 0.7037843227386474
epoch: 40 training_loss 0.6988040620088577 test_loss: 0.667723274230957
epoch: 41 training_loss 0.6953657597303391 test_loss: 0.689476728439331
epoch: 42 training_loss 0.6880005276203156 test_loss: 0.6965315818786622
epoch: 43 training_loss 0.6786307245492935 test_loss: 0.6599098205566406
epoch: 44 training_loss 0.6751199561357498 test_loss: 0.6984328746795654
epoch: 45 training_loss 0.6843822330236435 test_loss: 0.682911491394043
epoch: 46 training_loss 0.6542397630214691 test_loss: 0.6537495613098144
epoch: 47 training_loss 0.6565924412012101 test_loss: 0.6601790428161621
epoch: 48 training_loss 0.65258798122406 test_loss: 0.6432258605957031
epoch: 49 training_loss 0.6637323248386383 test_loss: 0.6409353256225586
epoch: 50 training_loss 0.6312343156337739 test_loss: 0.6678255081176758
epoch: 51 training_loss 0.6400262391567231 test_loss: 0.6490729808807373
epoch: 52 training_loss 0.6239977020025254 test_loss: 0.6281136035919189
epoch: 53 training_loss 0.6306981456279754 test_loss: 0.6545260906219482
epoch: 54 training_loss 0.6375982868671417 test_loss: 0.6348897457122803
epoch: 55 training_loss 0.6191596341133118 test_loss: 0.6156543254852295
epoch: 56 training_loss 0.6213161385059357 test_loss: 0.631389570236206
epoch: 57 training_loss 0.6144135689735413 test_loss: 0.6112998962402344
epoch: 58 training_loss 0.6097107702493667 test_loss: 0.6234025001525879
epoch: 59 training_loss 0.6122059369087219 test_loss: 0.5915802478790283
epoch: 60 training_loss 0.6154933279752731 test_loss: 0.5904458045959473
epoch: 61 training_loss 0.6093324321508408 test_loss: 0.5961549282073975
epoch: 62 training_loss 0.5995770817995072 test_loss: 0.6022397518157959
epoch: 63 training_loss 0.5985224241018295 test_loss: 0.6109647274017334
epoch: 64 training_loss 0.5970104777812958 test_loss: 0.5852410793304443
epoch: 65 training_loss 0.5983098751306534 test_loss: 0.5906209945678711
epoch: 66 training_loss 0.5912384223937989 test_loss: 0.5912115097045898
epoch: 67 training_loss 0.5876228344440461 test_loss: 0.6006629467010498
epoch: 68 training_loss 0.5942810648679733 test_loss: 0.5827434062957764
epoch: 69 training_loss 0.5821126440167427 test_loss: 0.5905452728271484
epoch: 70 training_loss 0.5787811183929443 test_loss: 0.5823913097381592
epoch: 71 training_loss 0.5846771651506424 test_loss: 0.5759293079376221
epoch: 72 training_loss 0.5745375978946686 test_loss: 0.5741498470306396
epoch: 73 training_loss 0.5712391185760498 test_loss: 0.5693012237548828
epoch: 74 training_loss 0.589478988647461 test_loss: 0.578315019607544
epoch: 75 training_loss 0.5695554935932159 test_loss: 0.5948897361755371
epoch: 76 training_loss 0.5607814797759056 test_loss: 0.5731300830841064
epoch: 77 training_loss 0.5702328303456307 test_loss: 0.5810202121734619
epoch: 78 training_loss 0.5597305235266685 test_loss: 0.5765832901000977
epoch: 79 training_loss 0.5727567720413208 test_loss: 0.5626060009002686
epoch: 80 training_loss 0.5626504969596863 test_loss: 0.5865680694580078
epoch: 81 training_loss 0.5572215524315834 test_loss: 0.5658530235290528
epoch: 82 training_loss 0.5569785740971566 test_loss: 0.5507050037384034
epoch: 83 training_loss 0.5554460129141807 test_loss: 0.559197998046875
epoch: 84 training_loss 0.5623315492272377 test_loss: 0.5834514617919921
epoch: 85 training_loss 0.5577503573894501 test_loss: 0.5657548427581787
epoch: 86 training_loss 0.5561863127350807 test_loss: 0.5505067825317382
epoch: 87 training_loss 0.551693317592144 test_loss: 0.5496843338012696
epoch: 88 training_loss 0.5515985235571861 test_loss: 0.5389323234558105
epoch: 89 training_loss 0.5429914140701294 test_loss: 0.5548510074615478
epoch: 90 training_loss 0.5396175667643547 test_loss: 0.593686866760254
epoch: 91 training_loss 0.5422314831614494 test_loss: 0.5459829330444336
epoch: 92 training_loss 0.5398243415355682 test_loss: 0.5359714984893799
epoch: 93 training_loss 0.5413740515708924 test_loss: 0.5585761070251465
epoch: 94 training_loss 0.546782459616661 test_loss: 0.5504024505615235
epoch: 95 training_loss 0.5369709107279778 test_loss: 0.5415243148803711
epoch: 96 training_loss 0.5336315104365349 test_loss: 0.5326497554779053
epoch: 97 training_loss 0.5264082726836204 test_loss: 0.5409822463989258
epoch: 98 training_loss 0.5337495020031929 test_loss: 0.5324262142181396
epoch: 99 training_loss 0.5427537778019905 test_loss: 0.5337800979614258
epoch: 100 training_loss 0.5260362347960472 test_loss: 0.5369768142700195
epoch: 101 training_loss 0.5267925503849983 test_loss: 0.5700146198272705
epoch: 102 training_loss 0.5331531658768653 test_loss: 0.5201085090637207
epoch: 103 training_loss 0.52890001475811 test_loss: 0.5247668266296387
epoch: 104 training_loss 0.5260090199112892 test_loss: 0.5262927055358887
epoch: 105 training_loss 0.5236646625399589 test_loss: 0.5244564056396485
epoch: 106 training_loss 0.5306414693593979 test_loss: 0.5201874256134034
epoch: 107 training_loss 0.5279986953735352 test_loss: 0.5233776092529296
epoch: 108 training_loss 0.522478129863739 test_loss: 0.5222498416900635
epoch: 109 training_loss 0.5160467299818993 test_loss: 0.5325814723968506
epoch: 110 training_loss 0.5143567654490471 test_loss: 0.5136945724487305
epoch: 111 training_loss 0.5218396872282028 test_loss: 0.532798433303833
epoch: 112 training_loss 0.5059265151619912 test_loss: 0.5137073993682861
epoch: 113 training_loss 0.514886973798275 test_loss: 0.5225701808929444
epoch: 114 training_loss 0.5269896176457405 test_loss: 0.5392299175262452
epoch: 115 training_loss 0.5153352898359299 test_loss: 0.534790325164795
epoch: 116 training_loss 0.5236862856149673 test_loss: 0.5135930061340332
epoch: 117 training_loss 0.5127001690864563 test_loss: 0.5244925498962403
epoch: 118 training_loss 0.5045288300514221 test_loss: 0.518953275680542
epoch: 119 training_loss 0.5104439806938171 test_loss: 0.5254555702209472
epoch: 120 training_loss 0.508625541627407 test_loss: 0.5084868907928467
epoch: 121 training_loss 0.5128917959332466 test_loss: 0.5118935585021973
epoch: 122 training_loss 0.5108056101202965 test_loss: 0.5052630424499511
epoch: 123 training_loss 0.502441825568676 test_loss: 0.49501748085021974
epoch: 124 training_loss 0.4998543152213097 test_loss: 0.5113969802856445
epoch: 125 training_loss 0.5057933834195137 test_loss: 0.517194128036499
epoch: 126 training_loss 0.4970866194367409 test_loss: 0.49753870964050295
epoch: 127 training_loss 0.5050067391991615 test_loss: 0.506170654296875
epoch: 128 training_loss 0.5064427900314331 test_loss: 0.5036179542541503
epoch: 129 training_loss 0.4919867497682571 test_loss: 0.5355098724365235
epoch: 130 training_loss 0.4992584404349327 test_loss: 0.49149255752563475
epoch: 131 training_loss 0.49946473300457 test_loss: 0.4941986560821533
epoch: 132 training_loss 0.4924482426047325 test_loss: 0.4949207305908203
epoch: 133 training_loss 0.48923475086688994 test_loss: 0.49674367904663086
epoch: 134 training_loss 0.4987559613585472 test_loss: 0.5526583194732666
epoch: 135 training_loss 0.4948702707886696 test_loss: 0.5008617401123047
epoch: 136 training_loss 0.4884281724691391 test_loss: 0.5002414226531983
epoch: 137 training_loss 0.49277040868997574 test_loss: 0.5301918029785156
epoch: 138 training_loss 0.4910923874378204 test_loss: 0.501447057723999
epoch: 139 training_loss 0.4945248967409134 test_loss: 0.4894232749938965
epoch: 140 training_loss 0.4887442123889923 test_loss: 0.4916989803314209
epoch: 141 training_loss 0.48647869378328323 test_loss: 0.4903258323669434
epoch: 142 training_loss 0.4925220441818237 test_loss: 0.48982882499694824
epoch: 143 training_loss 0.49934181094169616 test_loss: 0.5055880069732666
epoch: 144 training_loss 0.4910809552669525 test_loss: 0.48480820655822754
epoch: 145 training_loss 0.4925435993075371 test_loss: 0.4850333213806152
epoch: 146 training_loss 0.48280820310115813 test_loss: 0.5049611091613769
epoch: 147 training_loss 0.4941451448202133 test_loss: 0.49134087562561035
epoch: 148 training_loss 0.4806459441781044 test_loss: 0.4877814769744873
epoch: 149 training_loss 0.48422993868589403 test_loss: 0.48881282806396487
2305.8310275463655
episode: 0 training return: tensor(-261.0514, device='cuda:0')
episode: 1 training return: tensor(-339.5308, device='cuda:0')
episode: 2 training return: tensor(-231.0292, device='cuda:0')
episode: 3 training return: tensor(-323.7555, device='cuda:0')
epoch: 1 test_true_pfm: 2188.125413859017 sim_pfm: -56.3888721507877
episode: 4 training return: tensor(-246.3581, device='cuda:0')
episode: 5 training return: tensor(39.8246, device='cuda:0')
episode: 6 training return: tensor(-367.4017, device='cuda:0')
episode: 7 training return: tensor(-67.9886, device='cuda:0')
epoch: 2 test_true_pfm: 2370.992053191668 sim_pfm: -288.1091828651649
episode: 8 training return: tensor(-217.7564, device='cuda:0')
episode: 9 training return: tensor(-327.8484, device='cuda:0')
episode: 10 training return: tensor(-336.6968, device='cuda:0')
episode: 11 training return: tensor(-134.3733, device='cuda:0')
epoch: 3 test_true_pfm: 1746.8802042847747 sim_pfm: -164.2124709725807
episode: 12 training return: tensor(-360.9040, device='cuda:0')
episode: 13 training return: tensor(-279.6957, device='cuda:0')
episode: 14 training return: tensor(-367.5822, device='cuda:0')
episode: 15 training return: tensor(-367.2696, device='cuda:0')
epoch: 4 test_true_pfm: 1347.9487413118115 sim_pfm: -122.35552299012973
episode: 16 training return: tensor(155.0399, device='cuda:0')
episode: 17 training return: tensor(30.5721, device='cuda:0')
episode: 18 training return: tensor(-284.1445, device='cuda:0')
episode: 19 training return: tensor(-356.8413, device='cuda:0')
epoch: 5 test_true_pfm: 1819.3210194285966 sim_pfm: 5.7645482534426264
episode: 20 training return: tensor(-27.8650, device='cuda:0')
episode: 21 training return: tensor(-256.0209, device='cuda:0')
episode: 22 training return: tensor(-299.4959, device='cuda:0')
episode: 23 training return: tensor(40.9611, device='cuda:0')
epoch: 6 test_true_pfm: 2437.9325943143526 sim_pfm: -143.117301740606
episode: 24 training return: tensor(-151.7056, device='cuda:0')
episode: 25 training return: tensor(-136.6617, device='cuda:0')
episode: 26 training return: tensor(-369.5995, device='cuda:0')
episode: 27 training return: tensor(155.2573, device='cuda:0')
epoch: 7 test_true_pfm: 2090.889781575877 sim_pfm: 51.67434619431151
episode: 28 training return: tensor(234.5099, device='cuda:0')
episode: 29 training return: tensor(-325.3432, device='cuda:0')
episode: 30 training return: tensor(-367.7589, device='cuda:0')
episode: 31 training return: tensor(-270.9315, device='cuda:0')
epoch: 8 test_true_pfm: 2615.7840918821894 sim_pfm: -78.79903635435039
episode: 32 training return: tensor(238.9182, device='cuda:0')
episode: 33 training return: tensor(-276.4540, device='cuda:0')
episode: 34 training return: tensor(-327.6810, device='cuda:0')
episode: 35 training return: tensor(-126.1150, device='cuda:0')
epoch: 9 test_true_pfm: 2051.2853976606434 sim_pfm: -275.49761018215213
episode: 36 training return: tensor(263.7971, device='cuda:0')
episode: 37 training return: tensor(58.3902, device='cuda:0')
episode: 38 training return: tensor(-167.0516, device='cuda:0')
episode: 39 training return: tensor(29.4059, device='cuda:0')
epoch: 10 test_true_pfm: 1711.8656621760263 sim_pfm: -47.460186450975016
episode: 40 training return: tensor(-261.2447, device='cuda:0')
episode: 41 training return: tensor(-354.3410, device='cuda:0')
episode: 42 training return: tensor(-196.4023, device='cuda:0')
episode: 43 training return: tensor(-43.6218, device='cuda:0')
epoch: 11 test_true_pfm: 2658.6952324855843 sim_pfm: 23.59097375983644
episode: 44 training return: tensor(-368.8879, device='cuda:0')
episode: 45 training return: tensor(-331.9531, device='cuda:0')
episode: 46 training return: tensor(-221.3615, device='cuda:0')
episode: 47 training return: tensor(-297.9317, device='cuda:0')
epoch: 12 test_true_pfm: 2145.6001231924542 sim_pfm: -131.01832040564236
episode: 48 training return: tensor(-110.6069, device='cuda:0')
episode: 49 training return: tensor(-332.0981, device='cuda:0')
episode: 50 training return: tensor(-117.4730, device='cuda:0')
episode: 51 training return: tensor(-41.1899, device='cuda:0')
epoch: 13 test_true_pfm: 2513.031955426853 sim_pfm: 91.49699357939728
episode: 52 training return: tensor(140.4236, device='cuda:0')
episode: 53 training return: tensor(-8.1315, device='cuda:0')
episode: 54 training return: tensor(-277.1959, device='cuda:0')
episode: 55 training return: tensor(221.8547, device='cuda:0')
epoch: 14 test_true_pfm: 2595.1669723148293 sim_pfm: -28.78949166702417
episode: 56 training return: tensor(-319.6189, device='cuda:0')
episode: 57 training return: tensor(-299.7954, device='cuda:0')
episode: 58 training return: tensor(-322.1822, device='cuda:0')
episode: 59 training return: tensor(-276.4287, device='cuda:0')
epoch: 15 test_true_pfm: 1619.7351240477094 sim_pfm: 110.413772440108
episode: 60 training return: tensor(-333.8809, device='cuda:0')
episode: 61 training return: tensor(-212.7424, device='cuda:0')
episode: 62 training return: tensor(-280.2642, device='cuda:0')
episode: 63 training return: tensor(-273.8648, device='cuda:0')
epoch: 16 test_true_pfm: 2018.6068737302787 sim_pfm: -68.16064988736373
episode: 64 training return: tensor(-200.0315, device='cuda:0')
episode: 65 training return: tensor(-267.5786, device='cuda:0')
episode: 66 training return: tensor(-268.6531, device='cuda:0')
episode: 67 training return: tensor(219.1277, device='cuda:0')
epoch: 17 test_true_pfm: 2057.9956995289285 sim_pfm: -56.18188024366585
episode: 68 training return: tensor(245.8785, device='cuda:0')
episode: 69 training return: tensor(-112.3969, device='cuda:0')
episode: 70 training return: tensor(-293.1678, device='cuda:0')
episode: 71 training return: tensor(153.0076, device='cuda:0')
epoch: 18 test_true_pfm: 1691.734945081838 sim_pfm: 77.77654149348382
episode: 72 training return: tensor(67.6116, device='cuda:0')
episode: 73 training return: tensor(-25.1674, device='cuda:0')
episode: 74 training return: tensor(258.3963, device='cuda:0')
episode: 75 training return: tensor(97.3977, device='cuda:0')
epoch: 19 test_true_pfm: 2652.7245628193455 sim_pfm: -141.19292705442058
episode: 76 training return: tensor(-278.3248, device='cuda:0')
episode: 77 training return: tensor(-318.0830, device='cuda:0')
episode: 78 training return: tensor(-207.0578, device='cuda:0')
episode: 79 training return: tensor(-295.8568, device='cuda:0')
epoch: 20 test_true_pfm: 2624.6216667448366 sim_pfm: -153.74617077941852
episode: 80 training return: tensor(82.6557, device='cuda:0')
episode: 81 training return: tensor(251.3944, device='cuda:0')
episode: 82 training return: tensor(-263.9816, device='cuda:0')
episode: 83 training return: tensor(-265.1614, device='cuda:0')
epoch: 21 test_true_pfm: 1862.4824914647925 sim_pfm: -67.21604136715177
episode: 84 training return: tensor(34.8087, device='cuda:0')
episode: 85 training return: tensor(41.3116, device='cuda:0')
episode: 86 training return: tensor(109.4233, device='cuda:0')
episode: 87 training return: tensor(-75.2194, device='cuda:0')
epoch: 22 test_true_pfm: 2080.058637071718 sim_pfm: 86.39811532853248
episode: 88 training return: tensor(-226.8734, device='cuda:0')
episode: 89 training return: tensor(202.2929, device='cuda:0')
episode: 90 training return: tensor(-238.7387, device='cuda:0')
episode: 91 training return: tensor(14.1853, device='cuda:0')
epoch: 23 test_true_pfm: 2958.600759807596 sim_pfm: 90.49105392673907
episode: 92 training return: tensor(-275.4159, device='cuda:0')
episode: 93 training return: tensor(-350.3995, device='cuda:0')
episode: 94 training return: tensor(-167.3485, device='cuda:0')
episode: 95 training return: tensor(77.2507, device='cuda:0')
epoch: 24 test_true_pfm: 2260.1643160931553 sim_pfm: -21.2060055945185
episode: 96 training return: tensor(-50.0257, device='cuda:0')
episode: 97 training return: tensor(225.6668, device='cuda:0')
episode: 98 training return: tensor(42.1315, device='cuda:0')
episode: 99 training return: tensor(-325.5043, device='cuda:0')
epoch: 25 test_true_pfm: 3207.7545444702882 sim_pfm: -200.25132330772854
episode: 100 training return: tensor(-386.3685, device='cuda:0')
episode: 101 training return: tensor(41.6364, device='cuda:0')
episode: 102 training return: tensor(-266.5558, device='cuda:0')
episode: 103 training return: tensor(-308.5612, device='cuda:0')
epoch: 26 test_true_pfm: 2381.8043103333484 sim_pfm: 202.9166940569994
episode: 104 training return: tensor(-31.2726, device='cuda:0')
episode: 105 training return: tensor(-209.8447, device='cuda:0')
episode: 106 training return: tensor(67.2050, device='cuda:0')
episode: 107 training return: tensor(141.1509, device='cuda:0')
epoch: 27 test_true_pfm: 2083.440446612885 sim_pfm: -33.789901147092074
episode: 108 training return: tensor(258.7628, device='cuda:0')
episode: 109 training return: tensor(-326.1764, device='cuda:0')
episode: 110 training return: tensor(22.7895, device='cuda:0')
episode: 111 training return: tensor(85.7475, device='cuda:0')
epoch: 28 test_true_pfm: 2573.4478356288873 sim_pfm: -104.3903475474314
episode: 112 training return: tensor(-325.3247, device='cuda:0')
episode: 113 training return: tensor(23.6949, device='cuda:0')
episode: 114 training return: tensor(-234.4757, device='cuda:0')
episode: 115 training return: tensor(-268.7191, device='cuda:0')
epoch: 29 test_true_pfm: 2390.99130114329 sim_pfm: -123.37735489032154
episode: 116 training return: tensor(-212.1679, device='cuda:0')
episode: 117 training return: tensor(-241.6852, device='cuda:0')
episode: 118 training return: tensor(165.9728, device='cuda:0')
episode: 119 training return: tensor(229.8211, device='cuda:0')
epoch: 30 test_true_pfm: 1546.2457707706988 sim_pfm: -188.1959176782208
episode: 120 training return: tensor(-270.9534, device='cuda:0')
episode: 121 training return: tensor(-274.0140, device='cuda:0')
episode: 122 training return: tensor(-69.0512, device='cuda:0')
episode: 123 training return: tensor(-313.5313, device='cuda:0')
epoch: 31 test_true_pfm: 2103.131017011711 sim_pfm: 136.98904060645145
episode: 124 training return: tensor(-276.2442, device='cuda:0')
episode: 125 training return: tensor(99.3762, device='cuda:0')
episode: 126 training return: tensor(228.9908, device='cuda:0')
episode: 127 training return: tensor(-324.7811, device='cuda:0')
epoch: 32 test_true_pfm: 3061.368097616334 sim_pfm: 218.92395663794014
episode: 128 training return: tensor(280.5846, device='cuda:0')
episode: 129 training return: tensor(-303.1843, device='cuda:0')
episode: 130 training return: tensor(-223.0136, device='cuda:0')
episode: 131 training return: tensor(97.6520, device='cuda:0')
epoch: 33 test_true_pfm: 2175.2662692423023 sim_pfm: -267.0708915894502
episode: 132 training return: tensor(-124.3782, device='cuda:0')
episode: 133 training return: tensor(-280.1491, device='cuda:0')
episode: 134 training return: tensor(-333.2580, device='cuda:0')
episode: 135 training return: tensor(-340.3988, device='cuda:0')
epoch: 34 test_true_pfm: 2219.2698037542445 sim_pfm: -175.5040195331676
episode: 136 training return: tensor(-241.4913, device='cuda:0')
episode: 137 training return: tensor(-329.0582, device='cuda:0')
episode: 138 training return: tensor(-61.0106, device='cuda:0')
episode: 139 training return: tensor(-299.7958, device='cuda:0')
epoch: 35 test_true_pfm: 3075.7332421070582 sim_pfm: -21.523042767774314
episode: 140 training return: tensor(-209.0506, device='cuda:0')
episode: 141 training return: tensor(-164.7445, device='cuda:0')
episode: 142 training return: tensor(-320.0604, device='cuda:0')
episode: 143 training return: tensor(259.2150, device='cuda:0')
epoch: 36 test_true_pfm: 1904.1454176780533 sim_pfm: 25.726970754913054
episode: 144 training return: tensor(-264.1423, device='cuda:0')
episode: 145 training return: tensor(-222.8089, device='cuda:0')
episode: 146 training return: tensor(117.5127, device='cuda:0')
episode: 147 training return: tensor(-109.4796, device='cuda:0')
epoch: 37 test_true_pfm: 2694.014085254311 sim_pfm: -166.42373993477668
episode: 148 training return: tensor(37.7929, device='cuda:0')
episode: 149 training return: tensor(-277.9951, device='cuda:0')
episode: 150 training return: tensor(-4.3499, device='cuda:0')
episode: 151 training return: tensor(-155.3303, device='cuda:0')
epoch: 38 test_true_pfm: 2471.9379793409694 sim_pfm: -88.5755340392546
episode: 152 training return: tensor(-267.5540, device='cuda:0')
episode: 153 training return: tensor(-251.1748, device='cuda:0')
episode: 154 training return: tensor(232.1194, device='cuda:0')
episode: 155 training return: tensor(52.7968, device='cuda:0')
epoch: 39 test_true_pfm: 1719.1020919225457 sim_pfm: -227.64089686027728
episode: 156 training return: tensor(-11.6964, device='cuda:0')
episode: 157 training return: tensor(-139.2631, device='cuda:0')
episode: 158 training return: tensor(-267.8362, device='cuda:0')
episode: 159 training return: tensor(-269.8791, device='cuda:0')
epoch: 40 test_true_pfm: 2732.400783403351 sim_pfm: 57.75439039323828
episode: 160 training return: tensor(-279.4665, device='cuda:0')
episode: 161 training return: tensor(164.2483, device='cuda:0')
episode: 162 training return: tensor(-126.8260, device='cuda:0')
episode: 163 training return: tensor(-273.4895, device='cuda:0')
epoch: 41 test_true_pfm: 2484.5541458828784 sim_pfm: -14.481605796240425
episode: 164 training return: tensor(-329.0219, device='cuda:0')
episode: 165 training return: tensor(231.8805, device='cuda:0')
episode: 166 training return: tensor(-291.2260, device='cuda:0')
episode: 167 training return: tensor(-266.3823, device='cuda:0')
epoch: 42 test_true_pfm: 2952.2904700162817 sim_pfm: -22.42492249341255
episode: 168 training return: tensor(205.2989, device='cuda:0')
episode: 169 training return: tensor(64.5887, device='cuda:0')
episode: 170 training return: tensor(-372.7661, device='cuda:0')
episode: 171 training return: tensor(-134.9929, device='cuda:0')
epoch: 43 test_true_pfm: 2851.1064895082222 sim_pfm: 40.28853901725961
episode: 172 training return: tensor(-266.6853, device='cuda:0')
episode: 173 training return: tensor(-191.5448, device='cuda:0')
episode: 174 training return: tensor(-7.8560, device='cuda:0')
episode: 175 training return: tensor(-226.8139, device='cuda:0')
epoch: 44 test_true_pfm: 2399.4449450378147 sim_pfm: 209.6809754402881
episode: 176 training return: tensor(-355.9402, device='cuda:0')
episode: 177 training return: tensor(122.9175, device='cuda:0')
episode: 178 training return: tensor(-279.4683, device='cuda:0')
episode: 179 training return: tensor(-33.1164, device='cuda:0')
epoch: 45 test_true_pfm: 3133.3396447688942 sim_pfm: 156.38689096176918
episode: 180 training return: tensor(-237.7996, device='cuda:0')
episode: 181 training return: tensor(41.7101, device='cuda:0')
episode: 182 training return: tensor(-276.5219, device='cuda:0')
episode: 183 training return: tensor(17.5799, device='cuda:0')
epoch: 46 test_true_pfm: 2591.701880205354 sim_pfm: 118.77591848008645
episode: 184 training return: tensor(-309.9929, device='cuda:0')
episode: 185 training return: tensor(-99.6252, device='cuda:0')
episode: 186 training return: tensor(-247.7107, device='cuda:0')
episode: 187 training return: tensor(-332.3484, device='cuda:0')
epoch: 47 test_true_pfm: 1982.0207646092895 sim_pfm: -217.48845581487208
episode: 188 training return: tensor(-275.4338, device='cuda:0')
episode: 189 training return: tensor(-361.0817, device='cuda:0')
episode: 190 training return: tensor(-146.7735, device='cuda:0')
episode: 191 training return: tensor(-290.0316, device='cuda:0')
epoch: 48 test_true_pfm: 2148.7221749333203 sim_pfm: -79.79551217892246
episode: 192 training return: tensor(-222.3369, device='cuda:0')
episode: 193 training return: tensor(262.6616, device='cuda:0')
episode: 194 training return: tensor(84.0980, device='cuda:0')
episode: 195 training return: tensor(123.9748, device='cuda:0')
epoch: 49 test_true_pfm: 2488.7875758566774 sim_pfm: 161.7277722833484
episode: 196 training return: tensor(47.2501, device='cuda:0')
episode: 197 training return: tensor(-17.3445, device='cuda:0')
episode: 198 training return: tensor(264.0566, device='cuda:0')
episode: 199 training return: tensor(-256.0306, device='cuda:0')
epoch: 50 test_true_pfm: 2671.5475923758827 sim_pfm: 142.32581463757865
episode: 200 training return: tensor(85.3634, device='cuda:0')
episode: 201 training return: tensor(46.1830, device='cuda:0')
episode: 202 training return: tensor(-224.6102, device='cuda:0')
episode: 203 training return: tensor(-269.0146, device='cuda:0')
epoch: 51 test_true_pfm: 2548.977546638316 sim_pfm: -69.64707021178522
episode: 204 training return: tensor(221.0895, device='cuda:0')
episode: 205 training return: tensor(113.7995, device='cuda:0')
episode: 206 training return: tensor(235.6074, device='cuda:0')
episode: 207 training return: tensor(-318.7877, device='cuda:0')
epoch: 52 test_true_pfm: 2444.6626309725484 sim_pfm: -80.62953016802203
episode: 208 training return: tensor(226.0369, device='cuda:0')
episode: 209 training return: tensor(198.0521, device='cuda:0')
episode: 210 training return: tensor(-102.3736, device='cuda:0')
episode: 211 training return: tensor(222.8496, device='cuda:0')
epoch: 53 test_true_pfm: 2757.1961888494407 sim_pfm: -29.508832281765837
episode: 212 training return: tensor(230.2370, device='cuda:0')
episode: 213 training return: tensor(-256.5674, device='cuda:0')
episode: 214 training return: tensor(54.3522, device='cuda:0')
episode: 215 training return: tensor(-290.5679, device='cuda:0')
epoch: 54 test_true_pfm: 2626.954378949989 sim_pfm: -14.657365674094763
episode: 216 training return: tensor(-283.9256, device='cuda:0')
episode: 217 training return: tensor(242.9690, device='cuda:0')
episode: 218 training return: tensor(-297.0187, device='cuda:0')
episode: 219 training return: tensor(-339.9728, device='cuda:0')
epoch: 55 test_true_pfm: 2833.748838731209 sim_pfm: 17.88104014773853
episode: 220 training return: tensor(142.2527, device='cuda:0')
episode: 221 training return: tensor(69.9682, device='cuda:0')
episode: 222 training return: tensor(-22.0706, device='cuda:0')
episode: 223 training return: tensor(-112.2263, device='cuda:0')
epoch: 56 test_true_pfm: 2320.162269247718 sim_pfm: -78.17970029254987
episode: 224 training return: tensor(-272.0033, device='cuda:0')
episode: 225 training return: tensor(173.0052, device='cuda:0')
episode: 226 training return: tensor(-130.3836, device='cuda:0')
episode: 227 training return: tensor(-135.8025, device='cuda:0')
epoch: 57 test_true_pfm: 2565.2870365645595 sim_pfm: 54.627071573981084
episode: 228 training return: tensor(196.4852, device='cuda:0')
episode: 229 training return: tensor(-266.6826, device='cuda:0')
episode: 230 training return: tensor(219.1890, device='cuda:0')
episode: 231 training return: tensor(-14.4400, device='cuda:0')
epoch: 58 test_true_pfm: 2892.7566430099428 sim_pfm: 204.71111189867952
episode: 232 training return: tensor(207.5048, device='cuda:0')
episode: 233 training return: tensor(-26.1332, device='cuda:0')
episode: 234 training return: tensor(277.8560, device='cuda:0')
episode: 235 training return: tensor(-214.7348, device='cuda:0')
epoch: 59 test_true_pfm: 3156.624125930192 sim_pfm: 25.938287449534982
episode: 236 training return: tensor(226.2361, device='cuda:0')
episode: 237 training return: tensor(-373.8844, device='cuda:0')
episode: 238 training return: tensor(-266.9669, device='cuda:0')
episode: 239 training return: tensor(-263.8278, device='cuda:0')
epoch: 60 test_true_pfm: 2679.5109106659634 sim_pfm: 41.14884252190435
episode: 240 training return: tensor(166.9323, device='cuda:0')
episode: 241 training return: tensor(-321.0105, device='cuda:0')
episode: 242 training return: tensor(-68.6657, device='cuda:0')
episode: 243 training return: tensor(-7.6731, device='cuda:0')
epoch: 61 test_true_pfm: 2919.695381810144 sim_pfm: 66.38551750443487
episode: 244 training return: tensor(-132.0366, device='cuda:0')
episode: 245 training return: tensor(-263.7616, device='cuda:0')
episode: 246 training return: tensor(232.8436, device='cuda:0')
episode: 247 training return: tensor(203.5339, device='cuda:0')
epoch: 62 test_true_pfm: 2921.72097277247 sim_pfm: 213.01222941296874
episode: 248 training return: tensor(233.8603, device='cuda:0')
episode: 249 training return: tensor(-210.2185, device='cuda:0')
episode: 250 training return: tensor(-268.8544, device='cuda:0')
episode: 251 training return: tensor(-276.8112, device='cuda:0')
epoch: 63 test_true_pfm: 2760.238792126198 sim_pfm: 116.64059096460308
episode: 252 training return: tensor(-238.3195, device='cuda:0')
episode: 253 training return: tensor(155.6294, device='cuda:0')
episode: 254 training return: tensor(196.2548, device='cuda:0')
episode: 255 training return: tensor(-200.0376, device='cuda:0')
epoch: 64 test_true_pfm: 2380.7083655373317 sim_pfm: 90.32953233679291
episode: 256 training return: tensor(-108.2735, device='cuda:0')
episode: 257 training return: tensor(-373.3456, device='cuda:0')
episode: 258 training return: tensor(254.6073, device='cuda:0')
episode: 259 training return: tensor(175.5949, device='cuda:0')
epoch: 65 test_true_pfm: 3261.581884183402 sim_pfm: 63.02951974570169
episode: 260 training return: tensor(-113.5625, device='cuda:0')
episode: 261 training return: tensor(194.0209, device='cuda:0')
episode: 262 training return: tensor(270.2626, device='cuda:0')
episode: 263 training return: tensor(-272.0389, device='cuda:0')
epoch: 66 test_true_pfm: 3092.615004565765 sim_pfm: 47.96909024157018
episode: 264 training return: tensor(110.2704, device='cuda:0')
episode: 265 training return: tensor(-101.9730, device='cuda:0')
episode: 266 training return: tensor(-250.1117, device='cuda:0')
episode: 267 training return: tensor(-190.8285, device='cuda:0')
epoch: 67 test_true_pfm: 1995.84172807748 sim_pfm: 103.48525370225737
episode: 268 training return: tensor(-365.3026, device='cuda:0')
episode: 269 training return: tensor(-260.9503, device='cuda:0')
episode: 270 training return: tensor(-6.5550, device='cuda:0')
episode: 271 training return: tensor(203.8913, device='cuda:0')
epoch: 68 test_true_pfm: 2626.1681345181346 sim_pfm: -165.7439843689984
episode: 272 training return: tensor(211.9994, device='cuda:0')
episode: 273 training return: tensor(-346.9164, device='cuda:0')
episode: 274 training return: tensor(220.7478, device='cuda:0')
episode: 275 training return: tensor(208.9801, device='cuda:0')
epoch: 69 test_true_pfm: 2720.4323634790376 sim_pfm: 168.47542377651553
episode: 276 training return: tensor(280.0508, device='cuda:0')
episode: 277 training return: tensor(12.3939, device='cuda:0')
episode: 278 training return: tensor(228.7641, device='cuda:0')
episode: 279 training return: tensor(-240.8184, device='cuda:0')
epoch: 70 test_true_pfm: 2501.0880825715262 sim_pfm: -20.404021412551327
episode: 280 training return: tensor(-276.3429, device='cuda:0')
episode: 281 training return: tensor(-283.5412, device='cuda:0')
episode: 282 training return: tensor(-21.3060, device='cuda:0')
episode: 283 training return: tensor(-308.9647, device='cuda:0')
epoch: 71 test_true_pfm: 2697.6376107946467 sim_pfm: 171.77904959424632
episode: 284 training return: tensor(-372.7270, device='cuda:0')
episode: 285 training return: tensor(-251.9770, device='cuda:0')
episode: 286 training return: tensor(-118.4397, device='cuda:0')
episode: 287 training return: tensor(-268.8125, device='cuda:0')
epoch: 72 test_true_pfm: 1963.2304714336772 sim_pfm: 138.6510053431072
episode: 288 training return: tensor(245.1286, device='cuda:0')
episode: 289 training return: tensor(184.5250, device='cuda:0')
episode: 290 training return: tensor(120.1551, device='cuda:0')
episode: 291 training return: tensor(-62.1318, device='cuda:0')
epoch: 73 test_true_pfm: 2831.686953568196 sim_pfm: 54.787817049432
episode: 292 training return: tensor(-318.5840, device='cuda:0')
episode: 293 training return: tensor(-267.9810, device='cuda:0')
episode: 294 training return: tensor(-268.1822, device='cuda:0')
episode: 295 training return: tensor(-234.9533, device='cuda:0')
epoch: 74 test_true_pfm: 2962.2962048759505 sim_pfm: 50.541655875975266
episode: 296 training return: tensor(74.5398, device='cuda:0')
episode: 297 training return: tensor(38.3750, device='cuda:0')
episode: 298 training return: tensor(22.8520, device='cuda:0')
episode: 299 training return: tensor(-29.0912, device='cuda:0')
epoch: 75 test_true_pfm: 2661.062741677826 sim_pfm: 128.0393810857107
episode: 300 training return: tensor(139.5183, device='cuda:0')
episode: 301 training return: tensor(-354.3035, device='cuda:0')
episode: 302 training return: tensor(118.4234, device='cuda:0')
episode: 303 training return: tensor(307.4723, device='cuda:0')
epoch: 76 test_true_pfm: 2668.307309220469 sim_pfm: -7.312866669031791
episode: 304 training return: tensor(-240.8935, device='cuda:0')
episode: 305 training return: tensor(117.5990, device='cuda:0')
episode: 306 training return: tensor(245.7784, device='cuda:0')
episode: 307 training return: tensor(-265.0471, device='cuda:0')
epoch: 77 test_true_pfm: 2571.4915861788177 sim_pfm: 78.2132415145946
episode: 308 training return: tensor(62.8092, device='cuda:0')
episode: 309 training return: tensor(161.3424, device='cuda:0')
episode: 310 training return: tensor(131.5763, device='cuda:0')
episode: 311 training return: tensor(285.5425, device='cuda:0')
epoch: 78 test_true_pfm: 3098.470296037973 sim_pfm: 73.95091887866147
episode: 312 training return: tensor(-269.7414, device='cuda:0')
episode: 313 training return: tensor(196.9423, device='cuda:0')
episode: 314 training return: tensor(-46.1313, device='cuda:0')
episode: 315 training return: tensor(-228.4980, device='cuda:0')
epoch: 79 test_true_pfm: 2700.7471631914264 sim_pfm: 82.70347884636915
episode: 316 training return: tensor(-17.3452, device='cuda:0')
episode: 317 training return: tensor(199.3179, device='cuda:0')
episode: 318 training return: tensor(-243.2627, device='cuda:0')
episode: 319 training return: tensor(-245.8567, device='cuda:0')
epoch: 80 test_true_pfm: 2404.5019174083027 sim_pfm: 27.96968719305005
episode: 320 training return: tensor(-215.4872, device='cuda:0')
episode: 321 training return: tensor(-177.2692, device='cuda:0')
episode: 322 training return: tensor(-371.5165, device='cuda:0')
episode: 323 training return: tensor(221.5345, device='cuda:0')
epoch: 81 test_true_pfm: 2470.7676175488414 sim_pfm: -60.93906712939497
episode: 324 training return: tensor(-266.8720, device='cuda:0')
episode: 325 training return: tensor(-216.2589, device='cuda:0')
episode: 326 training return: tensor(-229.5158, device='cuda:0')
episode: 327 training return: tensor(-281.2396, device='cuda:0')
epoch: 82 test_true_pfm: 2603.538900632268 sim_pfm: 57.197126671739774
episode: 328 training return: tensor(156.6173, device='cuda:0')
episode: 329 training return: tensor(-161.4353, device='cuda:0')
episode: 330 training return: tensor(-342.9587, device='cuda:0')
episode: 331 training return: tensor(93.2127, device='cuda:0')
epoch: 83 test_true_pfm: 2654.3115703024596 sim_pfm: 167.9700138461
episode: 332 training return: tensor(272.9897, device='cuda:0')
episode: 333 training return: tensor(-243.2676, device='cuda:0')
episode: 334 training return: tensor(305.0359, device='cuda:0')
episode: 335 training return: tensor(-322.9507, device='cuda:0')
epoch: 84 test_true_pfm: 2630.3166471432555 sim_pfm: 13.58408055109127
episode: 336 training return: tensor(-316.4029, device='cuda:0')
episode: 337 training return: tensor(-234.9946, device='cuda:0')
episode: 338 training return: tensor(-296.2153, device='cuda:0')
episode: 339 training return: tensor(127.4963, device='cuda:0')
epoch: 85 test_true_pfm: 2912.002002868609 sim_pfm: 204.03345863913032
episode: 340 training return: tensor(-213.2287, device='cuda:0')
episode: 341 training return: tensor(-55.9915, device='cuda:0')
episode: 342 training return: tensor(-32.0374, device='cuda:0')
episode: 343 training return: tensor(-104.6438, device='cuda:0')
epoch: 86 test_true_pfm: 2474.2180606542424 sim_pfm: 80.80819645698648
episode: 344 training return: tensor(211.9169, device='cuda:0')
episode: 345 training return: tensor(-240.4882, device='cuda:0')
episode: 346 training return: tensor(14.9439, device='cuda:0')
episode: 347 training return: tensor(-237.6390, device='cuda:0')
epoch: 87 test_true_pfm: 2732.349587911438 sim_pfm: 192.79082652548095
episode: 348 training return: tensor(158.5466, device='cuda:0')
episode: 349 training return: tensor(-220.7071, device='cuda:0')
episode: 350 training return: tensor(-197.7318, device='cuda:0')
episode: 351 training return: tensor(105.8029, device='cuda:0')
epoch: 88 test_true_pfm: 2791.408409899246 sim_pfm: -111.34235937724588
episode: 352 training return: tensor(239.3668, device='cuda:0')
episode: 353 training return: tensor(55.5557, device='cuda:0')
episode: 354 training return: tensor(228.9164, device='cuda:0')
episode: 355 training return: tensor(-275.4428, device='cuda:0')
epoch: 89 test_true_pfm: 2497.0462193562994 sim_pfm: 2.0323623852843107
episode: 356 training return: tensor(-287.0960, device='cuda:0')
episode: 357 training return: tensor(-280.8007, device='cuda:0')
episode: 358 training return: tensor(40.0275, device='cuda:0')
episode: 359 training return: tensor(91.8770, device='cuda:0')
epoch: 90 test_true_pfm: 3229.300164388769 sim_pfm: 153.56939221122107
episode: 360 training return: tensor(-270.8145, device='cuda:0')
episode: 361 training return: tensor(-290.0909, device='cuda:0')
episode: 362 training return: tensor(218.0664, device='cuda:0')
episode: 363 training return: tensor(-245.9073, device='cuda:0')
epoch: 91 test_true_pfm: 2259.699491365693 sim_pfm: 101.94427178394592
episode: 364 training return: tensor(291.0013, device='cuda:0')
episode: 365 training return: tensor(-140.3690, device='cuda:0')
episode: 366 training return: tensor(79.6404, device='cuda:0')
episode: 367 training return: tensor(-269.8677, device='cuda:0')
epoch: 92 test_true_pfm: 3217.91027828482 sim_pfm: 125.26488157253091
episode: 368 training return: tensor(-195.4656, device='cuda:0')
episode: 369 training return: tensor(-378.9994, device='cuda:0')
episode: 370 training return: tensor(-343.6183, device='cuda:0')
episode: 371 training return: tensor(-13.5981, device='cuda:0')
epoch: 93 test_true_pfm: 2477.214321243898 sim_pfm: -38.49796325472804
episode: 372 training return: tensor(-205.0150, device='cuda:0')
episode: 373 training return: tensor(-39.3029, device='cuda:0')
episode: 374 training return: tensor(-277.4303, device='cuda:0')
episode: 375 training return: tensor(195.0929, device='cuda:0')
epoch: 94 test_true_pfm: 1929.477283131653 sim_pfm: -1.1090249183180276
episode: 376 training return: tensor(-270.4060, device='cuda:0')
episode: 377 training return: tensor(-234.1098, device='cuda:0')
episode: 378 training return: tensor(-168.1190, device='cuda:0')
episode: 379 training return: tensor(247.0567, device='cuda:0')
epoch: 95 test_true_pfm: 2651.6883896235963 sim_pfm: 88.39190987249215
episode: 380 training return: tensor(-264.1600, device='cuda:0')
episode: 381 training return: tensor(-218.4519, device='cuda:0')
episode: 382 training return: tensor(-275.7257, device='cuda:0')
episode: 383 training return: tensor(237.2560, device='cuda:0')
epoch: 96 test_true_pfm: 3112.634013822189 sim_pfm: -78.67297950258944
episode: 384 training return: tensor(-231.6802, device='cuda:0')
episode: 385 training return: tensor(229.5433, device='cuda:0')
episode: 386 training return: tensor(-321.1392, device='cuda:0')
episode: 387 training return: tensor(-287.8729, device='cuda:0')
epoch: 97 test_true_pfm: 2792.0297719477985 sim_pfm: 157.9655041809407
episode: 388 training return: tensor(-281.8127, device='cuda:0')
episode: 389 training return: tensor(236.5724, device='cuda:0')
episode: 390 training return: tensor(-50.3795, device='cuda:0')
episode: 391 training return: tensor(-201.7832, device='cuda:0')
epoch: 98 test_true_pfm: 2133.821243267192 sim_pfm: -44.29064153708168
episode: 392 training return: tensor(115.2123, device='cuda:0')
episode: 393 training return: tensor(-12.2953, device='cuda:0')
episode: 394 training return: tensor(-250.3911, device='cuda:0')
episode: 395 training return: tensor(-31.4201, device='cuda:0')
epoch: 99 test_true_pfm: 3153.1341443988517 sim_pfm: 109.62142686202424
episode: 396 training return: tensor(14.3136, device='cuda:0')
episode: 397 training return: tensor(-40.5457, device='cuda:0')
episode: 398 training return: tensor(41.0120, device='cuda:0')
episode: 399 training return: tensor(30.0251, device='cuda:0')
epoch: 100 test_true_pfm: 2767.9208758125937 sim_pfm: 157.23007853055606
episode: 400 training return: tensor(63.7468, device='cuda:0')
episode: 401 training return: tensor(-1.2749, device='cuda:0')
episode: 402 training return: tensor(161.9924, device='cuda:0')
episode: 403 training return: tensor(235.8884, device='cuda:0')
epoch: 101 test_true_pfm: 2693.9377135052014 sim_pfm: 72.99286982021295
episode: 404 training return: tensor(-318.9167, device='cuda:0')
episode: 405 training return: tensor(-356.4616, device='cuda:0')
episode: 406 training return: tensor(-8.5944, device='cuda:0')
episode: 407 training return: tensor(117.0740, device='cuda:0')
epoch: 102 test_true_pfm: 3246.9337795267206 sim_pfm: -63.48398920856804
episode: 408 training return: tensor(-237.9196, device='cuda:0')
episode: 409 training return: tensor(140.5273, device='cuda:0')
episode: 410 training return: tensor(-237.6113, device='cuda:0')
episode: 411 training return: tensor(-250.0893, device='cuda:0')
epoch: 103 test_true_pfm: 2263.686325876993 sim_pfm: 232.8024014479888
episode: 412 training return: tensor(-39.6192, device='cuda:0')
episode: 413 training return: tensor(-264.5107, device='cuda:0')
episode: 414 training return: tensor(-206.8116, device='cuda:0')
episode: 415 training return: tensor(204.6126, device='cuda:0')
epoch: 104 test_true_pfm: 2648.919069521721 sim_pfm: -36.3265139371894
episode: 416 training return: tensor(237.4670, device='cuda:0')
episode: 417 training return: tensor(-188.3606, device='cuda:0')
episode: 418 training return: tensor(-109.0604, device='cuda:0')
episode: 419 training return: tensor(214.7554, device='cuda:0')
epoch: 105 test_true_pfm: 2669.440864030045 sim_pfm: 220.76475745797507
episode: 420 training return: tensor(-173.5763, device='cuda:0')
episode: 421 training return: tensor(218.0525, device='cuda:0')
episode: 422 training return: tensor(97.3292, device='cuda:0')
episode: 423 training return: tensor(262.9817, device='cuda:0')
epoch: 106 test_true_pfm: 2177.1259384573946 sim_pfm: 233.68279228225583
episode: 424 training return: tensor(92.6752, device='cuda:0')
episode: 425 training return: tensor(199.2125, device='cuda:0')
episode: 426 training return: tensor(167.3116, device='cuda:0')
episode: 427 training return: tensor(-173.4736, device='cuda:0')
epoch: 107 test_true_pfm: 3252.474234706691 sim_pfm: -108.25250210826441
episode: 428 training return: tensor(-371.4480, device='cuda:0')
episode: 429 training return: tensor(-269.5893, device='cuda:0')
episode: 430 training return: tensor(211.2087, device='cuda:0')
episode: 431 training return: tensor(266.2568, device='cuda:0')
epoch: 108 test_true_pfm: 2984.2167309744073 sim_pfm: -57.532721618015785
episode: 432 training return: tensor(233.1789, device='cuda:0')
episode: 433 training return: tensor(225.7590, device='cuda:0')
episode: 434 training return: tensor(218.6515, device='cuda:0')
episode: 435 training return: tensor(-155.7402, device='cuda:0')
epoch: 109 test_true_pfm: 2623.9809657955684 sim_pfm: 198.58379062448512
episode: 436 training return: tensor(-309.6279, device='cuda:0')
episode: 437 training return: tensor(-104.6263, device='cuda:0')
episode: 438 training return: tensor(234.2616, device='cuda:0')
episode: 439 training return: tensor(-264.2756, device='cuda:0')
epoch: 110 test_true_pfm: 2965.3174222334164 sim_pfm: 224.44390746800732
episode: 440 training return: tensor(-120.0033, device='cuda:0')
episode: 441 training return: tensor(133.7794, device='cuda:0')
episode: 442 training return: tensor(265.9335, device='cuda:0')
episode: 443 training return: tensor(295.0748, device='cuda:0')
epoch: 111 test_true_pfm: 2659.3277422461188 sim_pfm: 150.07855452949298
episode: 444 training return: tensor(58.3784, device='cuda:0')
episode: 445 training return: tensor(106.2263, device='cuda:0')
episode: 446 training return: tensor(-346.9049, device='cuda:0')
episode: 447 training return: tensor(-252.6631, device='cuda:0')
epoch: 112 test_true_pfm: 2945.08737141391 sim_pfm: 53.13459670242931
episode: 448 training return: tensor(227.6264, device='cuda:0')
episode: 449 training return: tensor(-197.7678, device='cuda:0')
episode: 450 training return: tensor(196.9496, device='cuda:0')
episode: 451 training return: tensor(-219.9111, device='cuda:0')
epoch: 113 test_true_pfm: 3238.7219012360742 sim_pfm: -41.44212901925979
episode: 452 training return: tensor(-61.3995, device='cuda:0')
episode: 453 training return: tensor(-55.8209, device='cuda:0')
episode: 454 training return: tensor(-274.9471, device='cuda:0')
episode: 455 training return: tensor(286.6146, device='cuda:0')
epoch: 114 test_true_pfm: 3070.8390119155374 sim_pfm: -104.4860630169084
episode: 456 training return: tensor(-281.7814, device='cuda:0')
episode: 457 training return: tensor(-250.3952, device='cuda:0')
episode: 458 training return: tensor(-311.5794, device='cuda:0')
episode: 459 training return: tensor(-244.3916, device='cuda:0')
epoch: 115 test_true_pfm: 2618.139524499829 sim_pfm: 235.4823528296935
episode: 460 training return: tensor(212.3418, device='cuda:0')
episode: 461 training return: tensor(237.1093, device='cuda:0')
episode: 462 training return: tensor(280.9438, device='cuda:0')
episode: 463 training return: tensor(-215.4607, device='cuda:0')
epoch: 116 test_true_pfm: 2552.369646347231 sim_pfm: 152.71463032035777
episode: 464 training return: tensor(212.3231, device='cuda:0')
episode: 465 training return: tensor(-291.3703, device='cuda:0')
episode: 466 training return: tensor(286.5831, device='cuda:0')
episode: 467 training return: tensor(206.1484, device='cuda:0')
epoch: 117 test_true_pfm: 2936.8599951335327 sim_pfm: -24.384373338650523
episode: 468 training return: tensor(-313.9178, device='cuda:0')
episode: 469 training return: tensor(-182.7575, device='cuda:0')
episode: 470 training return: tensor(114.7672, device='cuda:0')
episode: 471 training return: tensor(-202.6303, device='cuda:0')
epoch: 118 test_true_pfm: 3224.850501970284 sim_pfm: 116.35612384288106
episode: 472 training return: tensor(-268.7142, device='cuda:0')
episode: 473 training return: tensor(-324.0488, device='cuda:0')
episode: 474 training return: tensor(-11.7274, device='cuda:0')
episode: 475 training return: tensor(-55.4275, device='cuda:0')
epoch: 119 test_true_pfm: 3153.339304635491 sim_pfm: 130.61983424337814
episode: 476 training return: tensor(219.8024, device='cuda:0')
episode: 477 training return: tensor(264.9596, device='cuda:0')
episode: 478 training return: tensor(-255.2613, device='cuda:0')
episode: 479 training return: tensor(-273.4532, device='cuda:0')
epoch: 120 test_true_pfm: 2857.690747214572 sim_pfm: 154.36534308534465
episode: 480 training return: tensor(-366.0126, device='cuda:0')
episode: 481 training return: tensor(52.0697, device='cuda:0')
episode: 482 training return: tensor(-306.7907, device='cuda:0')
episode: 483 training return: tensor(-173.4907, device='cuda:0')
epoch: 121 test_true_pfm: 2730.013872309717 sim_pfm: 106.30834223978066
episode: 484 training return: tensor(-186.5954, device='cuda:0')
episode: 485 training return: tensor(119.7298, device='cuda:0')
episode: 486 training return: tensor(-271.5165, device='cuda:0')
episode: 487 training return: tensor(65.7056, device='cuda:0')
epoch: 122 test_true_pfm: 3039.0870465681023 sim_pfm: 140.183511575975
episode: 488 training return: tensor(-276.9262, device='cuda:0')
episode: 489 training return: tensor(-271.7336, device='cuda:0')
episode: 490 training return: tensor(18.4391, device='cuda:0')
episode: 491 training return: tensor(-132.7522, device='cuda:0')
epoch: 123 test_true_pfm: 2786.8749696256577 sim_pfm: -133.75066346745007
episode: 492 training return: tensor(257.8769, device='cuda:0')
episode: 493 training return: tensor(195.6570, device='cuda:0')
episode: 494 training return: tensor(134.6801, device='cuda:0')
episode: 495 training return: tensor(-328.8851, device='cuda:0')
epoch: 124 test_true_pfm: 3229.0085498182502 sim_pfm: -66.80390505443211
episode: 496 training return: tensor(-35.6570, device='cuda:0')
episode: 497 training return: tensor(-194.8250, device='cuda:0')
episode: 498 training return: tensor(-72.4633, device='cuda:0')
episode: 499 training return: tensor(23.6653, device='cuda:0')
epoch: 125 test_true_pfm: 2863.060740878847 sim_pfm: 192.7746921922177
episode: 500 training return: tensor(-348.2567, device='cuda:0')
episode: 501 training return: tensor(-212.5633, device='cuda:0')
episode: 502 training return: tensor(227.3121, device='cuda:0')
episode: 503 training return: tensor(89.7006, device='cuda:0')
epoch: 126 test_true_pfm: 2666.041626616396 sim_pfm: -91.1660514603039
episode: 504 training return: tensor(212.2323, device='cuda:0')
episode: 505 training return: tensor(37.6594, device='cuda:0')
episode: 506 training return: tensor(-127.4830, device='cuda:0')
episode: 507 training return: tensor(-140.1211, device='cuda:0')
epoch: 127 test_true_pfm: 3171.8937821379386 sim_pfm: 147.02253248882093
episode: 508 training return: tensor(1.9368, device='cuda:0')
episode: 509 training return: tensor(-269.5089, device='cuda:0')
episode: 510 training return: tensor(-270.9773, device='cuda:0')
episode: 511 training return: tensor(207.0449, device='cuda:0')
epoch: 128 test_true_pfm: 2299.532195742254 sim_pfm: 140.2927157531667
episode: 512 training return: tensor(-202.9926, device='cuda:0')
episode: 513 training return: tensor(200.2923, device='cuda:0')
episode: 514 training return: tensor(19.7078, device='cuda:0')
episode: 515 training return: tensor(-279.7124, device='cuda:0')
epoch: 129 test_true_pfm: 2859.6062783319353 sim_pfm: 222.16913690637253
episode: 516 training return: tensor(-146.7023, device='cuda:0')
episode: 517 training return: tensor(31.2809, device='cuda:0')
episode: 518 training return: tensor(213.3119, device='cuda:0')
episode: 519 training return: tensor(117.7888, device='cuda:0')
epoch: 130 test_true_pfm: 2644.0250315402905 sim_pfm: 238.52931078108182
episode: 520 training return: tensor(-273.6201, device='cuda:0')
episode: 521 training return: tensor(-192.5432, device='cuda:0')
episode: 522 training return: tensor(-289.4070, device='cuda:0')
episode: 523 training return: tensor(-267.2506, device='cuda:0')
epoch: 131 test_true_pfm: 2711.458559150315 sim_pfm: -52.602701003847564
episode: 524 training return: tensor(-188.7578, device='cuda:0')
episode: 525 training return: tensor(225.6180, device='cuda:0')
episode: 526 training return: tensor(-228.6474, device='cuda:0')
episode: 527 training return: tensor(-336.8410, device='cuda:0')
epoch: 132 test_true_pfm: 3224.767675050653 sim_pfm: 222.66118561565722
episode: 528 training return: tensor(252.9428, device='cuda:0')
episode: 529 training return: tensor(-113.3668, device='cuda:0')
episode: 530 training return: tensor(80.6586, device='cuda:0')
episode: 531 training return: tensor(252.7731, device='cuda:0')
epoch: 133 test_true_pfm: 2560.9551932376357 sim_pfm: 63.75670607118324
episode: 532 training return: tensor(86.1088, device='cuda:0')
episode: 533 training return: tensor(-330.1754, device='cuda:0')
episode: 534 training return: tensor(200.5568, device='cuda:0')
episode: 535 training return: tensor(-180.4516, device='cuda:0')
epoch: 134 test_true_pfm: 3001.8748639973364 sim_pfm: -254.81169826444238
episode: 536 training return: tensor(-193.3965, device='cuda:0')
episode: 537 training return: tensor(259.5089, device='cuda:0')
episode: 538 training return: tensor(-150.4868, device='cuda:0')
episode: 539 training return: tensor(239.4538, device='cuda:0')
epoch: 135 test_true_pfm: 3027.917790341042 sim_pfm: 209.2802026072071
episode: 540 training return: tensor(31.7106, device='cuda:0')
episode: 541 training return: tensor(229.1544, device='cuda:0')
episode: 542 training return: tensor(-218.1951, device='cuda:0')
episode: 543 training return: tensor(16.2981, device='cuda:0')
epoch: 136 test_true_pfm: 2881.2468196635077 sim_pfm: 194.9427036048049
episode: 544 training return: tensor(-275.4619, device='cuda:0')
episode: 545 training return: tensor(-33.6694, device='cuda:0')
episode: 546 training return: tensor(202.5782, device='cuda:0')
episode: 547 training return: tensor(223.4136, device='cuda:0')
epoch: 137 test_true_pfm: 2799.7471443920417 sim_pfm: 164.44899246601076
episode: 548 training return: tensor(14.0026, device='cuda:0')
episode: 549 training return: tensor(117.3227, device='cuda:0')
episode: 550 training return: tensor(-233.0720, device='cuda:0')
episode: 551 training return: tensor(-28.0514, device='cuda:0')
epoch: 138 test_true_pfm: 2824.2234427742983 sim_pfm: 213.64765936638773
episode: 552 training return: tensor(-323.0596, device='cuda:0')
episode: 553 training return: tensor(8.1044, device='cuda:0')
episode: 554 training return: tensor(86.5819, device='cuda:0')
episode: 555 training return: tensor(-219.8308, device='cuda:0')
epoch: 139 test_true_pfm: 2457.0559241563014 sim_pfm: 52.36275318475479
episode: 556 training return: tensor(22.3180, device='cuda:0')
episode: 557 training return: tensor(-284.8181, device='cuda:0')
episode: 558 training return: tensor(255.6124, device='cuda:0')
episode: 559 training return: tensor(219.0556, device='cuda:0')
epoch: 140 test_true_pfm: 2844.8020686125433 sim_pfm: -28.844202304569382
episode: 560 training return: tensor(225.7370, device='cuda:0')
episode: 561 training return: tensor(230.2059, device='cuda:0')
episode: 562 training return: tensor(-369.4787, device='cuda:0')
episode: 563 training return: tensor(15.2104, device='cuda:0')
epoch: 141 test_true_pfm: 2996.8110696218755 sim_pfm: 135.89368519274285
episode: 564 training return: tensor(42.1779, device='cuda:0')
episode: 565 training return: tensor(132.4781, device='cuda:0')
episode: 566 training return: tensor(-183.2205, device='cuda:0')
episode: 567 training return: tensor(-230.2524, device='cuda:0')
epoch: 142 test_true_pfm: 2813.879506665217 sim_pfm: 134.29052562862248
episode: 568 training return: tensor(-260.5203, device='cuda:0')
episode: 569 training return: tensor(224.1756, device='cuda:0')
episode: 570 training return: tensor(-264.0409, device='cuda:0')
episode: 571 training return: tensor(-352.4641, device='cuda:0')
epoch: 143 test_true_pfm: 3217.115997204948 sim_pfm: 39.49729138084998
episode: 572 training return: tensor(-356.9879, device='cuda:0')
episode: 573 training return: tensor(-279.8229, device='cuda:0')
episode: 574 training return: tensor(67.9616, device='cuda:0')
episode: 575 training return: tensor(73.6723, device='cuda:0')
epoch: 144 test_true_pfm: 2655.544290598716 sim_pfm: 194.54303267371142
episode: 576 training return: tensor(28.6856, device='cuda:0')
episode: 577 training return: tensor(-362.1197, device='cuda:0')
episode: 578 training return: tensor(30.2840, device='cuda:0')
episode: 579 training return: tensor(-203.1892, device='cuda:0')
epoch: 145 test_true_pfm: 2770.7200654388253 sim_pfm: 212.30042386278123
episode: 580 training return: tensor(208.5202, device='cuda:0')
episode: 581 training return: tensor(-253.4239, device='cuda:0')
episode: 582 training return: tensor(-156.7203, device='cuda:0')
episode: 583 training return: tensor(191.3264, device='cuda:0')
epoch: 146 test_true_pfm: 2348.494021201409 sim_pfm: 11.585887495040273
episode: 584 training return: tensor(40.0459, device='cuda:0')
episode: 585 training return: tensor(-238.9389, device='cuda:0')
episode: 586 training return: tensor(230.4028, device='cuda:0')
episode: 587 training return: tensor(-236.8927, device='cuda:0')
epoch: 147 test_true_pfm: 2767.199201197443 sim_pfm: 22.725852562395932
episode: 588 training return: tensor(30.1955, device='cuda:0')
episode: 589 training return: tensor(202.7217, device='cuda:0')
episode: 590 training return: tensor(219.9235, device='cuda:0')
episode: 591 training return: tensor(133.5072, device='cuda:0')
epoch: 148 test_true_pfm: 3210.524864348846 sim_pfm: -155.107803138264
episode: 592 training return: tensor(227.5718, device='cuda:0')
episode: 593 training return: tensor(-199.6254, device='cuda:0')
episode: 594 training return: tensor(-100.7303, device='cuda:0')
episode: 595 training return: tensor(1.0190, device='cuda:0')
epoch: 149 test_true_pfm: 3054.8211921533443 sim_pfm: 1.9084043767652474
episode: 596 training return: tensor(135.2128, device='cuda:0')
episode: 597 training return: tensor(221.9561, device='cuda:0')
episode: 598 training return: tensor(219.3197, device='cuda:0')
episode: 599 training return: tensor(58.5797, device='cuda:0')
epoch: 150 test_true_pfm: 2951.155450323599 sim_pfm: 78.55180617650815
