['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '30000']
epoch: 0 training_loss 0.2552454397082329 test_loss: 0.1978065013885498
epoch: 1 training_loss 0.20651195384562016 test_loss: 0.2087003469467163
epoch: 2 training_loss 0.1991037632524967 test_loss: 0.22330667972564697
epoch: 3 training_loss 0.20495538622140885 test_loss: 0.19944239854812623
epoch: 4 training_loss 0.19819596700370312 test_loss: 0.19401460886001587
epoch: 5 training_loss 0.18989813193678856 test_loss: 0.20204710960388184
epoch: 6 training_loss 0.18941933974623681 test_loss: 0.1910965085029602
epoch: 7 training_loss 0.18810622230172158 test_loss: 0.17697277069091796
epoch: 8 training_loss 0.18748464703559875 test_loss: 0.18782191276550292
epoch: 9 training_loss 0.1850913542509079 test_loss: 0.18125061988830565
epoch: 10 training_loss 0.18627575792372228 test_loss: 0.1714423656463623
epoch: 11 training_loss 0.1878759900480509 test_loss: 0.22355995178222657
epoch: 12 training_loss 0.17906109921634197 test_loss: 0.1797143816947937
epoch: 13 training_loss 0.18307491309940815 test_loss: 0.17391712665557862
epoch: 14 training_loss 0.18658412516117096 test_loss: 0.2056661605834961
epoch: 15 training_loss 0.18523824267089367 test_loss: 0.1768358588218689
epoch: 16 training_loss 0.19108248516917228 test_loss: 0.18774914741516113
epoch: 17 training_loss 0.17681804351508618 test_loss: 0.18735091686248778
epoch: 18 training_loss 0.18673286639153958 test_loss: 0.18266783952713012
epoch: 19 training_loss 0.17839087769389153 test_loss: 0.18115565776824952
epoch: 20 training_loss 0.18272363550961018 test_loss: 0.17738279104232788
epoch: 21 training_loss 0.1897713515162468 test_loss: 0.1839301109313965
epoch: 22 training_loss 0.18062427878379822 test_loss: 0.20332014560699463
epoch: 23 training_loss 0.17993452258408069 test_loss: 0.18087509870529175
epoch: 24 training_loss 0.18114959441125392 test_loss: 0.17056337594985962
epoch: 25 training_loss 0.17482783913612365 test_loss: 0.17905492782592775
epoch: 26 training_loss 0.17786949381232262 test_loss: 0.19568585157394408
epoch: 27 training_loss 0.18460730768740177 test_loss: 0.18661266565322876
epoch: 28 training_loss 0.1908358518779278 test_loss: 0.16723859310150146
epoch: 29 training_loss 0.18640866324305536 test_loss: 0.18633376359939574
epoch: 30 training_loss 0.18281943380832671 test_loss: 0.1925278425216675
epoch: 31 training_loss 0.18431523106992245 test_loss: 0.1864749789237976
epoch: 32 training_loss 0.18636996522545815 test_loss: 0.1891399383544922
epoch: 33 training_loss 0.1712222893536091 test_loss: 0.19532473087310792
epoch: 34 training_loss 0.18068734258413316 test_loss: 0.18018670082092286
epoch: 35 training_loss 0.17500599935650826 test_loss: 0.17791008949279785
epoch: 36 training_loss 0.17855322524905204 test_loss: 0.16301437616348266
epoch: 37 training_loss 0.18459477223455906 test_loss: 0.17531826496124267
epoch: 38 training_loss 0.17883019909262657 test_loss: 0.16972997188568115
epoch: 39 training_loss 0.18718621537089347 test_loss: 0.17735973596572877
epoch: 40 training_loss 0.18736061841249466 test_loss: 0.20157341957092284
epoch: 41 training_loss 0.18359801940619946 test_loss: 0.17185899019241332
epoch: 42 training_loss 0.1809726983308792 test_loss: 0.17982505559921264
epoch: 43 training_loss 0.1845709352940321 test_loss: 0.18816498517990113
epoch: 44 training_loss 0.18103577122092246 test_loss: 0.1829479455947876
epoch: 45 training_loss 0.17581225566565992 test_loss: 0.16922371387481688
epoch: 46 training_loss 0.1842746163904667 test_loss: 0.17462515830993652
epoch: 47 training_loss 0.17809802681207657 test_loss: 0.1667485237121582
epoch: 48 training_loss 0.1720627946406603 test_loss: 0.18950334787368775
epoch: 49 training_loss 0.17590245351195335 test_loss: 0.1605069398880005
epoch: 50 training_loss 0.18151687767356633 test_loss: 0.1782313108444214
epoch: 51 training_loss 0.18124340333044528 test_loss: 0.19535952806472778
epoch: 52 training_loss 0.1918409503251314 test_loss: 0.18535130023956298
epoch: 53 training_loss 0.18201281115412712 test_loss: 0.18109416961669922
epoch: 54 training_loss 0.17277041457593442 test_loss: 0.16617839336395263
epoch: 55 training_loss 0.18168606378138066 test_loss: 0.16458114385604858
epoch: 56 training_loss 0.18157876387238503 test_loss: 0.18760136365890503
epoch: 57 training_loss 0.17897397764027118 test_loss: 0.16533757448196412
epoch: 58 training_loss 0.1852608574926853 test_loss: 0.1731935501098633
epoch: 59 training_loss 0.17233324788510798 test_loss: 0.19101685285568237
epoch: 60 training_loss 0.18752236537635325 test_loss: 0.18290187120437623
epoch: 61 training_loss 0.17159078121185303 test_loss: 0.19092390537261963
epoch: 62 training_loss 0.18569020234048367 test_loss: 0.1785471558570862
epoch: 63 training_loss 0.18315850228071212 test_loss: 0.16661055088043214
epoch: 64 training_loss 0.17798725880682467 test_loss: 0.17837374210357665
epoch: 65 training_loss 0.18274407535791398 test_loss: 0.17791937589645385
epoch: 66 training_loss 0.17853633664548396 test_loss: 0.1638530373573303
epoch: 67 training_loss 0.17854711644351481 test_loss: 0.1825935959815979
epoch: 68 training_loss 0.18212120562791825 test_loss: 0.18514226675033568
epoch: 69 training_loss 0.17023782812058927 test_loss: 0.1771717071533203
epoch: 70 training_loss 0.18151879392564296 test_loss: 0.1707789421081543
epoch: 71 training_loss 0.1871270977705717 test_loss: 0.17621028423309326
epoch: 72 training_loss 0.18042812168598174 test_loss: 0.16497361660003662
epoch: 73 training_loss 0.17690911740064622 test_loss: 0.1640392065048218
epoch: 74 training_loss 0.181053312048316 test_loss: 0.16843048334121705
epoch: 75 training_loss 0.18189374826848506 test_loss: 0.1752501368522644
epoch: 76 training_loss 0.17614340119063854 test_loss: 0.17533587217330932
epoch: 77 training_loss 0.17777632392942905 test_loss: 0.16211594343185426
epoch: 78 training_loss 0.17811342656612397 test_loss: 0.16572949886322022
epoch: 79 training_loss 0.180725691691041 test_loss: 0.17077815532684326
epoch: 80 training_loss 0.17667627111077308 test_loss: 0.16761960983276367
epoch: 81 training_loss 0.1783823512494564 test_loss: 0.16049253940582275
epoch: 82 training_loss 0.1748437736928463 test_loss: 0.18732588291168212
epoch: 83 training_loss 0.18670678712427616 test_loss: 0.19500188827514647
epoch: 84 training_loss 0.18387050338089467 test_loss: 0.18597573041915894
epoch: 85 training_loss 0.1761734391748905 test_loss: 0.18097759485244752
epoch: 86 training_loss 0.17251918099820615 test_loss: 0.18112868070602417
epoch: 87 training_loss 0.18204063475131987 test_loss: 0.18083016872406005
epoch: 88 training_loss 0.17175337888300418 test_loss: 0.1868461012840271
epoch: 89 training_loss 0.17502742901444435 test_loss: 0.16810572147369385
epoch: 90 training_loss 0.1766162119060755 test_loss: 0.16893266439437865
epoch: 91 training_loss 0.18779233247041702 test_loss: 0.1926669955253601
epoch: 92 training_loss 0.1713910686969757 test_loss: 0.16807578802108764
epoch: 93 training_loss 0.17578877232968806 test_loss: 0.18509376049041748
epoch: 94 training_loss 0.18184828341007234 test_loss: 0.17973730564117432
epoch: 95 training_loss 0.17396450221538542 test_loss: 0.17651088237762452
epoch: 96 training_loss 0.18919840060174464 test_loss: 0.17883440256118774
epoch: 97 training_loss 0.17472324147820473 test_loss: 0.17109282016754152
epoch: 98 training_loss 0.1737466210871935 test_loss: 0.17454128265380858
epoch: 99 training_loss 0.1732179620116949 test_loss: 0.16791853904724122
epoch: 100 training_loss 0.17984963148832322 test_loss: 0.17260714769363403
epoch: 101 training_loss 0.18346783816814421 test_loss: 0.18928197622299195
epoch: 102 training_loss 0.18115399718284608 test_loss: 0.1698821783065796
epoch: 103 training_loss 0.17638434067368508 test_loss: 0.17868560552597046
epoch: 104 training_loss 0.17939775496721266 test_loss: 0.17480123043060303
epoch: 105 training_loss 0.17158072978258132 test_loss: 0.1897009491920471
epoch: 106 training_loss 0.17061674743890762 test_loss: 0.17583577632904052
epoch: 107 training_loss 0.17621279455721378 test_loss: 0.17593547105789184
epoch: 108 training_loss 0.17543651640415192 test_loss: 0.1771420955657959
epoch: 109 training_loss 0.1725577761232853 test_loss: 0.18428988456726075
epoch: 110 training_loss 0.17403397008776664 test_loss: 0.18405746221542357
epoch: 111 training_loss 0.17575197093188763 test_loss: 0.17013449668884278
epoch: 112 training_loss 0.1718695393204689 test_loss: 0.18470274209976195
epoch: 113 training_loss 0.18420400112867355 test_loss: 0.17686110734939575
epoch: 114 training_loss 0.17157488919794558 test_loss: 0.1657419443130493
epoch: 115 training_loss 0.18049800269305705 test_loss: 0.17453329563140868
epoch: 116 training_loss 0.18075893886387348 test_loss: 0.1792138934135437
epoch: 117 training_loss 0.1728150623291731 test_loss: 0.1702446699142456
epoch: 118 training_loss 0.16853269897401332 test_loss: 0.17716373205184938
epoch: 119 training_loss 0.17504107512533665 test_loss: 0.16507545709609986
epoch: 120 training_loss 0.16851382628083228 test_loss: 0.18201000690460206
epoch: 121 training_loss 0.174012064114213 test_loss: 0.182344388961792
epoch: 122 training_loss 0.17051194921135904 test_loss: 0.16930617094039918
epoch: 123 training_loss 0.17073797967284918 test_loss: 0.17168203592300416
epoch: 124 training_loss 0.17907803557813168 test_loss: 0.18573230504989624
epoch: 125 training_loss 0.17571207284927368 test_loss: 0.17174557447433472
epoch: 126 training_loss 0.17757414057850837 test_loss: 0.18183155059814454
epoch: 127 training_loss 0.16829951003193855 test_loss: 0.17560003995895385
epoch: 128 training_loss 0.17497283704578875 test_loss: 0.16660120487213134
epoch: 129 training_loss 0.16939041204750538 test_loss: 0.18377158641815186
epoch: 130 training_loss 0.1710730804502964 test_loss: 0.1780479669570923
epoch: 131 training_loss 0.18483954712748527 test_loss: 0.18641304969787598
epoch: 132 training_loss 0.18246374554932118 test_loss: 0.17696099281311034
epoch: 133 training_loss 0.17627379700541496 test_loss: 0.16202843189239502
epoch: 134 training_loss 0.17161823626607656 test_loss: 0.16188086271286012
epoch: 135 training_loss 0.1761485205963254 test_loss: 0.16654704809188842
epoch: 136 training_loss 0.17877387680113316 test_loss: 0.18621014356613158
epoch: 137 training_loss 0.1717281138151884 test_loss: 0.184263277053833
epoch: 138 training_loss 0.18401984572410585 test_loss: 0.17572392225265504
epoch: 139 training_loss 0.176731718108058 test_loss: 0.17055827379226685
epoch: 140 training_loss 0.17904125034809112 test_loss: 0.1689482092857361
epoch: 141 training_loss 0.17506848499178887 test_loss: 0.17387467622756958
epoch: 142 training_loss 0.1689122775942087 test_loss: 0.1799553871154785
epoch: 143 training_loss 0.1827580077201128 test_loss: 0.16417495012283326
epoch: 144 training_loss 0.1812460671365261 test_loss: 0.17529107332229615
epoch: 145 training_loss 0.17441575169563295 test_loss: 0.18137627840042114
epoch: 146 training_loss 0.1790613754838705 test_loss: 0.15823031663894654
epoch: 147 training_loss 0.1719618894159794 test_loss: 0.16675447225570678
epoch: 148 training_loss 0.1769618858397007 test_loss: 0.17966012954711913
epoch: 149 training_loss 0.1777023781836033 test_loss: 0.16773759126663207
epoch: 0 training_loss 8.275473947525024 test_loss: 5.247418594360352
epoch: 1 training_loss 4.102891461849213 test_loss: 3.3664379119873047
epoch: 2 training_loss 2.807408919334412 test_loss: 2.4888429641723633
epoch: 3 training_loss 2.2289227437973023 test_loss: 2.0284711837768556
epoch: 4 training_loss 1.8616704833507538 test_loss: 1.7017034530639648
epoch: 5 training_loss 1.62235426902771 test_loss: 1.5637008666992187
epoch: 6 training_loss 1.4490855205059052 test_loss: 1.3885726928710938
epoch: 7 training_loss 1.3692130148410797 test_loss: 1.2915014266967773
epoch: 8 training_loss 1.259319108724594 test_loss: 1.278307056427002
epoch: 9 training_loss 1.2143435668945313 test_loss: 1.1703715324401855
epoch: 10 training_loss 1.1284638172388077 test_loss: 1.116420078277588
epoch: 11 training_loss 1.0985891610383987 test_loss: 1.080216407775879
epoch: 12 training_loss 1.0693703573942184 test_loss: 1.066508674621582
epoch: 13 training_loss 1.0040780329704284 test_loss: 0.993428897857666
epoch: 14 training_loss 0.9787822407484055 test_loss: 0.992820930480957
epoch: 15 training_loss 0.9695801210403442 test_loss: 0.9609206199645997
epoch: 16 training_loss 0.9421992135047913 test_loss: 0.8982039451599121
epoch: 17 training_loss 0.913855721950531 test_loss: 0.8779029846191406
epoch: 18 training_loss 0.890880674123764 test_loss: 0.9078359603881836
epoch: 19 training_loss 0.8706479555368424 test_loss: 0.8911057472229004
epoch: 20 training_loss 0.8542392522096633 test_loss: 0.8285739898681641
epoch: 21 training_loss 0.8196618449687958 test_loss: 0.8242176055908204
epoch: 22 training_loss 0.8159752708673477 test_loss: 0.7975535869598389
epoch: 23 training_loss 0.8019548374414444 test_loss: 0.7803688526153565
epoch: 24 training_loss 0.7900000250339508 test_loss: 0.7818540096282959
epoch: 25 training_loss 0.7711489015817642 test_loss: 0.7744239807128906
epoch: 26 training_loss 0.7698641508817673 test_loss: 0.7419783115386963
epoch: 27 training_loss 0.7583468961715698 test_loss: 0.7355830192565918
epoch: 28 training_loss 0.742989850640297 test_loss: 0.7280033588409424
epoch: 29 training_loss 0.7307152265310287 test_loss: 0.7501578807830811
epoch: 30 training_loss 0.7260385912656784 test_loss: 0.7209810733795166
epoch: 31 training_loss 0.7291819661855697 test_loss: 0.7896782875061035
epoch: 32 training_loss 0.7141600680351258 test_loss: 0.723533296585083
epoch: 33 training_loss 0.7023244726657868 test_loss: 0.6878382682800293
epoch: 34 training_loss 0.7031543374061584 test_loss: 0.7349419593811035
epoch: 35 training_loss 0.6889609414339065 test_loss: 0.69717435836792
epoch: 36 training_loss 0.6898913538455963 test_loss: 0.7066106796264648
epoch: 37 training_loss 0.6723054033517838 test_loss: 0.6759184837341309
epoch: 38 training_loss 0.6707319611310959 test_loss: 0.6610235214233399
epoch: 39 training_loss 0.6601738810539246 test_loss: 0.6675922870635986
epoch: 40 training_loss 0.667150416970253 test_loss: 0.6636215209960937
epoch: 41 training_loss 0.6661609107255936 test_loss: 0.6583060264587403
epoch: 42 training_loss 0.6463484340906143 test_loss: 0.6436164379119873
epoch: 43 training_loss 0.6467193168401718 test_loss: 0.6719583034515381
epoch: 44 training_loss 0.6314771324396133 test_loss: 0.6464983940124511
epoch: 45 training_loss 0.6422074556350708 test_loss: 0.6363691806793212
epoch: 46 training_loss 0.6462746596336365 test_loss: 0.6194700717926025
epoch: 47 training_loss 0.6363618701696396 test_loss: 0.6586946487426758
epoch: 48 training_loss 0.6283439898490906 test_loss: 0.6161482810974122
epoch: 49 training_loss 0.62246916949749 test_loss: 0.6301857471466065
epoch: 50 training_loss 0.6261289340257644 test_loss: 0.6525445938110351
epoch: 51 training_loss 0.6196652609109878 test_loss: 0.64189772605896
epoch: 52 training_loss 0.6143468177318573 test_loss: 0.6171127319335937
epoch: 53 training_loss 0.6158300793170929 test_loss: 0.6081971645355224
epoch: 54 training_loss 0.6164642751216889 test_loss: 0.6313797473907471
epoch: 55 training_loss 0.6110343736410141 test_loss: 0.6075821399688721
epoch: 56 training_loss 0.605853573679924 test_loss: 0.5990254878997803
epoch: 57 training_loss 0.6011349403858185 test_loss: 0.6142109394073486
epoch: 58 training_loss 0.5927759611606598 test_loss: 0.5839518070220947
epoch: 59 training_loss 0.5885743048787117 test_loss: 0.5987821102142334
epoch: 60 training_loss 0.5918633097410202 test_loss: 0.6129984855651855
epoch: 61 training_loss 0.5871024048328399 test_loss: 0.5922786712646484
epoch: 62 training_loss 0.5859330552816391 test_loss: 0.5796961784362793
epoch: 63 training_loss 0.5761733984947205 test_loss: 0.6040281295776367
epoch: 64 training_loss 0.5832499486207962 test_loss: 0.5872916221618653
epoch: 65 training_loss 0.5767143166065216 test_loss: 0.6190482139587402
epoch: 66 training_loss 0.5897490888833999 test_loss: 0.5701142311096191
epoch: 67 training_loss 0.5759949505329132 test_loss: 0.586637306213379
epoch: 68 training_loss 0.5802231207489967 test_loss: 0.5952705860137939
epoch: 69 training_loss 0.5727341774106026 test_loss: 0.583126449584961
epoch: 70 training_loss 0.5786411392688752 test_loss: 0.5680948734283447
epoch: 71 training_loss 0.5624975833296776 test_loss: 0.5622055530548096
epoch: 72 training_loss 0.5613909041881562 test_loss: 0.5645166873931885
epoch: 73 training_loss 0.5576216015219688 test_loss: 0.5633622646331787
epoch: 74 training_loss 0.5606631416082383 test_loss: 0.568894624710083
epoch: 75 training_loss 0.5604166021943092 test_loss: 0.5454400539398193
epoch: 76 training_loss 0.5636272522807121 test_loss: 0.5567205905914306
epoch: 77 training_loss 0.5556873407959938 test_loss: 0.5699715614318848
epoch: 78 training_loss 0.5555008742213249 test_loss: 0.5535117149353027
epoch: 79 training_loss 0.5526950976252556 test_loss: 0.5398647785186768
epoch: 80 training_loss 0.5500762298703193 test_loss: 0.555607795715332
epoch: 81 training_loss 0.562976043522358 test_loss: 0.5420736789703369
epoch: 82 training_loss 0.5429738703370094 test_loss: 0.5449656486511231
epoch: 83 training_loss 0.5498153966665268 test_loss: 0.5868320465087891
epoch: 84 training_loss 0.5496694821119309 test_loss: 0.5565155506134033
epoch: 85 training_loss 0.5458236879110336 test_loss: 0.5327510833740234
epoch: 86 training_loss 0.5342609342932702 test_loss: 0.536573839187622
epoch: 87 training_loss 0.5439729583263397 test_loss: 0.5545203208923339
epoch: 88 training_loss 0.5515953260660171 test_loss: 0.5675484657287597
epoch: 89 training_loss 0.5353508856892586 test_loss: 0.5569300651550293
epoch: 90 training_loss 0.5356826937198639 test_loss: 0.5453065395355224
epoch: 91 training_loss 0.532611776292324 test_loss: 0.5335551738739014
epoch: 92 training_loss 0.5338674718141556 test_loss: 0.5664901256561279
epoch: 93 training_loss 0.5425977385044098 test_loss: 0.5524891376495361
epoch: 94 training_loss 0.526714104115963 test_loss: 0.5188689708709717
epoch: 95 training_loss 0.5243949571251869 test_loss: 0.544620943069458
epoch: 96 training_loss 0.5394952389597892 test_loss: 0.5628795146942138
epoch: 97 training_loss 0.5332808429002762 test_loss: 0.5295773029327393
epoch: 98 training_loss 0.5267402079701423 test_loss: 0.5211643695831298
epoch: 99 training_loss 0.5244299891591072 test_loss: 0.531541109085083
epoch: 100 training_loss 0.5269109830260277 test_loss: 0.52491135597229
epoch: 101 training_loss 0.5193814378976822 test_loss: 0.5131282806396484
epoch: 102 training_loss 0.5139705649018288 test_loss: 0.5248854637145997
epoch: 103 training_loss 0.5249942418932915 test_loss: 0.5301791191101074
epoch: 104 training_loss 0.5244587358832359 test_loss: 0.5347821712493896
epoch: 105 training_loss 0.5294748571515083 test_loss: 0.5335269927978515
epoch: 106 training_loss 0.5153217706084251 test_loss: 0.5104623317718506
epoch: 107 training_loss 0.5102464872598648 test_loss: 0.5212847232818604
epoch: 108 training_loss 0.512538914680481 test_loss: 0.5212455749511719
epoch: 109 training_loss 0.5155722573399544 test_loss: 0.5204386234283447
epoch: 110 training_loss 0.5075449672341347 test_loss: 0.5200763702392578
epoch: 111 training_loss 0.5058128020167351 test_loss: 0.5235475540161133
epoch: 112 training_loss 0.5086934989690781 test_loss: 0.5137893199920655
epoch: 113 training_loss 0.5093515536189079 test_loss: 0.5005659580230712
epoch: 114 training_loss 0.5067476481199265 test_loss: 0.5040306091308594
epoch: 115 training_loss 0.5098071074485779 test_loss: 0.5209288120269775
epoch: 116 training_loss 0.5100802990794182 test_loss: 0.5072683334350586
epoch: 117 training_loss 0.50889000415802 test_loss: 0.48939123153686526
epoch: 118 training_loss 0.5087805980443955 test_loss: 0.5332239627838135
epoch: 119 training_loss 0.5103678587079048 test_loss: 0.5127731800079346
epoch: 120 training_loss 0.5040271124243736 test_loss: 0.5153365135192871
epoch: 121 training_loss 0.5064414048194885 test_loss: 0.5033321380615234
epoch: 122 training_loss 0.5136832427978516 test_loss: 0.5130290508270263
epoch: 123 training_loss 0.5162382352352143 test_loss: 0.4962273597717285
epoch: 124 training_loss 0.49770145922899245 test_loss: 0.5315087795257568
epoch: 125 training_loss 0.49318173468112947 test_loss: 0.4994810581207275
epoch: 126 training_loss 0.5041304877400399 test_loss: 0.5090629577636718
epoch: 127 training_loss 0.4940208798646927 test_loss: 0.5040800094604492
epoch: 128 training_loss 0.4938668882846832 test_loss: 0.5011583805084229
epoch: 129 training_loss 0.49944986194372176 test_loss: 0.5167429447174072
epoch: 130 training_loss 0.5037411451339722 test_loss: 0.4984895706176758
epoch: 131 training_loss 0.4938677805662155 test_loss: 0.4835315227508545
epoch: 132 training_loss 0.49239422380924225 test_loss: 0.49628429412841796
epoch: 133 training_loss 0.4976826596260071 test_loss: 0.4942033767700195
epoch: 134 training_loss 0.4930491715669632 test_loss: 0.49297270774841306
epoch: 135 training_loss 0.4957618671655655 test_loss: 0.49056029319763184
epoch: 136 training_loss 0.49155754685401915 test_loss: 0.5026612281799316
epoch: 137 training_loss 0.49379264920949933 test_loss: 0.5054615497589111
epoch: 138 training_loss 0.4882732480764389 test_loss: 0.48953070640563967
epoch: 139 training_loss 0.49608032941818236 test_loss: 0.4866288185119629
epoch: 140 training_loss 0.5009194347262382 test_loss: 0.4872300148010254
epoch: 141 training_loss 0.49177621155977247 test_loss: 0.48144898414611814
epoch: 142 training_loss 0.48998475015163423 test_loss: 0.49306292533874513
epoch: 143 training_loss 0.4876890125870705 test_loss: 0.5066334724426269
epoch: 144 training_loss 0.4832301214337349 test_loss: 0.49695625305175783
epoch: 145 training_loss 0.47794527262449266 test_loss: 0.4996329307556152
epoch: 146 training_loss 0.48230479955673217 test_loss: 0.48876328468322755
epoch: 147 training_loss 0.4769356366991997 test_loss: 0.48331031799316404
epoch: 148 training_loss 0.48363075375556946 test_loss: 0.4964279651641846
epoch: 149 training_loss 0.4816330197453499 test_loss: 0.4928304195404053
2351.0118431889036
episode: 0 training return: tensor(-329.6740, device='cuda:0')
episode: 1 training return: tensor(-19.6177, device='cuda:0')
episode: 2 training return: tensor(-365.5443, device='cuda:0')
episode: 3 training return: tensor(-273.6032, device='cuda:0')
epoch: 1 test_true_pfm: 2313.5124261176334 sim_pfm: 85.40225017723667
episode: 4 training return: tensor(-270.1786, device='cuda:0')
episode: 5 training return: tensor(213.3502, device='cuda:0')
episode: 6 training return: tensor(244.2736, device='cuda:0')
episode: 7 training return: tensor(187.7884, device='cuda:0')
epoch: 2 test_true_pfm: 3218.1232109933076 sim_pfm: 170.103023675348
episode: 8 training return: tensor(-374.9804, device='cuda:0')
episode: 9 training return: tensor(-295.1199, device='cuda:0')
episode: 10 training return: tensor(-302.9985, device='cuda:0')
episode: 11 training return: tensor(257.9501, device='cuda:0')
epoch: 3 test_true_pfm: 2761.3049418367377 sim_pfm: 46.03502674517222
episode: 12 training return: tensor(-289.5666, device='cuda:0')
episode: 13 training return: tensor(274.4056, device='cuda:0')
episode: 14 training return: tensor(-374.3766, device='cuda:0')
episode: 15 training return: tensor(-200.3315, device='cuda:0')
epoch: 4 test_true_pfm: 2606.68443337677 sim_pfm: -165.00336084413962
episode: 16 training return: tensor(-372.0532, device='cuda:0')
episode: 17 training return: tensor(-264.4996, device='cuda:0')
episode: 18 training return: tensor(-131.1754, device='cuda:0')
episode: 19 training return: tensor(111.9820, device='cuda:0')
epoch: 5 test_true_pfm: 2368.231861785118 sim_pfm: 96.29076267794396
episode: 20 training return: tensor(-227.2331, device='cuda:0')
episode: 21 training return: tensor(-315.8760, device='cuda:0')
episode: 22 training return: tensor(286.5846, device='cuda:0')
episode: 23 training return: tensor(-350.9110, device='cuda:0')
epoch: 6 test_true_pfm: 2326.664685949397 sim_pfm: 0.18145783575406918
episode: 24 training return: tensor(-104.2207, device='cuda:0')
episode: 25 training return: tensor(112.8847, device='cuda:0')
episode: 26 training return: tensor(140.8629, device='cuda:0')
episode: 27 training return: tensor(258.7944, device='cuda:0')
epoch: 7 test_true_pfm: 1901.7850557281645 sim_pfm: -33.07363968613208
episode: 28 training return: tensor(94.8540, device='cuda:0')
episode: 29 training return: tensor(-91.3918, device='cuda:0')
episode: 30 training return: tensor(-352.7088, device='cuda:0')
episode: 31 training return: tensor(312.1812, device='cuda:0')
epoch: 8 test_true_pfm: 2951.39903711169 sim_pfm: 106.00822453588869
episode: 32 training return: tensor(-348.6351, device='cuda:0')
episode: 33 training return: tensor(-365.6533, device='cuda:0')
episode: 34 training return: tensor(-370.6832, device='cuda:0')
episode: 35 training return: tensor(308.7487, device='cuda:0')
epoch: 9 test_true_pfm: 2764.761471739425 sim_pfm: -60.016927176931254
episode: 36 training return: tensor(142.2342, device='cuda:0')
episode: 37 training return: tensor(78.7726, device='cuda:0')
episode: 38 training return: tensor(-265.5638, device='cuda:0')
episode: 39 training return: tensor(-103.6434, device='cuda:0')
epoch: 10 test_true_pfm: 2625.5115217639973 sim_pfm: 10.726483704987913
episode: 40 training return: tensor(-346.3899, device='cuda:0')
episode: 41 training return: tensor(-321.2078, device='cuda:0')
episode: 42 training return: tensor(-274.5462, device='cuda:0')
episode: 43 training return: tensor(-369.8434, device='cuda:0')
epoch: 11 test_true_pfm: 2459.394027137228 sim_pfm: 114.8626400733289
episode: 44 training return: tensor(-327.6062, device='cuda:0')
episode: 45 training return: tensor(33.0160, device='cuda:0')
episode: 46 training return: tensor(178.4524, device='cuda:0')
episode: 47 training return: tensor(-196.4256, device='cuda:0')
epoch: 12 test_true_pfm: 2275.7765813100027 sim_pfm: -60.32205268899755
episode: 48 training return: tensor(17.6882, device='cuda:0')
episode: 49 training return: tensor(-230.6396, device='cuda:0')
episode: 50 training return: tensor(285.1750, device='cuda:0')
episode: 51 training return: tensor(223.2431, device='cuda:0')
epoch: 13 test_true_pfm: 3127.654466308352 sim_pfm: 101.30777925231571
episode: 52 training return: tensor(-235.1534, device='cuda:0')
episode: 53 training return: tensor(-365.1395, device='cuda:0')
episode: 54 training return: tensor(-345.9507, device='cuda:0')
episode: 55 training return: tensor(274.4850, device='cuda:0')
epoch: 14 test_true_pfm: 3100.1511915447695 sim_pfm: -63.76672020584616
episode: 56 training return: tensor(-323.3240, device='cuda:0')
episode: 57 training return: tensor(-367.8294, device='cuda:0')
episode: 58 training return: tensor(-101.9926, device='cuda:0')
episode: 59 training return: tensor(-359.9973, device='cuda:0')
epoch: 15 test_true_pfm: 2601.0868439105593 sim_pfm: 264.09834314523806
episode: 60 training return: tensor(261.8393, device='cuda:0')
episode: 61 training return: tensor(-373.0334, device='cuda:0')
episode: 62 training return: tensor(-321.8867, device='cuda:0')
episode: 63 training return: tensor(-339.6057, device='cuda:0')
epoch: 16 test_true_pfm: 3182.061222192544 sim_pfm: 234.15669962912216
episode: 64 training return: tensor(-69.6429, device='cuda:0')
episode: 65 training return: tensor(-144.6065, device='cuda:0')
episode: 66 training return: tensor(-357.0421, device='cuda:0')
episode: 67 training return: tensor(20.1902, device='cuda:0')
epoch: 17 test_true_pfm: 3161.3587369080064 sim_pfm: 228.26134452716602
episode: 68 training return: tensor(-260.2621, device='cuda:0')
episode: 69 training return: tensor(-187.4841, device='cuda:0')
episode: 70 training return: tensor(-272.7544, device='cuda:0')
episode: 71 training return: tensor(-101.6387, device='cuda:0')
epoch: 18 test_true_pfm: 2138.88134617321 sim_pfm: 82.11248354898999
episode: 72 training return: tensor(-75.1968, device='cuda:0')
episode: 73 training return: tensor(-124.4563, device='cuda:0')
episode: 74 training return: tensor(-365.1508, device='cuda:0')
episode: 75 training return: tensor(-172.1738, device='cuda:0')
epoch: 19 test_true_pfm: 2734.9980090993836 sim_pfm: 98.1673649996131
episode: 76 training return: tensor(-303.1549, device='cuda:0')
episode: 77 training return: tensor(-370.6193, device='cuda:0')
episode: 78 training return: tensor(-236.1960, device='cuda:0')
episode: 79 training return: tensor(60.1800, device='cuda:0')
epoch: 20 test_true_pfm: 2867.1058807315826 sim_pfm: 265.8234977792017
episode: 80 training return: tensor(-264.9604, device='cuda:0')
episode: 81 training return: tensor(-96.0309, device='cuda:0')
episode: 82 training return: tensor(249.1538, device='cuda:0')
episode: 83 training return: tensor(270.3220, device='cuda:0')
epoch: 21 test_true_pfm: 2662.288826860653 sim_pfm: 267.71754742471967
episode: 84 training return: tensor(-136.8233, device='cuda:0')
episode: 85 training return: tensor(-203.2735, device='cuda:0')
episode: 86 training return: tensor(-264.1709, device='cuda:0')
episode: 87 training return: tensor(-170.4172, device='cuda:0')
epoch: 22 test_true_pfm: 1891.4664742600028 sim_pfm: 262.1104416856542
episode: 88 training return: tensor(257.3535, device='cuda:0')
episode: 89 training return: tensor(-273.4746, device='cuda:0')
episode: 90 training return: tensor(-329.3358, device='cuda:0')
episode: 91 training return: tensor(248.4117, device='cuda:0')
epoch: 23 test_true_pfm: 2711.8581765136873 sim_pfm: 22.414799298237387
episode: 92 training return: tensor(-361.2655, device='cuda:0')
episode: 93 training return: tensor(-315.9951, device='cuda:0')
episode: 94 training return: tensor(293.4264, device='cuda:0')
episode: 95 training return: tensor(-287.5026, device='cuda:0')
epoch: 24 test_true_pfm: 2267.6948562848543 sim_pfm: 218.6690274309949
episode: 96 training return: tensor(-59.7570, device='cuda:0')
episode: 97 training return: tensor(-101.0897, device='cuda:0')
episode: 98 training return: tensor(-323.8420, device='cuda:0')
episode: 99 training return: tensor(-286.2598, device='cuda:0')
epoch: 25 test_true_pfm: 2619.682866434906 sim_pfm: 274.0692810173302
episode: 100 training return: tensor(255.1416, device='cuda:0')
episode: 101 training return: tensor(-340.6147, device='cuda:0')
episode: 102 training return: tensor(267.4329, device='cuda:0')
episode: 103 training return: tensor(-148.4640, device='cuda:0')
epoch: 26 test_true_pfm: 2842.838786551302 sim_pfm: 155.76774308935273
episode: 104 training return: tensor(-9.8424, device='cuda:0')
episode: 105 training return: tensor(-162.4441, device='cuda:0')
episode: 106 training return: tensor(226.9915, device='cuda:0')
episode: 107 training return: tensor(255.7348, device='cuda:0')
epoch: 27 test_true_pfm: 2120.572455967212 sim_pfm: 213.86406511893924
episode: 108 training return: tensor(269.8406, device='cuda:0')
episode: 109 training return: tensor(252.4413, device='cuda:0')
episode: 110 training return: tensor(241.7514, device='cuda:0')
episode: 111 training return: tensor(-250.8698, device='cuda:0')
epoch: 28 test_true_pfm: 2567.370137020011 sim_pfm: 212.34394062977904
episode: 112 training return: tensor(-1.5417, device='cuda:0')
episode: 113 training return: tensor(-277.8882, device='cuda:0')
episode: 114 training return: tensor(-279.8950, device='cuda:0')
episode: 115 training return: tensor(-304.4517, device='cuda:0')
epoch: 29 test_true_pfm: 2639.8184832619017 sim_pfm: 198.72059979398423
episode: 116 training return: tensor(-367.4777, device='cuda:0')
episode: 117 training return: tensor(-371.8117, device='cuda:0')
episode: 118 training return: tensor(-191.8039, device='cuda:0')
episode: 119 training return: tensor(-301.6467, device='cuda:0')
epoch: 30 test_true_pfm: 2570.4421177318254 sim_pfm: 166.9593887148852
episode: 120 training return: tensor(-366.8148, device='cuda:0')
episode: 121 training return: tensor(-0.1582, device='cuda:0')
episode: 122 training return: tensor(-251.0945, device='cuda:0')
episode: 123 training return: tensor(-4.4114, device='cuda:0')
epoch: 31 test_true_pfm: 3157.7969069456904 sim_pfm: 229.80556596206347
episode: 124 training return: tensor(262.1122, device='cuda:0')
episode: 125 training return: tensor(-124.8870, device='cuda:0')
episode: 126 training return: tensor(-281.5776, device='cuda:0')
episode: 127 training return: tensor(-279.6157, device='cuda:0')
epoch: 32 test_true_pfm: 2698.6966415507645 sim_pfm: 192.95440231987354
episode: 128 training return: tensor(254.2001, device='cuda:0')
episode: 129 training return: tensor(-189.2416, device='cuda:0')
episode: 130 training return: tensor(261.2352, device='cuda:0')
episode: 131 training return: tensor(-245.0729, device='cuda:0')
epoch: 33 test_true_pfm: 1961.3127268042733 sim_pfm: 39.90576937415366
episode: 132 training return: tensor(263.9277, device='cuda:0')
episode: 133 training return: tensor(-299.7288, device='cuda:0')
episode: 134 training return: tensor(-42.7639, device='cuda:0')
episode: 135 training return: tensor(-146.1997, device='cuda:0')
epoch: 34 test_true_pfm: 3004.04827198421 sim_pfm: 112.86752131201017
episode: 136 training return: tensor(-367.2808, device='cuda:0')
episode: 137 training return: tensor(-240.1376, device='cuda:0')
episode: 138 training return: tensor(-6.4238, device='cuda:0')
episode: 139 training return: tensor(111.3095, device='cuda:0')
epoch: 35 test_true_pfm: 3102.9454771481082 sim_pfm: 235.6320872421396
episode: 140 training return: tensor(301.4919, device='cuda:0')
episode: 141 training return: tensor(34.9564, device='cuda:0')
episode: 142 training return: tensor(271.8404, device='cuda:0')
episode: 143 training return: tensor(-53.6546, device='cuda:0')
epoch: 36 test_true_pfm: 2731.0899709119403 sim_pfm: 102.55542784974871
episode: 144 training return: tensor(189.3280, device='cuda:0')
episode: 145 training return: tensor(-364.0399, device='cuda:0')
episode: 146 training return: tensor(272.6437, device='cuda:0')
episode: 147 training return: tensor(-152.6784, device='cuda:0')
epoch: 37 test_true_pfm: 2924.602033598043 sim_pfm: 102.71258568607543
episode: 148 training return: tensor(-324.5882, device='cuda:0')
episode: 149 training return: tensor(168.8957, device='cuda:0')
episode: 150 training return: tensor(271.3412, device='cuda:0')
episode: 151 training return: tensor(80.4365, device='cuda:0')
epoch: 38 test_true_pfm: 3161.6462218703396 sim_pfm: 285.3196820128942
episode: 152 training return: tensor(-328.0070, device='cuda:0')
episode: 153 training return: tensor(294.0587, device='cuda:0')
episode: 154 training return: tensor(-4.6080, device='cuda:0')
episode: 155 training return: tensor(-23.9802, device='cuda:0')
epoch: 39 test_true_pfm: 2759.066438847442 sim_pfm: 142.72518438610132
episode: 156 training return: tensor(123.6947, device='cuda:0')
episode: 157 training return: tensor(205.0180, device='cuda:0')
episode: 158 training return: tensor(279.2999, device='cuda:0')
episode: 159 training return: tensor(106.8529, device='cuda:0')
epoch: 40 test_true_pfm: 2824.180434677734 sim_pfm: 228.13451606857902
episode: 160 training return: tensor(262.0586, device='cuda:0')
episode: 161 training return: tensor(267.8936, device='cuda:0')
episode: 162 training return: tensor(7.8891, device='cuda:0')
episode: 163 training return: tensor(291.4938, device='cuda:0')
epoch: 41 test_true_pfm: 2594.016145574249 sim_pfm: 229.6174321573926
episode: 164 training return: tensor(-354.5046, device='cuda:0')
episode: 165 training return: tensor(46.8881, device='cuda:0')
episode: 166 training return: tensor(-313.2084, device='cuda:0')
episode: 167 training return: tensor(-337.1992, device='cuda:0')
epoch: 42 test_true_pfm: 2599.770141879735 sim_pfm: 236.25545664525512
episode: 168 training return: tensor(308.3299, device='cuda:0')
episode: 169 training return: tensor(-2.3981, device='cuda:0')
episode: 170 training return: tensor(-360.5834, device='cuda:0')
episode: 171 training return: tensor(285.2470, device='cuda:0')
epoch: 43 test_true_pfm: 2949.7381549532693 sim_pfm: 237.2857448522894
episode: 172 training return: tensor(33.9443, device='cuda:0')
episode: 173 training return: tensor(-302.3960, device='cuda:0')
episode: 174 training return: tensor(-194.4688, device='cuda:0')
episode: 175 training return: tensor(-229.1169, device='cuda:0')
epoch: 44 test_true_pfm: 3063.7544517941856 sim_pfm: 158.58073558351802
episode: 176 training return: tensor(-366.7196, device='cuda:0')
episode: 177 training return: tensor(264.6511, device='cuda:0')
episode: 178 training return: tensor(-166.7940, device='cuda:0')
episode: 179 training return: tensor(-217.9088, device='cuda:0')
epoch: 45 test_true_pfm: 2895.3163601042797 sim_pfm: 275.61847087437246
episode: 180 training return: tensor(-281.9346, device='cuda:0')
episode: 181 training return: tensor(330.1893, device='cuda:0')
episode: 182 training return: tensor(-200.1139, device='cuda:0')
episode: 183 training return: tensor(-14.9348, device='cuda:0')
epoch: 46 test_true_pfm: 2668.018499959086 sim_pfm: 193.9243579263663
episode: 184 training return: tensor(-370.8292, device='cuda:0')
episode: 185 training return: tensor(-289.8673, device='cuda:0')
episode: 186 training return: tensor(291.3004, device='cuda:0')
episode: 187 training return: tensor(-315.0110, device='cuda:0')
epoch: 47 test_true_pfm: 2461.4844677886135 sim_pfm: -61.93916446655445
episode: 188 training return: tensor(319.4706, device='cuda:0')
episode: 189 training return: tensor(283.0180, device='cuda:0')
episode: 190 training return: tensor(-107.0827, device='cuda:0')
episode: 191 training return: tensor(-312.2908, device='cuda:0')
epoch: 48 test_true_pfm: 2890.358714865088 sim_pfm: 8.28494548848054
episode: 192 training return: tensor(291.5037, device='cuda:0')
episode: 193 training return: tensor(263.2556, device='cuda:0')
episode: 194 training return: tensor(-151.2121, device='cuda:0')
episode: 195 training return: tensor(-98.6817, device='cuda:0')
epoch: 49 test_true_pfm: 2292.693595081136 sim_pfm: 264.9622724744161
episode: 196 training return: tensor(-375.0319, device='cuda:0')
episode: 197 training return: tensor(235.6158, device='cuda:0')
episode: 198 training return: tensor(-282.6039, device='cuda:0')
episode: 199 training return: tensor(-43.6560, device='cuda:0')
epoch: 50 test_true_pfm: 2761.5839041545787 sim_pfm: 136.74779701710213
episode: 200 training return: tensor(-323.6760, device='cuda:0')
episode: 201 training return: tensor(-128.0335, device='cuda:0')
episode: 202 training return: tensor(-27.4479, device='cuda:0')
episode: 203 training return: tensor(-255.3251, device='cuda:0')
epoch: 51 test_true_pfm: 3200.2604551473178 sim_pfm: 205.89313002175186
episode: 204 training return: tensor(278.2289, device='cuda:0')
episode: 205 training return: tensor(-285.4743, device='cuda:0')
episode: 206 training return: tensor(-274.9348, device='cuda:0')
episode: 207 training return: tensor(266.4176, device='cuda:0')
epoch: 52 test_true_pfm: 2799.3822766832604 sim_pfm: 154.54506858162736
episode: 208 training return: tensor(-206.2507, device='cuda:0')
episode: 209 training return: tensor(-342.6906, device='cuda:0')
episode: 210 training return: tensor(273.1866, device='cuda:0')
episode: 211 training return: tensor(-195.6296, device='cuda:0')
epoch: 53 test_true_pfm: 3069.553649562467 sim_pfm: 106.51656479336089
episode: 212 training return: tensor(-83.9729, device='cuda:0')
episode: 213 training return: tensor(-183.8496, device='cuda:0')
episode: 214 training return: tensor(-344.0737, device='cuda:0')
episode: 215 training return: tensor(276.7355, device='cuda:0')
epoch: 54 test_true_pfm: 2905.5218513187524 sim_pfm: 94.67350804778592
episode: 216 training return: tensor(-11.2256, device='cuda:0')
episode: 217 training return: tensor(282.3376, device='cuda:0')
episode: 218 training return: tensor(-310.1934, device='cuda:0')
episode: 219 training return: tensor(-105.9897, device='cuda:0')
epoch: 55 test_true_pfm: 2771.061890019994 sim_pfm: 219.99948628856023
episode: 220 training return: tensor(-249.9412, device='cuda:0')
episode: 221 training return: tensor(289.9708, device='cuda:0')
episode: 222 training return: tensor(291.6411, device='cuda:0')
episode: 223 training return: tensor(202.9463, device='cuda:0')
epoch: 56 test_true_pfm: 2907.193277082795 sim_pfm: 277.78474497511826
episode: 224 training return: tensor(-294.0484, device='cuda:0')
episode: 225 training return: tensor(235.2166, device='cuda:0')
episode: 226 training return: tensor(252.9246, device='cuda:0')
episode: 227 training return: tensor(-101.5599, device='cuda:0')
epoch: 57 test_true_pfm: 3102.761405132951 sim_pfm: 289.6757552147028
episode: 228 training return: tensor(-367.9877, device='cuda:0')
episode: 229 training return: tensor(-299.8458, device='cuda:0')
episode: 230 training return: tensor(-152.1525, device='cuda:0')
episode: 231 training return: tensor(258.0293, device='cuda:0')
epoch: 58 test_true_pfm: 2746.8997571285217 sim_pfm: 243.17865867743967
episode: 232 training return: tensor(-333.9049, device='cuda:0')
episode: 233 training return: tensor(-308.3686, device='cuda:0')
episode: 234 training return: tensor(-62.0494, device='cuda:0')
episode: 235 training return: tensor(76.4062, device='cuda:0')
epoch: 59 test_true_pfm: 3081.6517304984113 sim_pfm: 55.94718428373259
episode: 236 training return: tensor(-210.9862, device='cuda:0')
episode: 237 training return: tensor(231.6411, device='cuda:0')
episode: 238 training return: tensor(-309.1385, device='cuda:0')
episode: 239 training return: tensor(-370.1777, device='cuda:0')
epoch: 60 test_true_pfm: 2183.174686513106 sim_pfm: 169.64759868841307
episode: 240 training return: tensor(0.6587, device='cuda:0')
episode: 241 training return: tensor(-343.4973, device='cuda:0')
episode: 242 training return: tensor(-16.7159, device='cuda:0')
episode: 243 training return: tensor(-331.4605, device='cuda:0')
epoch: 61 test_true_pfm: 3190.9299328691595 sim_pfm: 289.5396067300753
episode: 244 training return: tensor(-2.1997, device='cuda:0')
episode: 245 training return: tensor(266.9382, device='cuda:0')
episode: 246 training return: tensor(-155.4248, device='cuda:0')
episode: 247 training return: tensor(-73.6855, device='cuda:0')
epoch: 62 test_true_pfm: 2960.5660772910037 sim_pfm: 305.19341194839217
episode: 248 training return: tensor(-59.3718, device='cuda:0')
episode: 249 training return: tensor(-315.6264, device='cuda:0')
episode: 250 training return: tensor(-327.9031, device='cuda:0')
episode: 251 training return: tensor(59.6104, device='cuda:0')
epoch: 63 test_true_pfm: 2823.9619745913874 sim_pfm: 166.79478563562347
episode: 252 training return: tensor(-252.9536, device='cuda:0')
episode: 253 training return: tensor(-280.3142, device='cuda:0')
episode: 254 training return: tensor(-328.6783, device='cuda:0')
episode: 255 training return: tensor(309.3867, device='cuda:0')
epoch: 64 test_true_pfm: 2711.978987237953 sim_pfm: 279.79381520180806
episode: 256 training return: tensor(296.7375, device='cuda:0')
episode: 257 training return: tensor(-313.1847, device='cuda:0')
episode: 258 training return: tensor(-293.8784, device='cuda:0')
episode: 259 training return: tensor(-154.8399, device='cuda:0')
epoch: 65 test_true_pfm: 3218.589130141263 sim_pfm: 231.40263751719613
episode: 260 training return: tensor(-315.6974, device='cuda:0')
episode: 261 training return: tensor(130.0370, device='cuda:0')
episode: 262 training return: tensor(-24.9850, device='cuda:0')
episode: 263 training return: tensor(185.8939, device='cuda:0')
epoch: 66 test_true_pfm: 2550.708733587272 sim_pfm: 62.76272174318243
episode: 264 training return: tensor(-344.9131, device='cuda:0')
episode: 265 training return: tensor(-325.1963, device='cuda:0')
episode: 266 training return: tensor(-372.0639, device='cuda:0')
episode: 267 training return: tensor(-289.5655, device='cuda:0')
epoch: 67 test_true_pfm: 2549.3839466046406 sim_pfm: 72.68004080511552
episode: 268 training return: tensor(348.9098, device='cuda:0')
episode: 269 training return: tensor(298.1093, device='cuda:0')
episode: 270 training return: tensor(-368.9277, device='cuda:0')
episode: 271 training return: tensor(-372.7299, device='cuda:0')
epoch: 68 test_true_pfm: 3180.6596073747 sim_pfm: 218.68020899644276
episode: 272 training return: tensor(186.4241, device='cuda:0')
episode: 273 training return: tensor(0.4077, device='cuda:0')
episode: 274 training return: tensor(156.0725, device='cuda:0')
episode: 275 training return: tensor(185.0353, device='cuda:0')
epoch: 69 test_true_pfm: 2810.4631492742997 sim_pfm: 189.89100649351408
episode: 276 training return: tensor(180.3652, device='cuda:0')
episode: 277 training return: tensor(-272.1052, device='cuda:0')
episode: 278 training return: tensor(-191.9622, device='cuda:0')
episode: 279 training return: tensor(219.9126, device='cuda:0')
epoch: 70 test_true_pfm: 3235.879859483976 sim_pfm: 142.41937785983706
episode: 280 training return: tensor(-242.8715, device='cuda:0')
episode: 281 training return: tensor(288.0489, device='cuda:0')
episode: 282 training return: tensor(-369.6085, device='cuda:0')
episode: 283 training return: tensor(282.7425, device='cuda:0')
epoch: 71 test_true_pfm: 3027.6067236235235 sim_pfm: 74.74250286685613
episode: 284 training return: tensor(288.4292, device='cuda:0')
episode: 285 training return: tensor(-312.6794, device='cuda:0')
episode: 286 training return: tensor(-381.3040, device='cuda:0')
episode: 287 training return: tensor(246.6874, device='cuda:0')
epoch: 72 test_true_pfm: 2718.848464167766 sim_pfm: 179.27452832943527
episode: 288 training return: tensor(274.3263, device='cuda:0')
episode: 289 training return: tensor(-137.5374, device='cuda:0')
episode: 290 training return: tensor(-374.3895, device='cuda:0')
episode: 291 training return: tensor(117.9134, device='cuda:0')
epoch: 73 test_true_pfm: 2344.1770925719065 sim_pfm: 96.57740414158131
episode: 292 training return: tensor(325.3091, device='cuda:0')
episode: 293 training return: tensor(-114.2366, device='cuda:0')
episode: 294 training return: tensor(-247.2577, device='cuda:0')
episode: 295 training return: tensor(203.2115, device='cuda:0')
epoch: 74 test_true_pfm: 2581.979706820371 sim_pfm: 246.27578073000768
episode: 296 training return: tensor(-103.5814, device='cuda:0')
episode: 297 training return: tensor(270.1232, device='cuda:0')
episode: 298 training return: tensor(-364.6622, device='cuda:0')
episode: 299 training return: tensor(305.5305, device='cuda:0')
epoch: 75 test_true_pfm: 2794.031212035364 sim_pfm: 177.90243795888577
episode: 300 training return: tensor(114.1499, device='cuda:0')
episode: 301 training return: tensor(331.4438, device='cuda:0')
episode: 302 training return: tensor(26.5472, device='cuda:0')
episode: 303 training return: tensor(-312.7477, device='cuda:0')
epoch: 76 test_true_pfm: 2542.100112706477 sim_pfm: -10.8151144227013
episode: 304 training return: tensor(302.5541, device='cuda:0')
episode: 305 training return: tensor(236.5573, device='cuda:0')
episode: 306 training return: tensor(-337.0471, device='cuda:0')
episode: 307 training return: tensor(-287.2776, device='cuda:0')
epoch: 77 test_true_pfm: 2968.9979449103753 sim_pfm: 162.62268977598674
episode: 308 training return: tensor(-291.8917, device='cuda:0')
episode: 309 training return: tensor(-149.0785, device='cuda:0')
episode: 310 training return: tensor(268.2043, device='cuda:0')
episode: 311 training return: tensor(-184.6217, device='cuda:0')
epoch: 78 test_true_pfm: 2863.9328468715553 sim_pfm: 232.95083491687546
episode: 312 training return: tensor(-341.8857, device='cuda:0')
episode: 313 training return: tensor(262.9652, device='cuda:0')
episode: 314 training return: tensor(-107.3524, device='cuda:0')
episode: 315 training return: tensor(281.1315, device='cuda:0')
epoch: 79 test_true_pfm: 3049.811417461682 sim_pfm: 231.25225401484445
episode: 316 training return: tensor(-88.4410, device='cuda:0')
episode: 317 training return: tensor(-325.9923, device='cuda:0')
episode: 318 training return: tensor(-316.7019, device='cuda:0')
episode: 319 training return: tensor(-298.0497, device='cuda:0')
epoch: 80 test_true_pfm: 2622.5360788042594 sim_pfm: 275.3731324834128
episode: 320 training return: tensor(-8.5120, device='cuda:0')
episode: 321 training return: tensor(-365.8372, device='cuda:0')
episode: 322 training return: tensor(-265.2657, device='cuda:0')
episode: 323 training return: tensor(249.6743, device='cuda:0')
epoch: 81 test_true_pfm: 2887.4126811235164 sim_pfm: 276.2730726238903
episode: 324 training return: tensor(156.9465, device='cuda:0')
episode: 325 training return: tensor(31.8971, device='cuda:0')
episode: 326 training return: tensor(-372.5326, device='cuda:0')
episode: 327 training return: tensor(-372.2141, device='cuda:0')
epoch: 82 test_true_pfm: 2703.950731668954 sim_pfm: 165.42947621499965
episode: 328 training return: tensor(277.1579, device='cuda:0')
episode: 329 training return: tensor(300.7916, device='cuda:0')
episode: 330 training return: tensor(-291.8129, device='cuda:0')
episode: 331 training return: tensor(-344.7276, device='cuda:0')
epoch: 83 test_true_pfm: 3129.71265063216 sim_pfm: 243.5212079717118
episode: 332 training return: tensor(-88.2538, device='cuda:0')
episode: 333 training return: tensor(267.4184, device='cuda:0')
episode: 334 training return: tensor(-87.2973, device='cuda:0')
episode: 335 training return: tensor(-315.1633, device='cuda:0')
epoch: 84 test_true_pfm: 3149.658531427375 sim_pfm: 224.07739985262742
episode: 336 training return: tensor(337.8690, device='cuda:0')
episode: 337 training return: tensor(-269.4002, device='cuda:0')
episode: 338 training return: tensor(160.9152, device='cuda:0')
episode: 339 training return: tensor(-284.9997, device='cuda:0')
epoch: 85 test_true_pfm: 2936.3643264746606 sim_pfm: 265.8270237230851
episode: 340 training return: tensor(316.4112, device='cuda:0')
episode: 341 training return: tensor(-116.5011, device='cuda:0')
episode: 342 training return: tensor(-344.8135, device='cuda:0')
episode: 343 training return: tensor(-376.2125, device='cuda:0')
epoch: 86 test_true_pfm: 2942.209109058114 sim_pfm: 69.01939713872464
episode: 344 training return: tensor(-318.9452, device='cuda:0')
episode: 345 training return: tensor(51.1035, device='cuda:0')
episode: 346 training return: tensor(-196.2758, device='cuda:0')
episode: 347 training return: tensor(-287.9873, device='cuda:0')
epoch: 87 test_true_pfm: 2784.0724173786475 sim_pfm: 190.65232581237797
episode: 348 training return: tensor(-125.0745, device='cuda:0')
episode: 349 training return: tensor(-325.2266, device='cuda:0')
episode: 350 training return: tensor(279.7194, device='cuda:0')
episode: 351 training return: tensor(-327.2285, device='cuda:0')
epoch: 88 test_true_pfm: 2521.8990815561788 sim_pfm: 42.303379099913094
episode: 352 training return: tensor(-171.3510, device='cuda:0')
episode: 353 training return: tensor(255.9267, device='cuda:0')
episode: 354 training return: tensor(294.0590, device='cuda:0')
episode: 355 training return: tensor(-374.3294, device='cuda:0')
epoch: 89 test_true_pfm: 2674.1999176965114 sim_pfm: 114.09219905902864
episode: 356 training return: tensor(-311.8738, device='cuda:0')
episode: 357 training return: tensor(113.5557, device='cuda:0')
episode: 358 training return: tensor(303.7941, device='cuda:0')
episode: 359 training return: tensor(167.2750, device='cuda:0')
epoch: 90 test_true_pfm: 3180.921204661347 sim_pfm: 125.89823846427801
episode: 360 training return: tensor(-139.2111, device='cuda:0')
episode: 361 training return: tensor(90.1117, device='cuda:0')
episode: 362 training return: tensor(260.4044, device='cuda:0')
episode: 363 training return: tensor(22.7833, device='cuda:0')
epoch: 91 test_true_pfm: 2668.1728565465487 sim_pfm: 161.92275836986178
episode: 364 training return: tensor(-366.8898, device='cuda:0')
episode: 365 training return: tensor(118.5684, device='cuda:0')
episode: 366 training return: tensor(-155.3087, device='cuda:0')
episode: 367 training return: tensor(-367.5659, device='cuda:0')
epoch: 92 test_true_pfm: 3033.8881411866346 sim_pfm: -0.5193785017666718
episode: 368 training return: tensor(-142.2746, device='cuda:0')
episode: 369 training return: tensor(-328.2913, device='cuda:0')
episode: 370 training return: tensor(25.9185, device='cuda:0')
episode: 371 training return: tensor(-322.4713, device='cuda:0')
epoch: 93 test_true_pfm: 2723.228889581664 sim_pfm: 283.10032323453925
episode: 372 training return: tensor(-193.5918, device='cuda:0')
episode: 373 training return: tensor(277.7038, device='cuda:0')
episode: 374 training return: tensor(271.2031, device='cuda:0')
episode: 375 training return: tensor(283.0156, device='cuda:0')
epoch: 94 test_true_pfm: 3084.1989358066 sim_pfm: 208.6468090347868
episode: 376 training return: tensor(94.6604, device='cuda:0')
episode: 377 training return: tensor(-97.5922, device='cuda:0')
episode: 378 training return: tensor(153.4720, device='cuda:0')
episode: 379 training return: tensor(-81.1884, device='cuda:0')
epoch: 95 test_true_pfm: 2801.979523277003 sim_pfm: 114.92705509780596
episode: 380 training return: tensor(-290.3123, device='cuda:0')
episode: 381 training return: tensor(6.5284, device='cuda:0')
episode: 382 training return: tensor(193.1285, device='cuda:0')
episode: 383 training return: tensor(-370.7610, device='cuda:0')
epoch: 96 test_true_pfm: 2955.0915945638367 sim_pfm: 201.8748212948752
episode: 384 training return: tensor(-367.7771, device='cuda:0')
episode: 385 training return: tensor(267.5520, device='cuda:0')
episode: 386 training return: tensor(-127.7353, device='cuda:0')
episode: 387 training return: tensor(296.6348, device='cuda:0')
epoch: 97 test_true_pfm: 3191.4796068864507 sim_pfm: 27.128796766744927
episode: 388 training return: tensor(-282.2061, device='cuda:0')
episode: 389 training return: tensor(-98.2212, device='cuda:0')
episode: 390 training return: tensor(-238.3070, device='cuda:0')
episode: 391 training return: tensor(258.5476, device='cuda:0')
epoch: 98 test_true_pfm: 2668.0186502345728 sim_pfm: 10.01303190466327
episode: 392 training return: tensor(80.4141, device='cuda:0')
episode: 393 training return: tensor(92.0542, device='cuda:0')
episode: 394 training return: tensor(-240.5095, device='cuda:0')
episode: 395 training return: tensor(-316.9272, device='cuda:0')
epoch: 99 test_true_pfm: 2654.174112228088 sim_pfm: 175.2858699594702
episode: 396 training return: tensor(261.2084, device='cuda:0')
episode: 397 training return: tensor(252.1684, device='cuda:0')
episode: 398 training return: tensor(240.6884, device='cuda:0')
episode: 399 training return: tensor(214.1466, device='cuda:0')
epoch: 100 test_true_pfm: 2679.455237580854 sim_pfm: 84.2063918436567
episode: 400 training return: tensor(-377.0002, device='cuda:0')
episode: 401 training return: tensor(-57.8633, device='cuda:0')
episode: 402 training return: tensor(-278.6889, device='cuda:0')
episode: 403 training return: tensor(-109.2618, device='cuda:0')
epoch: 101 test_true_pfm: 3074.2222564075423 sim_pfm: 109.83891661977395
episode: 404 training return: tensor(-321.9010, device='cuda:0')
episode: 405 training return: tensor(-193.4109, device='cuda:0')
episode: 406 training return: tensor(275.7653, device='cuda:0')
episode: 407 training return: tensor(-331.2261, device='cuda:0')
epoch: 102 test_true_pfm: 2505.799091918359 sim_pfm: 186.5845950987617
episode: 408 training return: tensor(-288.5355, device='cuda:0')
episode: 409 training return: tensor(-331.6329, device='cuda:0')
episode: 410 training return: tensor(-115.7518, device='cuda:0')
episode: 411 training return: tensor(250.3534, device='cuda:0')
epoch: 103 test_true_pfm: 3224.5905902504624 sim_pfm: 288.1597049141225
episode: 412 training return: tensor(-356.1675, device='cuda:0')
episode: 413 training return: tensor(291.3136, device='cuda:0')
episode: 414 training return: tensor(-28.9529, device='cuda:0')
episode: 415 training return: tensor(-201.9308, device='cuda:0')
epoch: 104 test_true_pfm: 2785.668866249478 sim_pfm: 60.88528840756044
episode: 416 training return: tensor(282.0802, device='cuda:0')
episode: 417 training return: tensor(-199.5666, device='cuda:0')
episode: 418 training return: tensor(266.9576, device='cuda:0')
episode: 419 training return: tensor(-193.6870, device='cuda:0')
epoch: 105 test_true_pfm: 2942.273679263077 sim_pfm: 107.65068821585737
episode: 420 training return: tensor(-369.4259, device='cuda:0')
episode: 421 training return: tensor(-280.3014, device='cuda:0')
episode: 422 training return: tensor(-137.2049, device='cuda:0')
episode: 423 training return: tensor(268.4261, device='cuda:0')
epoch: 106 test_true_pfm: 3160.4030464017646 sim_pfm: 252.44921280129347
episode: 424 training return: tensor(-314.4216, device='cuda:0')
episode: 425 training return: tensor(-270.1110, device='cuda:0')
episode: 426 training return: tensor(307.8734, device='cuda:0')
episode: 427 training return: tensor(-370.9236, device='cuda:0')
epoch: 107 test_true_pfm: 2526.724370172043 sim_pfm: 166.0756412789245
episode: 428 training return: tensor(-333.0561, device='cuda:0')
episode: 429 training return: tensor(-290.5670, device='cuda:0')
episode: 430 training return: tensor(-370.9006, device='cuda:0')
episode: 431 training return: tensor(294.9819, device='cuda:0')
epoch: 108 test_true_pfm: 3241.0515179906906 sim_pfm: 250.42296770080188
episode: 432 training return: tensor(249.3108, device='cuda:0')
episode: 433 training return: tensor(-279.2376, device='cuda:0')
episode: 434 training return: tensor(284.5111, device='cuda:0')
episode: 435 training return: tensor(-136.9736, device='cuda:0')
epoch: 109 test_true_pfm: 3192.6557012394064 sim_pfm: 158.50112826595432
episode: 436 training return: tensor(254.6016, device='cuda:0')
episode: 437 training return: tensor(325.0495, device='cuda:0')
episode: 438 training return: tensor(277.0086, device='cuda:0')
episode: 439 training return: tensor(298.4992, device='cuda:0')
epoch: 110 test_true_pfm: 3008.985293423319 sim_pfm: 269.62768794918276
episode: 440 training return: tensor(343.9821, device='cuda:0')
episode: 441 training return: tensor(271.8592, device='cuda:0')
episode: 442 training return: tensor(37.5602, device='cuda:0')
episode: 443 training return: tensor(253.6862, device='cuda:0')
epoch: 111 test_true_pfm: 3043.1324646218345 sim_pfm: 74.23206092463806
episode: 444 training return: tensor(-365.2768, device='cuda:0')
episode: 445 training return: tensor(164.7438, device='cuda:0')
episode: 446 training return: tensor(-107.2128, device='cuda:0')
episode: 447 training return: tensor(-371.0458, device='cuda:0')
epoch: 112 test_true_pfm: 3215.6989961042705 sim_pfm: 65.16980761664065
episode: 448 training return: tensor(-200.1616, device='cuda:0')
episode: 449 training return: tensor(-201.1210, device='cuda:0')
episode: 450 training return: tensor(257.6670, device='cuda:0')
episode: 451 training return: tensor(-36.9630, device='cuda:0')
epoch: 113 test_true_pfm: 2831.3603519733283 sim_pfm: 273.89099241946434
episode: 452 training return: tensor(-367.9543, device='cuda:0')
episode: 453 training return: tensor(-359.7172, device='cuda:0')
episode: 454 training return: tensor(-369.1076, device='cuda:0')
episode: 455 training return: tensor(-260.1554, device='cuda:0')
epoch: 114 test_true_pfm: 2749.2307201230687 sim_pfm: 283.13857833224273
episode: 456 training return: tensor(-344.4088, device='cuda:0')
episode: 457 training return: tensor(-12.7353, device='cuda:0')
episode: 458 training return: tensor(257.0096, device='cuda:0')
episode: 459 training return: tensor(159.2654, device='cuda:0')
epoch: 115 test_true_pfm: 3168.392200758442 sim_pfm: 234.7621321279245
episode: 460 training return: tensor(-71.8238, device='cuda:0')
episode: 461 training return: tensor(-280.2176, device='cuda:0')
episode: 462 training return: tensor(-377.1299, device='cuda:0')
episode: 463 training return: tensor(-127.8576, device='cuda:0')
epoch: 116 test_true_pfm: 3012.6319028514995 sim_pfm: 27.42476007299653
episode: 464 training return: tensor(-121.3249, device='cuda:0')
episode: 465 training return: tensor(331.3508, device='cuda:0')
episode: 466 training return: tensor(-295.0710, device='cuda:0')
episode: 467 training return: tensor(249.0872, device='cuda:0')
epoch: 117 test_true_pfm: 2967.243491291802 sim_pfm: 259.4010575908469
episode: 468 training return: tensor(-136.4807, device='cuda:0')
episode: 469 training return: tensor(59.9913, device='cuda:0')
episode: 470 training return: tensor(270.7171, device='cuda:0')
episode: 471 training return: tensor(-337.3210, device='cuda:0')
epoch: 118 test_true_pfm: 3235.8735947605164 sim_pfm: 259.2962524569496
episode: 472 training return: tensor(-352.4407, device='cuda:0')
episode: 473 training return: tensor(110.7160, device='cuda:0')
episode: 474 training return: tensor(-358.5439, device='cuda:0')
episode: 475 training return: tensor(-238.8937, device='cuda:0')
epoch: 119 test_true_pfm: 3175.1758125171086 sim_pfm: 207.46720571800446
episode: 476 training return: tensor(-343.6828, device='cuda:0')
episode: 477 training return: tensor(-369.7916, device='cuda:0')
episode: 478 training return: tensor(255.9851, device='cuda:0')
episode: 479 training return: tensor(219.0771, device='cuda:0')
epoch: 120 test_true_pfm: 2836.6458050266688 sim_pfm: 229.31389739940641
episode: 480 training return: tensor(329.3180, device='cuda:0')
episode: 481 training return: tensor(-229.2595, device='cuda:0')
episode: 482 training return: tensor(-64.4142, device='cuda:0')
episode: 483 training return: tensor(-286.4206, device='cuda:0')
epoch: 121 test_true_pfm: 2693.945079110337 sim_pfm: 22.38737513394638
episode: 484 training return: tensor(-325.6336, device='cuda:0')
episode: 485 training return: tensor(255.2359, device='cuda:0')
episode: 486 training return: tensor(-209.3904, device='cuda:0')
episode: 487 training return: tensor(274.3223, device='cuda:0')
epoch: 122 test_true_pfm: 2900.88642421439 sim_pfm: 203.2037018893558
episode: 488 training return: tensor(291.9691, device='cuda:0')
episode: 489 training return: tensor(19.7913, device='cuda:0')
episode: 490 training return: tensor(-314.1610, device='cuda:0')
episode: 491 training return: tensor(-366.6901, device='cuda:0')
epoch: 123 test_true_pfm: 2723.822304842347 sim_pfm: 119.44155061252725
episode: 492 training return: tensor(277.0026, device='cuda:0')
episode: 493 training return: tensor(-125.1656, device='cuda:0')
episode: 494 training return: tensor(-69.9422, device='cuda:0')
episode: 495 training return: tensor(61.5816, device='cuda:0')
epoch: 124 test_true_pfm: 2550.2682520732396 sim_pfm: 275.34351186834584
episode: 496 training return: tensor(-284.9466, device='cuda:0')
episode: 497 training return: tensor(-366.9532, device='cuda:0')
episode: 498 training return: tensor(327.2663, device='cuda:0')
episode: 499 training return: tensor(-4.1823, device='cuda:0')
epoch: 125 test_true_pfm: 2985.500230283325 sim_pfm: 246.17597934184596
episode: 500 training return: tensor(249.3173, device='cuda:0')
episode: 501 training return: tensor(-270.0096, device='cuda:0')
episode: 502 training return: tensor(-341.1731, device='cuda:0')
episode: 503 training return: tensor(262.0199, device='cuda:0')
epoch: 126 test_true_pfm: 2775.1760620058194 sim_pfm: 149.2762082054202
episode: 504 training return: tensor(-63.0885, device='cuda:0')
episode: 505 training return: tensor(28.8794, device='cuda:0')
episode: 506 training return: tensor(277.3682, device='cuda:0')
episode: 507 training return: tensor(-369.9380, device='cuda:0')
epoch: 127 test_true_pfm: 2778.7046473272535 sim_pfm: 201.22199996260073
episode: 508 training return: tensor(263.1320, device='cuda:0')
episode: 509 training return: tensor(286.4959, device='cuda:0')
episode: 510 training return: tensor(263.4311, device='cuda:0')
episode: 511 training return: tensor(253.0035, device='cuda:0')
epoch: 128 test_true_pfm: 3046.993407151685 sim_pfm: 257.17544993997825
episode: 512 training return: tensor(291.6458, device='cuda:0')
episode: 513 training return: tensor(301.3784, device='cuda:0')
episode: 514 training return: tensor(34.4987, device='cuda:0')
episode: 515 training return: tensor(-103.2074, device='cuda:0')
epoch: 129 test_true_pfm: 3150.0115696059015 sim_pfm: 176.31775789917447
episode: 516 training return: tensor(-369.0256, device='cuda:0')
episode: 517 training return: tensor(-371.3669, device='cuda:0')
episode: 518 training return: tensor(-344.9483, device='cuda:0')
episode: 519 training return: tensor(212.7222, device='cuda:0')
epoch: 130 test_true_pfm: 2976.1591817776275 sim_pfm: 184.40016984906592
episode: 520 training return: tensor(-242.5767, device='cuda:0')
episode: 521 training return: tensor(-372.7251, device='cuda:0')
episode: 522 training return: tensor(-22.1527, device='cuda:0')
episode: 523 training return: tensor(-341.1593, device='cuda:0')
epoch: 131 test_true_pfm: 2964.3441225096285 sim_pfm: 230.09917557184235
episode: 524 training return: tensor(-282.6355, device='cuda:0')
episode: 525 training return: tensor(-321.8617, device='cuda:0')
episode: 526 training return: tensor(248.1803, device='cuda:0')
episode: 527 training return: tensor(-11.9024, device='cuda:0')
epoch: 132 test_true_pfm: 3077.009785221726 sim_pfm: 172.6866586980177
episode: 528 training return: tensor(-332.0483, device='cuda:0')
episode: 529 training return: tensor(3.7605, device='cuda:0')
episode: 530 training return: tensor(-227.8119, device='cuda:0')
episode: 531 training return: tensor(304.3384, device='cuda:0')
epoch: 133 test_true_pfm: 2794.1547687984676 sim_pfm: 215.9618115637762
episode: 532 training return: tensor(260.0538, device='cuda:0')
episode: 533 training return: tensor(-323.4162, device='cuda:0')
episode: 534 training return: tensor(-253.2058, device='cuda:0')
episode: 535 training return: tensor(-365.9717, device='cuda:0')
epoch: 134 test_true_pfm: 3135.2769339817946 sim_pfm: 221.6318779854822
episode: 536 training return: tensor(-373.0034, device='cuda:0')
episode: 537 training return: tensor(277.6989, device='cuda:0')
episode: 538 training return: tensor(199.7042, device='cuda:0')
episode: 539 training return: tensor(-117.8228, device='cuda:0')
epoch: 135 test_true_pfm: 3186.662622102229 sim_pfm: 258.22992564039305
episode: 540 training return: tensor(-305.4257, device='cuda:0')
episode: 541 training return: tensor(-92.8104, device='cuda:0')
episode: 542 training return: tensor(-143.1948, device='cuda:0')
episode: 543 training return: tensor(258.7827, device='cuda:0')
epoch: 136 test_true_pfm: 2809.9652731575156 sim_pfm: 234.89720113029276
episode: 544 training return: tensor(-181.6642, device='cuda:0')
episode: 545 training return: tensor(-125.5548, device='cuda:0')
episode: 546 training return: tensor(-374.6037, device='cuda:0')
episode: 547 training return: tensor(315.2652, device='cuda:0')
epoch: 137 test_true_pfm: 3174.8970579740558 sim_pfm: 276.00943890438066
episode: 548 training return: tensor(-321.5045, device='cuda:0')
episode: 549 training return: tensor(-305.6989, device='cuda:0')
episode: 550 training return: tensor(-181.6950, device='cuda:0')
episode: 551 training return: tensor(-327.5213, device='cuda:0')
epoch: 138 test_true_pfm: 3016.8201387903046 sim_pfm: 204.39046450149422
episode: 552 training return: tensor(320.3097, device='cuda:0')
episode: 553 training return: tensor(-329.0281, device='cuda:0')
episode: 554 training return: tensor(303.3413, device='cuda:0')
episode: 555 training return: tensor(-109.1516, device='cuda:0')
epoch: 139 test_true_pfm: 3177.62275935286 sim_pfm: 200.17550903640222
episode: 556 training return: tensor(307.2502, device='cuda:0')
episode: 557 training return: tensor(206.1195, device='cuda:0')
episode: 558 training return: tensor(-266.4798, device='cuda:0')
episode: 559 training return: tensor(301.2195, device='cuda:0')
epoch: 140 test_true_pfm: 2588.2462620025085 sim_pfm: 162.11324732688567
episode: 560 training return: tensor(-299.9043, device='cuda:0')
episode: 561 training return: tensor(-280.8108, device='cuda:0')
episode: 562 training return: tensor(295.5822, device='cuda:0')
episode: 563 training return: tensor(-156.8465, device='cuda:0')
epoch: 141 test_true_pfm: 3213.2047579149985 sim_pfm: 272.317894980195
episode: 564 training return: tensor(141.5062, device='cuda:0')
episode: 565 training return: tensor(-204.2421, device='cuda:0')
episode: 566 training return: tensor(118.2278, device='cuda:0')
episode: 567 training return: tensor(-365.8013, device='cuda:0')
epoch: 142 test_true_pfm: 2862.16649530072 sim_pfm: 152.26256072121518
episode: 568 training return: tensor(18.5180, device='cuda:0')
episode: 569 training return: tensor(234.2183, device='cuda:0')
episode: 570 training return: tensor(151.2020, device='cuda:0')
episode: 571 training return: tensor(-49.2294, device='cuda:0')
epoch: 143 test_true_pfm: 2770.795830666136 sim_pfm: 126.12074686967146
episode: 572 training return: tensor(330.1937, device='cuda:0')
episode: 573 training return: tensor(17.5543, device='cuda:0')
episode: 574 training return: tensor(309.5307, device='cuda:0')
episode: 575 training return: tensor(295.2975, device='cuda:0')
epoch: 144 test_true_pfm: 2659.150968292326 sim_pfm: 239.93076526125273
episode: 576 training return: tensor(283.9496, device='cuda:0')
episode: 577 training return: tensor(-305.1583, device='cuda:0')
episode: 578 training return: tensor(-107.0626, device='cuda:0')
episode: 579 training return: tensor(-213.8583, device='cuda:0')
epoch: 145 test_true_pfm: 3057.488746334084 sim_pfm: 195.03839565747572
episode: 580 training return: tensor(-264.6431, device='cuda:0')
episode: 581 training return: tensor(-338.0392, device='cuda:0')
episode: 582 training return: tensor(237.1305, device='cuda:0')
episode: 583 training return: tensor(257.7097, device='cuda:0')
epoch: 146 test_true_pfm: 3191.104292565258 sim_pfm: 281.76862129270256
episode: 584 training return: tensor(289.0196, device='cuda:0')
episode: 585 training return: tensor(-35.1297, device='cuda:0')
episode: 586 training return: tensor(189.3149, device='cuda:0')
episode: 587 training return: tensor(-212.5574, device='cuda:0')
epoch: 147 test_true_pfm: 3205.4993947193448 sim_pfm: 138.7601544034745
episode: 588 training return: tensor(158.4489, device='cuda:0')
episode: 589 training return: tensor(240.7769, device='cuda:0')
episode: 590 training return: tensor(-237.2247, device='cuda:0')
episode: 591 training return: tensor(-145.4330, device='cuda:0')
epoch: 148 test_true_pfm: 2837.517247134138 sim_pfm: 279.7049522693269
episode: 592 training return: tensor(192.3659, device='cuda:0')
episode: 593 training return: tensor(-285.7927, device='cuda:0')
episode: 594 training return: tensor(-336.4683, device='cuda:0')
episode: 595 training return: tensor(279.0112, device='cuda:0')
epoch: 149 test_true_pfm: 3081.3444641149094 sim_pfm: 216.86891673516948
episode: 596 training return: tensor(-375.7885, device='cuda:0')
episode: 597 training return: tensor(272.5608, device='cuda:0')
episode: 598 training return: tensor(-273.5121, device='cuda:0')
episode: 599 training return: tensor(-96.6132, device='cuda:0')
epoch: 150 test_true_pfm: 2997.13323918236 sim_pfm: 113.48577327323922
