['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '3000', '--sub']
epoch: 0 training_loss 0.2543376006931066 test_loss: 0.1030652642250061
epoch: 1 training_loss 0.14856575891375542 test_loss: 0.07113362550735473
epoch: 2 training_loss 0.11725493893027306 test_loss: 0.07842331528663635
epoch: 3 training_loss 0.1054547031223774 test_loss: 0.06842338442802429
epoch: 4 training_loss 0.10653503399342298 test_loss: 0.0681963562965393
epoch: 5 training_loss 0.11548604462295771 test_loss: 0.0669455111026764
epoch: 6 training_loss 0.10254195243120194 test_loss: 0.06360714435577393
epoch: 7 training_loss 0.09711387805640698 test_loss: 0.07992032766342164
epoch: 8 training_loss 0.09061900394037366 test_loss: 0.0736100435256958
epoch: 9 training_loss 0.08903134077787399 test_loss: 0.06757086515426636
epoch: 10 training_loss 0.07460228592157364 test_loss: 0.09422070384025574
epoch: 11 training_loss 0.09780689653009177 test_loss: 0.0737663209438324
epoch: 12 training_loss 0.08189419845119118 test_loss: 0.06361159682273865
epoch: 13 training_loss 0.08755521597340703 test_loss: 0.08117464184761047
epoch: 14 training_loss 0.06996316952630878 test_loss: 0.07529886960983276
epoch: 15 training_loss 0.07980013612657785 test_loss: 0.08388803005218506
epoch: 16 training_loss 0.07079297056421638 test_loss: 0.07630323767662048
epoch: 17 training_loss 0.07169839899986982 test_loss: 0.07680971026420594
epoch: 18 training_loss 0.07159391239285469 test_loss: 0.07318605184555053
epoch: 19 training_loss 0.06333619432523846 test_loss: 0.07717978358268737
epoch: 20 training_loss 0.07034082202240825 test_loss: 0.08045530319213867
epoch: 21 training_loss 0.06629855981096626 test_loss: 0.06974358558654785
epoch: 22 training_loss 0.06789807154797017 test_loss: 0.07604204416275025
epoch: 23 training_loss 0.06750422157347202 test_loss: 0.06747626066207886
epoch: 24 training_loss 0.06118978081271052 test_loss: 0.08701683282852173
epoch: 25 training_loss 0.059895993331447246 test_loss: 0.0784801185131073
epoch: 26 training_loss 0.06290180440992117 test_loss: 0.07127217054367066
epoch: 27 training_loss 0.053855798272415996 test_loss: 0.07610533833503723
epoch: 28 training_loss 0.05602289055474102 test_loss: 0.08926472067832947
epoch: 29 training_loss 0.06439827920868993 test_loss: 0.07934783101081848
epoch: 30 training_loss 0.05968930582515895 test_loss: 0.08538730144500732
epoch: 31 training_loss 0.052112030163407326 test_loss: 0.0788145661354065
epoch: 32 training_loss 0.05136067450046539 test_loss: 0.08073425889015198
epoch: 33 training_loss 0.04954231944866479 test_loss: 0.09105991125106812
epoch: 34 training_loss 0.05464195262640715 test_loss: 0.09925398230552673
epoch: 35 training_loss 0.05207182859070599 test_loss: 0.08039996027946472
epoch: 36 training_loss 0.05004504971206188 test_loss: 0.09400911927223206
epoch: 37 training_loss 0.04731927921064198 test_loss: 0.08929085731506348
epoch: 38 training_loss 0.04996886437758803 test_loss: 0.07609483599662781
epoch: 39 training_loss 0.05741495837457478 test_loss: 0.07568660378456116
epoch: 40 training_loss 0.0458855654951185 test_loss: 0.10409729480743408
epoch: 41 training_loss 0.04885304816998541 test_loss: 0.088661789894104
epoch: 42 training_loss 0.04996228112839162 test_loss: 0.07952746748924255
epoch: 43 training_loss 0.04157448641955853 test_loss: 0.09669051766395569
epoch: 44 training_loss 0.052501954687759284 test_loss: 0.09262363314628601
epoch: 45 training_loss 0.03535657369531691 test_loss: 0.09658570289611816
epoch: 46 training_loss 0.03930804938077927 test_loss: 0.09332796931266785
epoch: 47 training_loss 0.03720722792670131 test_loss: 0.08986837267875672
epoch: 48 training_loss 0.039322093571536244 test_loss: 0.10358858108520508
epoch: 49 training_loss 0.03951906859874725 test_loss: 0.10183490514755249
epoch: 50 training_loss 0.038172930264845494 test_loss: 0.10276696681976319
epoch: 51 training_loss 0.049322601445019244 test_loss: 0.09606063365936279
epoch: 52 training_loss 0.04510314662009478 test_loss: 0.10207945108413696
epoch: 53 training_loss 0.030013935118913652 test_loss: 0.0924474537372589
epoch: 54 training_loss 0.03832265549339354 test_loss: 0.076840341091156
epoch: 55 training_loss 0.033129072478041054 test_loss: 0.09842187166213989
epoch: 56 training_loss 0.029849072182551027 test_loss: 0.11828627586364746
epoch: 57 training_loss 0.03186673446092755 test_loss: 0.11688450574874878
epoch: 58 training_loss 0.03175881011877209 test_loss: 0.10385154485702515
epoch: 59 training_loss 0.03159775260835886 test_loss: 0.10791354179382324
epoch: 60 training_loss 0.034783559064380826 test_loss: 0.09695791602134704
epoch: 61 training_loss 0.027324219634756446 test_loss: 0.10624316930770875
epoch: 62 training_loss 0.02222333981655538 test_loss: 0.12354549169540405
epoch: 63 training_loss 0.023877308354713023 test_loss: 0.1132975697517395
epoch: 64 training_loss 0.023658770653419196 test_loss: 0.11594598293304444
epoch: 65 training_loss 0.026013215919956565 test_loss: 0.11294312477111816
epoch: 66 training_loss 0.03200972457882017 test_loss: 0.1071509599685669
epoch: 67 training_loss 0.02584733140654862 test_loss: 0.11238502264022827
epoch: 68 training_loss 0.02448940829373896 test_loss: 0.12562180757522584
epoch: 69 training_loss 0.02322068952023983 test_loss: 0.13650918006896973
epoch: 70 training_loss 0.023693116838112472 test_loss: 0.1294209599494934
epoch: 71 training_loss 0.032129583444911985 test_loss: 0.11549845933914185
epoch: 72 training_loss 0.01900399654638022 test_loss: 0.12561098337173462
epoch: 73 training_loss 0.019381501241587103 test_loss: 0.12325116395950317
epoch: 74 training_loss 0.02047908290522173 test_loss: 0.1266080617904663
epoch: 75 training_loss 0.02252993662841618 test_loss: 0.12914952039718627
epoch: 76 training_loss 0.021500621158629656 test_loss: 0.11945410966873168
epoch: 77 training_loss 0.01902752954280004 test_loss: 0.11884106397628784
epoch: 78 training_loss 0.023119477960281073 test_loss: 0.1299746513366699
epoch: 79 training_loss 0.023823843603022397 test_loss: 0.13147019147872924
epoch: 80 training_loss 0.01635442577302456 test_loss: 0.11091021299362183
epoch: 81 training_loss 0.018973103361204266 test_loss: 0.13945000171661376
epoch: 82 training_loss 0.01620063348207623 test_loss: 0.13083040714263916
epoch: 83 training_loss 0.020914465030655263 test_loss: 0.12843817472457886
epoch: 84 training_loss 0.015391541589051485 test_loss: 0.13774914741516114
epoch: 85 training_loss 0.015167051975149662 test_loss: 0.14539297819137573
epoch: 86 training_loss 0.02375776524655521 test_loss: 0.12429403066635132
epoch: 87 training_loss 0.017647502527106552 test_loss: 0.13555464744567872
epoch: 88 training_loss 0.01391274926951155 test_loss: 0.13520337343215943
epoch: 89 training_loss 0.015829001395031808 test_loss: 0.13499773740768434
epoch: 90 training_loss 0.015246834987774491 test_loss: 0.13760735988616943
epoch: 91 training_loss 0.013854997863527388 test_loss: 0.14700448513031006
epoch: 92 training_loss 0.011108227517688647 test_loss: 0.15045521259307862
epoch: 93 training_loss 0.016848469430115073 test_loss: 0.14966051578521727
epoch: 94 training_loss 0.0185141562926583 test_loss: 0.13372789621353148
epoch: 95 training_loss 0.01380821666913107 test_loss: 0.15054314136505126
epoch: 96 training_loss 0.01880159136839211 test_loss: 0.14372646808624268
epoch: 97 training_loss 0.015566210967954248 test_loss: 0.14220393896102906
epoch: 98 training_loss 0.009509511993965134 test_loss: 0.14009941816329957
epoch: 99 training_loss 0.009947337303310632 test_loss: 0.14024667739868163
epoch: 100 training_loss 0.018862681130412964 test_loss: 0.14092762470245362
epoch: 101 training_loss 0.01783984009642154 test_loss: 0.13127501010894777
epoch: 102 training_loss 0.012793998264241964 test_loss: 0.15302449464797974
epoch: 103 training_loss 0.01007446711184457 test_loss: 0.1513805866241455
epoch: 104 training_loss 0.009369803046574816 test_loss: 0.14994964599609376
epoch: 105 training_loss 0.008192043513990938 test_loss: 0.14553085565567017
epoch: 106 training_loss 0.008616017072927206 test_loss: 0.15797382593154907
epoch: 107 training_loss 0.014040993268135935 test_loss: 0.145388925075531
epoch: 108 training_loss 0.009773502690950408 test_loss: 0.14338871240615844
epoch: 109 training_loss 0.01904713818570599 test_loss: 0.1454690456390381
epoch: 110 training_loss 0.02917613778728992 test_loss: 0.13322571516036988
epoch: 111 training_loss 0.019099097053986044 test_loss: 0.12360541820526123
epoch: 112 training_loss 0.010061942839529365 test_loss: 0.1564551830291748
epoch: 113 training_loss 0.00835886650485918 test_loss: 0.15709646940231323
epoch: 114 training_loss 0.01078097288031131 test_loss: 0.14742636680603027
epoch: 115 training_loss 0.009584159845253452 test_loss: 0.15409629344940184
epoch: 116 training_loss 0.010798727021319792 test_loss: 0.1336319088935852
epoch: 117 training_loss 0.014970417879521847 test_loss: 0.15554192066192626
epoch: 118 training_loss 0.011861464378889651 test_loss: 0.15589919090270996
epoch: 119 training_loss 0.00787822569021955 test_loss: 0.14668622016906738
epoch: 120 training_loss 0.006659011996816844 test_loss: 0.14399290084838867
epoch: 121 training_loss 0.0063160480908118185 test_loss: 0.1492850661277771
epoch: 122 training_loss 0.006185610799584538 test_loss: 0.15067753791809083
epoch: 123 training_loss 0.008156348622869701 test_loss: 0.14582701921463012
epoch: 124 training_loss 0.00917456288356334 test_loss: 0.16089515686035155
epoch: 125 training_loss 0.007595049936790019 test_loss: 0.1678943157196045
epoch: 126 training_loss 0.010743284826166927 test_loss: 0.15695310831069947
epoch: 127 training_loss 0.0319670342723839 test_loss: 0.1471693515777588
epoch: 128 training_loss 0.018097496293485164 test_loss: 0.1646694779396057
epoch: 129 training_loss 0.010908379860920832 test_loss: 0.1582239031791687
epoch: 130 training_loss 0.007132940770825371 test_loss: 0.15328046083450317
epoch: 131 training_loss 0.0057719611120410266 test_loss: 0.17094420194625853
epoch: 132 training_loss 0.007403054715832695 test_loss: 0.15730804204940796
epoch: 133 training_loss 0.009255894203670323 test_loss: 0.16212643384933473
epoch: 134 training_loss 0.006345251245656982 test_loss: 0.1767983317375183
epoch: 135 training_loss 0.008066774596227334 test_loss: 0.16028916835784912
epoch: 136 training_loss 0.00438581841182895 test_loss: 0.17202829122543334
epoch: 137 training_loss 0.00436408041161485 test_loss: 0.15698928833007814
epoch: 138 training_loss 0.00430462219635956 test_loss: 0.16558150053024293
epoch: 139 training_loss 0.004615487724076956 test_loss: 0.16757227182388307
epoch: 140 training_loss 0.00494376756483689 test_loss: 0.17494791746139526
epoch: 141 training_loss 0.015294812080683187 test_loss: 0.15189141035079956
epoch: 142 training_loss 0.04276047529419884 test_loss: 0.1320199489593506
epoch: 143 training_loss 0.02710301404353231 test_loss: 0.12348135709762573
epoch: 144 training_loss 0.013670410732738674 test_loss: 0.14383500814437866
epoch: 145 training_loss 0.008944659982807935 test_loss: 0.15356465578079223
epoch: 146 training_loss 0.007441187127842568 test_loss: 0.15212239027023317
epoch: 147 training_loss 0.0059543127100914715 test_loss: 0.1489650249481201
epoch: 148 training_loss 0.0049725673056673254 test_loss: 0.15257278680801392
epoch: 149 training_loss 0.004604169242084026 test_loss: 0.15772032737731934
epoch: 0 training_loss 36.90596441268921 test_loss: 9.979582977294921
epoch: 1 training_loss 17.13058207511902 test_loss: 6.793370056152344
epoch: 2 training_loss 12.823368349075317 test_loss: 5.50318717956543
epoch: 3 training_loss 10.415963706970215 test_loss: 4.660674285888672
epoch: 4 training_loss 9.134513239860535 test_loss: 4.268513870239258
epoch: 5 training_loss 8.089701976776123 test_loss: 3.8778587341308595
epoch: 6 training_loss 7.4786363792419435 test_loss: 3.6026172637939453
epoch: 7 training_loss 7.049703130722046 test_loss: 3.435626220703125
epoch: 8 training_loss 6.513552746772766 test_loss: 3.2355373382568358
epoch: 9 training_loss 6.1906543684005735 test_loss: 3.1157629013061525
epoch: 10 training_loss 5.854106884002686 test_loss: 2.956441307067871
epoch: 11 training_loss 5.748261551856995 test_loss: 2.8463520050048827
epoch: 12 training_loss 5.4014002799987795 test_loss: 2.731196975708008
epoch: 13 training_loss 5.228699254989624 test_loss: 2.6468196868896485
epoch: 14 training_loss 5.029774289131165 test_loss: 2.57276611328125
epoch: 15 training_loss 4.897829411029815 test_loss: 2.5038612365722654
epoch: 16 training_loss 4.722036669254303 test_loss: 2.462054443359375
epoch: 17 training_loss 4.619630527496338 test_loss: 2.411837577819824
epoch: 18 training_loss 4.436580481529236 test_loss: 2.3121063232421877
epoch: 19 training_loss 4.3835916686058045 test_loss: 2.286263275146484
epoch: 20 training_loss 4.253770039081574 test_loss: 2.221657371520996
epoch: 21 training_loss 4.181897954940796 test_loss: 2.1951894760131836
epoch: 22 training_loss 4.081582131385804 test_loss: 2.139089584350586
epoch: 23 training_loss 4.001987957954407 test_loss: 2.130414581298828
epoch: 24 training_loss 3.9164918732643126 test_loss: 2.0451536178588867
epoch: 25 training_loss 3.8012309503555297 test_loss: 2.0323190689086914
epoch: 26 training_loss 3.773144631385803 test_loss: 2.0017208099365233
epoch: 27 training_loss 3.7375140142440797 test_loss: 1.9994951248168946
epoch: 28 training_loss 3.6582445025444033 test_loss: 1.944124412536621
epoch: 29 training_loss 3.5339721965789797 test_loss: 1.9231708526611329
epoch: 30 training_loss 3.5031867837905883 test_loss: 1.881414031982422
epoch: 31 training_loss 3.477340967655182 test_loss: 1.8691551208496093
epoch: 32 training_loss 3.3453665018081664 test_loss: 1.8388074874877929
epoch: 33 training_loss 3.3581358861923216 test_loss: 1.8179000854492187
epoch: 34 training_loss 3.298507890701294 test_loss: 1.8060947418212892
epoch: 35 training_loss 3.2558530831336974 test_loss: 1.768674659729004
epoch: 36 training_loss 3.2460003685951233 test_loss: 1.7687332153320312
epoch: 37 training_loss 3.1928064584732057 test_loss: 1.7280460357666017
epoch: 38 training_loss 3.1521627426147463 test_loss: 1.7464105606079101
epoch: 39 training_loss 3.194347553253174 test_loss: 1.712173080444336
epoch: 40 training_loss 3.079645493030548 test_loss: 1.6994661331176757
epoch: 41 training_loss 3.0621081495285036 test_loss: 1.6804832458496093
epoch: 42 training_loss 3.0398260974884033 test_loss: 1.6899280548095703
epoch: 43 training_loss 3.0394580698013307 test_loss: 1.639546775817871
epoch: 44 training_loss 2.9798722648620606 test_loss: 1.6352678298950196
epoch: 45 training_loss 2.949456193447113 test_loss: 1.6600099563598634
epoch: 46 training_loss 2.9336272501945495 test_loss: 1.6179737091064452
epoch: 47 training_loss 2.894697360992432 test_loss: 1.5861329078674316
epoch: 48 training_loss 2.969223294258118 test_loss: 1.5891606330871582
epoch: 49 training_loss 2.899720687866211 test_loss: 1.5703157424926757
epoch: 50 training_loss 2.8484555149078368 test_loss: 1.5667323112487792
epoch: 51 training_loss 2.778549931049347 test_loss: 1.566378116607666
epoch: 52 training_loss 2.7367525339126586 test_loss: 1.5438620567321777
epoch: 53 training_loss 2.751460864543915 test_loss: 1.541173553466797
epoch: 54 training_loss 2.7671813011169433 test_loss: 1.5300164222717285
epoch: 55 training_loss 2.7486724185943605 test_loss: 1.5212482452392577
epoch: 56 training_loss 2.7069372487068177 test_loss: 1.5411535263061524
epoch: 57 training_loss 2.6619848251342773 test_loss: 1.5176645278930665
epoch: 58 training_loss 2.610759334564209 test_loss: 1.5012269020080566
epoch: 59 training_loss 2.684365015029907 test_loss: 1.4905264854431153
epoch: 60 training_loss 2.6507258796691895 test_loss: 1.483281135559082
epoch: 61 training_loss 2.5915538227558135 test_loss: 1.4724668502807616
epoch: 62 training_loss 2.5827866864204405 test_loss: 1.4791740417480468
epoch: 63 training_loss 2.5637491273880006 test_loss: 1.4748740196228027
epoch: 64 training_loss 2.5955472660064696 test_loss: 1.4407122611999512
epoch: 65 training_loss 2.525895125865936 test_loss: 1.4589661598205566
epoch: 66 training_loss 2.507770321369171 test_loss: 1.4470832824707032
epoch: 67 training_loss 2.4865099000930786 test_loss: 1.442065143585205
epoch: 68 training_loss 2.488008726835251 test_loss: 1.424039077758789
epoch: 69 training_loss 2.513850303888321 test_loss: 1.4468930244445801
epoch: 70 training_loss 2.487588317394257 test_loss: 1.4208640098571776
epoch: 71 training_loss 2.41809766292572 test_loss: 1.4319083213806152
epoch: 72 training_loss 2.435359480381012 test_loss: 1.4212525367736817
epoch: 73 training_loss 2.396568171977997 test_loss: 1.4285232543945312
epoch: 74 training_loss 2.418268294334412 test_loss: 1.3957839012145996
epoch: 75 training_loss 2.396921124458313 test_loss: 1.3936500549316406
epoch: 76 training_loss 2.4164898729324342 test_loss: 1.387210750579834
epoch: 77 training_loss 2.3778441905975343 test_loss: 1.3888283729553224
epoch: 78 training_loss 2.3941745841503144 test_loss: 1.398013210296631
epoch: 79 training_loss 2.361462653875351 test_loss: 1.3739030838012696
epoch: 80 training_loss 2.3113897705078124 test_loss: 1.3742991447448731
epoch: 81 training_loss 2.328738173246384 test_loss: 1.3572352409362793
epoch: 82 training_loss 2.322614459991455 test_loss: 1.3572925567626952
epoch: 83 training_loss 2.328515830039978 test_loss: 1.3633959770202637
epoch: 84 training_loss 2.2900337517261504 test_loss: 1.3598227500915527
epoch: 85 training_loss 2.295071676969528 test_loss: 1.3632834434509278
epoch: 86 training_loss 2.3141981387138366 test_loss: 1.359104061126709
epoch: 87 training_loss 2.3051345908641814 test_loss: 1.3886920928955078
epoch: 88 training_loss 2.258892992734909 test_loss: 1.3344101905822754
epoch: 89 training_loss 2.2507562720775605 test_loss: 1.3597548484802247
epoch: 90 training_loss 2.2668991339206697 test_loss: 1.3155899047851562
epoch: 91 training_loss 2.280040669441223 test_loss: 1.3429308891296388
epoch: 92 training_loss 2.247399550676346 test_loss: 1.3326159477233888
epoch: 93 training_loss 2.2157586205005644 test_loss: 1.3354668617248535
epoch: 94 training_loss 2.2167689502239227 test_loss: 1.325517463684082
epoch: 95 training_loss 2.2219119155406952 test_loss: 1.339116382598877
epoch: 96 training_loss 2.21327476143837 test_loss: 1.319196605682373
epoch: 97 training_loss 2.2001397156715394 test_loss: 1.3083653450012207
epoch: 98 training_loss 2.2024008309841157 test_loss: 1.3044816970825195
epoch: 99 training_loss 2.237799289226532 test_loss: 1.313774299621582
epoch: 100 training_loss 2.2126794481277465 test_loss: 1.3235891342163086
epoch: 101 training_loss 2.1957395577430727 test_loss: 1.3043665885925293
epoch: 102 training_loss 2.1843982267379762 test_loss: 1.3038043975830078
epoch: 103 training_loss 2.216222566366196 test_loss: 1.2908682823181152
epoch: 104 training_loss 2.1650085723400116 test_loss: 1.296210479736328
epoch: 105 training_loss 2.167314831018448 test_loss: 1.279536533355713
epoch: 106 training_loss 2.1487992346286773 test_loss: 1.2988579750061036
epoch: 107 training_loss 2.141233984231949 test_loss: 1.2889843940734864
epoch: 108 training_loss 2.165951337814331 test_loss: 1.276709747314453
epoch: 109 training_loss 2.1661844086647033 test_loss: 1.2691098213195802
epoch: 110 training_loss 2.100267380475998 test_loss: 1.2939437866210937
epoch: 111 training_loss 2.0751378405094147 test_loss: 1.2901010513305664
epoch: 112 training_loss 2.169498184919357 test_loss: 1.2833455085754395
epoch: 113 training_loss 2.1344667625427247 test_loss: 1.2811246871948243
epoch: 114 training_loss 2.1220131945610046 test_loss: 1.2935884475708008
epoch: 115 training_loss 2.116886693239212 test_loss: 1.2883575439453125
epoch: 116 training_loss 2.0714264059066774 test_loss: 1.2725975036621093
epoch: 117 training_loss 2.1123970341682434 test_loss: 1.2814413070678712
epoch: 118 training_loss 2.115775754451752 test_loss: 1.2851916313171388
epoch: 119 training_loss 2.1048690950870514 test_loss: 1.2589285850524903
epoch: 120 training_loss 2.056509299278259 test_loss: 1.2715516090393066
epoch: 121 training_loss 2.0464756202697756 test_loss: 1.2584747314453124
epoch: 122 training_loss 2.0611026000976564 test_loss: 1.2614863395690918
epoch: 123 training_loss 2.0384402644634245 test_loss: 1.2556397438049316
epoch: 124 training_loss 2.034148579835892 test_loss: 1.2581121444702148
epoch: 125 training_loss 2.092977559566498 test_loss: 1.2891465187072755
epoch: 126 training_loss 2.063947845697403 test_loss: 1.268143653869629
epoch: 127 training_loss 2.062312214374542 test_loss: 1.2386832237243652
epoch: 128 training_loss 2.027307977676392 test_loss: 1.2388691902160645
epoch: 129 training_loss 2.0629185688495637 test_loss: 1.2601383209228516
epoch: 130 training_loss 2.0376903438568115 test_loss: 1.2635760307312012
epoch: 131 training_loss 2.036398251056671 test_loss: 1.2705390930175782
epoch: 132 training_loss 2.0423484897613524 test_loss: 1.2645319938659667
epoch: 133 training_loss 1.9612960124015808 test_loss: 1.2386370658874513
epoch: 134 training_loss 2.024532153606415 test_loss: 1.2469978332519531
epoch: 135 training_loss 2.0445778262615204 test_loss: 1.2648557662963866
epoch: 136 training_loss 1.983196314573288 test_loss: 1.2535139083862306
epoch: 137 training_loss 2.009303388595581 test_loss: 1.2420957565307618
epoch: 138 training_loss 2.011573007106781 test_loss: 1.2525565147399902
epoch: 139 training_loss 2.00023432970047 test_loss: 1.2377854347229005
epoch: 140 training_loss 1.987677844762802 test_loss: 1.2285379409790038
epoch: 141 training_loss 1.970981215238571 test_loss: 1.2371475219726562
epoch: 142 training_loss 1.98276420712471 test_loss: 1.2563825607299806
epoch: 143 training_loss 2.008344888687134 test_loss: 1.260579490661621
epoch: 144 training_loss 1.9712189209461213 test_loss: 1.2349764823913574
epoch: 145 training_loss 1.9595436787605285 test_loss: 1.2318760871887207
epoch: 146 training_loss 1.9972148621082306 test_loss: 1.2572463035583497
epoch: 147 training_loss 2.0092556631565093 test_loss: 1.2522540092468262
epoch: 148 training_loss 1.9706158018112183 test_loss: 1.2480708122253419
epoch: 149 training_loss 1.9662955784797669 test_loss: 1.234518051147461
2259.732195614518
episode: 0 training return: tensor(111.6811, device='cuda:0')
episode: 1 training return: tensor(128.2270, device='cuda:0')
episode: 2 training return: tensor(218.9480, device='cuda:0')
episode: 3 training return: tensor(139.5137, device='cuda:0')
epoch: 1 test_true_pfm: 2464.855584334904 sim_pfm: -161.6278799183298
episode: 4 training return: tensor(-304.1847, device='cuda:0')
episode: 5 training return: tensor(-467.4055, device='cuda:0')
episode: 6 training return: tensor(-493.1811, device='cuda:0')
episode: 7 training return: tensor(264.0240, device='cuda:0')
epoch: 2 test_true_pfm: 2491.5060595885643 sim_pfm: -77.52168148336932
episode: 8 training return: tensor(-96.7601, device='cuda:0')
episode: 9 training return: tensor(237.7027, device='cuda:0')
episode: 10 training return: tensor(94.2397, device='cuda:0')
episode: 11 training return: tensor(182.9002, device='cuda:0')
epoch: 3 test_true_pfm: 2851.1942684857495 sim_pfm: -417.65432000680204
episode: 12 training return: tensor(268.4642, device='cuda:0')
episode: 13 training return: tensor(195.0882, device='cuda:0')
episode: 14 training return: tensor(238.4671, device='cuda:0')
episode: 15 training return: tensor(126.8241, device='cuda:0')
epoch: 4 test_true_pfm: 3121.1395028082675 sim_pfm: 313.90962231490994
episode: 16 training return: tensor(-123.6631, device='cuda:0')
episode: 17 training return: tensor(114.7329, device='cuda:0')
episode: 18 training return: tensor(-399.5462, device='cuda:0')
episode: 19 training return: tensor(-88.0093, device='cuda:0')
epoch: 5 test_true_pfm: 3202.310791861148 sim_pfm: -9.965646252424145
episode: 20 training return: tensor(-254.8240, device='cuda:0')
episode: 21 training return: tensor(204.4365, device='cuda:0')
episode: 22 training return: tensor(183.2167, device='cuda:0')
episode: 23 training return: tensor(-450.4557, device='cuda:0')
epoch: 6 test_true_pfm: 1592.7448566955318 sim_pfm: 41.466574220753195
episode: 24 training return: tensor(208.5074, device='cuda:0')
episode: 25 training return: tensor(-411.6094, device='cuda:0')
episode: 26 training return: tensor(157.4596, device='cuda:0')
episode: 27 training return: tensor(-450.8440, device='cuda:0')
epoch: 7 test_true_pfm: 1927.9662544163075 sim_pfm: -296.3301903351676
episode: 28 training return: tensor(199.5856, device='cuda:0')
episode: 29 training return: tensor(-220.5867, device='cuda:0')
episode: 30 training return: tensor(173.0544, device='cuda:0')
episode: 31 training return: tensor(-301.9511, device='cuda:0')
epoch: 8 test_true_pfm: 1985.3293331240686 sim_pfm: -45.959303702402394
episode: 32 training return: tensor(-302.3358, device='cuda:0')
episode: 33 training return: tensor(80.0934, device='cuda:0')
episode: 34 training return: tensor(-420.3458, device='cuda:0')
episode: 35 training return: tensor(-324.0320, device='cuda:0')
epoch: 9 test_true_pfm: 2151.1974996638523 sim_pfm: 57.50965481585202
episode: 36 training return: tensor(-137.1490, device='cuda:0')
episode: 37 training return: tensor(314.0371, device='cuda:0')
episode: 38 training return: tensor(-223.4570, device='cuda:0')
episode: 39 training return: tensor(267.1101, device='cuda:0')
epoch: 10 test_true_pfm: 2163.955431923767 sim_pfm: -155.71772995550418
episode: 40 training return: tensor(223.9224, device='cuda:0')
episode: 41 training return: tensor(-83.3512, device='cuda:0')
episode: 42 training return: tensor(164.6782, device='cuda:0')
episode: 43 training return: tensor(159.6294, device='cuda:0')
epoch: 11 test_true_pfm: 2548.5359628726615 sim_pfm: -97.27301324019209
episode: 44 training return: tensor(-449.9669, device='cuda:0')
episode: 45 training return: tensor(-218.6719, device='cuda:0')
episode: 46 training return: tensor(267.4117, device='cuda:0')
episode: 47 training return: tensor(4.9248, device='cuda:0')
epoch: 12 test_true_pfm: 1937.2630346188896 sim_pfm: 30.97512488286399
episode: 48 training return: tensor(-48.5523, device='cuda:0')
episode: 49 training return: tensor(20.6967, device='cuda:0')
episode: 50 training return: tensor(99.4000, device='cuda:0')
episode: 51 training return: tensor(-29.4567, device='cuda:0')
epoch: 13 test_true_pfm: 3267.198307510988 sim_pfm: -111.85005407367134
episode: 52 training return: tensor(-12.0583, device='cuda:0')
episode: 53 training return: tensor(-296.5866, device='cuda:0')
episode: 54 training return: tensor(-304.9139, device='cuda:0')
episode: 55 training return: tensor(286.5360, device='cuda:0')
epoch: 14 test_true_pfm: 2534.8652021732837 sim_pfm: 51.34623003366869
episode: 56 training return: tensor(-27.2514, device='cuda:0')
episode: 57 training return: tensor(-443.3640, device='cuda:0')
episode: 58 training return: tensor(351.6567, device='cuda:0')
episode: 59 training return: tensor(390.0609, device='cuda:0')
epoch: 15 test_true_pfm: 3200.1709700809884 sim_pfm: -110.08710157407525
episode: 60 training return: tensor(-232.9063, device='cuda:0')
episode: 61 training return: tensor(180.0855, device='cuda:0')
episode: 62 training return: tensor(-461.1967, device='cuda:0')
episode: 63 training return: tensor(-312.1205, device='cuda:0')
epoch: 16 test_true_pfm: 2493.4826447952505 sim_pfm: -159.85270525767314
episode: 64 training return: tensor(297.2339, device='cuda:0')
episode: 65 training return: tensor(233.6601, device='cuda:0')
episode: 66 training return: tensor(-317.2819, device='cuda:0')
episode: 67 training return: tensor(348.0492, device='cuda:0')
epoch: 17 test_true_pfm: 2855.388413207209 sim_pfm: 172.96840622800906
episode: 68 training return: tensor(230.1905, device='cuda:0')
episode: 69 training return: tensor(386.5529, device='cuda:0')
episode: 70 training return: tensor(-137.3916, device='cuda:0')
episode: 71 training return: tensor(232.8008, device='cuda:0')
epoch: 18 test_true_pfm: 2284.641169205663 sim_pfm: -90.76757138163278
episode: 72 training return: tensor(190.9324, device='cuda:0')
episode: 73 training return: tensor(366.9598, device='cuda:0')
episode: 74 training return: tensor(-382.5070, device='cuda:0')
episode: 75 training return: tensor(-273.0862, device='cuda:0')
epoch: 19 test_true_pfm: 2788.5939186637793 sim_pfm: 123.72195431729779
episode: 76 training return: tensor(285.2890, device='cuda:0')
episode: 77 training return: tensor(147.2522, device='cuda:0')
episode: 78 training return: tensor(-145.0914, device='cuda:0')
episode: 79 training return: tensor(-415.8059, device='cuda:0')
epoch: 20 test_true_pfm: 3069.7565443416056 sim_pfm: -76.00596808257978
episode: 80 training return: tensor(-406.0530, device='cuda:0')
episode: 81 training return: tensor(93.4566, device='cuda:0')
episode: 82 training return: tensor(276.9406, device='cuda:0')
episode: 83 training return: tensor(356.4732, device='cuda:0')
epoch: 21 test_true_pfm: 2743.019253824484 sim_pfm: 162.7077861308741
episode: 84 training return: tensor(336.3377, device='cuda:0')
episode: 85 training return: tensor(186.6085, device='cuda:0')
episode: 86 training return: tensor(317.2752, device='cuda:0')
episode: 87 training return: tensor(221.6159, device='cuda:0')
epoch: 22 test_true_pfm: 2538.5560802510486 sim_pfm: -83.65645434102044
episode: 88 training return: tensor(307.3474, device='cuda:0')
episode: 89 training return: tensor(-63.7890, device='cuda:0')
episode: 90 training return: tensor(-250.2698, device='cuda:0')
episode: 91 training return: tensor(305.9710, device='cuda:0')
epoch: 23 test_true_pfm: 2454.955791725646 sim_pfm: -57.64888831389059
episode: 92 training return: tensor(314.9041, device='cuda:0')
episode: 93 training return: tensor(-387.6107, device='cuda:0')
episode: 94 training return: tensor(-391.9650, device='cuda:0')
episode: 95 training return: tensor(0.3989, device='cuda:0')
epoch: 24 test_true_pfm: 3054.4113624370134 sim_pfm: -157.7092876698589
episode: 96 training return: tensor(257.9293, device='cuda:0')
episode: 97 training return: tensor(212.6714, device='cuda:0')
episode: 98 training return: tensor(321.4627, device='cuda:0')
episode: 99 training return: tensor(68.8788, device='cuda:0')
epoch: 25 test_true_pfm: 2777.7963133875965 sim_pfm: 105.29053673730232
episode: 100 training return: tensor(247.3335, device='cuda:0')
episode: 101 training return: tensor(-395.3636, device='cuda:0')
episode: 102 training return: tensor(330.3962, device='cuda:0')
episode: 103 training return: tensor(252.8286, device='cuda:0')
epoch: 26 test_true_pfm: 3550.280823309702 sim_pfm: 237.66332247356573
episode: 104 training return: tensor(334.6022, device='cuda:0')
episode: 105 training return: tensor(214.3766, device='cuda:0')
episode: 106 training return: tensor(330.0637, device='cuda:0')
episode: 107 training return: tensor(360.2535, device='cuda:0')
epoch: 27 test_true_pfm: 3422.65036673037 sim_pfm: 101.94352064836615
episode: 108 training return: tensor(253.7036, device='cuda:0')
episode: 109 training return: tensor(238.0935, device='cuda:0')
episode: 110 training return: tensor(392.9183, device='cuda:0')
episode: 111 training return: tensor(122.8139, device='cuda:0')
epoch: 28 test_true_pfm: 3458.820775706477 sim_pfm: 281.02802217693534
episode: 112 training return: tensor(-234.8735, device='cuda:0')
episode: 113 training return: tensor(-263.9620, device='cuda:0')
episode: 114 training return: tensor(307.4614, device='cuda:0')
episode: 115 training return: tensor(307.3581, device='cuda:0')
epoch: 29 test_true_pfm: 3505.3408145184817 sim_pfm: 112.39717476900357
episode: 116 training return: tensor(-30.6318, device='cuda:0')
episode: 117 training return: tensor(236.2195, device='cuda:0')
episode: 118 training return: tensor(-188.9854, device='cuda:0')
episode: 119 training return: tensor(165.2651, device='cuda:0')
epoch: 30 test_true_pfm: 3488.9188834680867 sim_pfm: 7.701114466258635
episode: 120 training return: tensor(162.4772, device='cuda:0')
episode: 121 training return: tensor(298.7343, device='cuda:0')
episode: 122 training return: tensor(339.7370, device='cuda:0')
episode: 123 training return: tensor(188.0237, device='cuda:0')
epoch: 31 test_true_pfm: 3248.053703475241 sim_pfm: 204.15028545679525
episode: 124 training return: tensor(248.0487, device='cuda:0')
episode: 125 training return: tensor(380.6685, device='cuda:0')
episode: 126 training return: tensor(246.5674, device='cuda:0')
episode: 127 training return: tensor(-0.6120, device='cuda:0')
epoch: 32 test_true_pfm: 3460.757967476813 sim_pfm: 224.96924704080448
episode: 128 training return: tensor(177.4150, device='cuda:0')
episode: 129 training return: tensor(335.1153, device='cuda:0')
episode: 130 training return: tensor(-73.0182, device='cuda:0')
episode: 131 training return: tensor(288.5740, device='cuda:0')
epoch: 33 test_true_pfm: 3476.2633644756893 sim_pfm: 199.20990660899164
episode: 132 training return: tensor(215.4276, device='cuda:0')
episode: 133 training return: tensor(340.7688, device='cuda:0')
episode: 134 training return: tensor(-222.6548, device='cuda:0')
episode: 135 training return: tensor(-239.9360, device='cuda:0')
epoch: 34 test_true_pfm: 3267.041617226143 sim_pfm: 188.21900521645634
episode: 136 training return: tensor(345.0469, device='cuda:0')
episode: 137 training return: tensor(261.9978, device='cuda:0')
episode: 138 training return: tensor(337.1758, device='cuda:0')
episode: 139 training return: tensor(-188.2602, device='cuda:0')
epoch: 35 test_true_pfm: 3569.142875648217 sim_pfm: 224.56113825181578
episode: 140 training return: tensor(-364.4925, device='cuda:0')
episode: 141 training return: tensor(43.4757, device='cuda:0')
episode: 142 training return: tensor(377.2446, device='cuda:0')
episode: 143 training return: tensor(289.2005, device='cuda:0')
epoch: 36 test_true_pfm: 3455.1010069348376 sim_pfm: 343.6022480404936
episode: 144 training return: tensor(251.0442, device='cuda:0')
episode: 145 training return: tensor(342.9987, device='cuda:0')
episode: 146 training return: tensor(309.4082, device='cuda:0')
episode: 147 training return: tensor(276.3405, device='cuda:0')
epoch: 37 test_true_pfm: 3451.2039514948538 sim_pfm: 363.8683821215721
episode: 148 training return: tensor(295.3299, device='cuda:0')
episode: 149 training return: tensor(139.9815, device='cuda:0')
episode: 150 training return: tensor(140.6811, device='cuda:0')
episode: 151 training return: tensor(267.3714, device='cuda:0')
epoch: 38 test_true_pfm: 3601.308429241326 sim_pfm: 341.72935002025525
episode: 152 training return: tensor(307.6046, device='cuda:0')
episode: 153 training return: tensor(355.9915, device='cuda:0')
episode: 154 training return: tensor(259.3237, device='cuda:0')
episode: 155 training return: tensor(367.4313, device='cuda:0')
epoch: 39 test_true_pfm: 3606.9696877553374 sim_pfm: 215.83616617150255
episode: 156 training return: tensor(367.8056, device='cuda:0')
episode: 157 training return: tensor(249.4869, device='cuda:0')
episode: 158 training return: tensor(-120.8753, device='cuda:0')
episode: 159 training return: tensor(344.2103, device='cuda:0')
epoch: 40 test_true_pfm: 3274.970215701982 sim_pfm: 367.7996886932524
episode: 160 training return: tensor(94.8569, device='cuda:0')
episode: 161 training return: tensor(290.8018, device='cuda:0')
episode: 162 training return: tensor(-158.1065, device='cuda:0')
episode: 163 training return: tensor(213.6646, device='cuda:0')
epoch: 41 test_true_pfm: 3106.5023061597226 sim_pfm: 415.9119632435807
episode: 164 training return: tensor(331.9427, device='cuda:0')
episode: 165 training return: tensor(343.2311, device='cuda:0')
episode: 166 training return: tensor(234.7005, device='cuda:0')
episode: 167 training return: tensor(237.9569, device='cuda:0')
epoch: 42 test_true_pfm: 3190.048876868641 sim_pfm: 282.27500977277913
episode: 168 training return: tensor(320.0664, device='cuda:0')
episode: 169 training return: tensor(226.1426, device='cuda:0')
episode: 170 training return: tensor(210.5804, device='cuda:0')
episode: 171 training return: tensor(284.9642, device='cuda:0')
epoch: 43 test_true_pfm: 3355.7153823483336 sim_pfm: 376.1692187510101
episode: 172 training return: tensor(-290.9250, device='cuda:0')
episode: 173 training return: tensor(0.8788, device='cuda:0')
episode: 174 training return: tensor(306.9628, device='cuda:0')
episode: 175 training return: tensor(373.2242, device='cuda:0')
epoch: 44 test_true_pfm: 3576.432114415197 sim_pfm: 283.64134397274273
episode: 176 training return: tensor(306.1949, device='cuda:0')
episode: 177 training return: tensor(317.4922, device='cuda:0')
episode: 178 training return: tensor(310.2362, device='cuda:0')
episode: 179 training return: tensor(248.8566, device='cuda:0')
epoch: 45 test_true_pfm: 3587.537434110943 sim_pfm: 393.67877666221466
episode: 180 training return: tensor(315.6981, device='cuda:0')
episode: 181 training return: tensor(338.0734, device='cuda:0')
episode: 182 training return: tensor(-252.8306, device='cuda:0')
episode: 183 training return: tensor(260.7272, device='cuda:0')
epoch: 46 test_true_pfm: 3291.131084125291 sim_pfm: 357.95046801067656
episode: 184 training return: tensor(309.9483, device='cuda:0')
episode: 185 training return: tensor(412.6740, device='cuda:0')
episode: 186 training return: tensor(-63.9575, device='cuda:0')
episode: 187 training return: tensor(338.6645, device='cuda:0')
epoch: 47 test_true_pfm: 3506.9475062880388 sim_pfm: 302.9622042333552
episode: 188 training return: tensor(307.6711, device='cuda:0')
episode: 189 training return: tensor(68.1792, device='cuda:0')
episode: 190 training return: tensor(64.8021, device='cuda:0')
episode: 191 training return: tensor(381.2746, device='cuda:0')
epoch: 48 test_true_pfm: 3430.4436819095918 sim_pfm: 367.77413442493224
episode: 192 training return: tensor(314.8532, device='cuda:0')
episode: 193 training return: tensor(287.2473, device='cuda:0')
episode: 194 training return: tensor(319.5789, device='cuda:0')
episode: 195 training return: tensor(-317.5614, device='cuda:0')
epoch: 49 test_true_pfm: 3538.800568700015 sim_pfm: 355.5651951842592
episode: 196 training return: tensor(293.4028, device='cuda:0')
episode: 197 training return: tensor(-143.1008, device='cuda:0')
episode: 198 training return: tensor(363.5667, device='cuda:0')
episode: 199 training return: tensor(-41.4126, device='cuda:0')
epoch: 50 test_true_pfm: 3489.9270925605465 sim_pfm: 361.685752634871
episode: 200 training return: tensor(266.4390, device='cuda:0')
episode: 201 training return: tensor(375.0613, device='cuda:0')
episode: 202 training return: tensor(339.8857, device='cuda:0')
episode: 203 training return: tensor(297.5692, device='cuda:0')
epoch: 51 test_true_pfm: 3042.1056534102395 sim_pfm: 124.91215613170061
episode: 204 training return: tensor(300.3723, device='cuda:0')
episode: 205 training return: tensor(264.2331, device='cuda:0')
episode: 206 training return: tensor(336.2511, device='cuda:0')
episode: 207 training return: tensor(296.4106, device='cuda:0')
epoch: 52 test_true_pfm: 3473.2390299452086 sim_pfm: 329.66559943696484
episode: 208 training return: tensor(283.9590, device='cuda:0')
episode: 209 training return: tensor(325.3108, device='cuda:0')
episode: 210 training return: tensor(366.7275, device='cuda:0')
episode: 211 training return: tensor(396.1069, device='cuda:0')
epoch: 53 test_true_pfm: 3495.1260586595013 sim_pfm: 262.0011883341358
episode: 212 training return: tensor(-221.8105, device='cuda:0')
episode: 213 training return: tensor(322.2320, device='cuda:0')
episode: 214 training return: tensor(216.5566, device='cuda:0')
episode: 215 training return: tensor(379.2187, device='cuda:0')
epoch: 54 test_true_pfm: 3527.8190286446516 sim_pfm: 349.2997041430014
episode: 216 training return: tensor(411.9906, device='cuda:0')
episode: 217 training return: tensor(304.3881, device='cuda:0')
episode: 218 training return: tensor(170.2985, device='cuda:0')
episode: 219 training return: tensor(177.7383, device='cuda:0')
epoch: 55 test_true_pfm: 3435.5604515383966 sim_pfm: 380.8264489793801
episode: 220 training return: tensor(324.0853, device='cuda:0')
episode: 221 training return: tensor(190.1056, device='cuda:0')
episode: 222 training return: tensor(281.7635, device='cuda:0')
episode: 223 training return: tensor(345.1190, device='cuda:0')
epoch: 56 test_true_pfm: 3576.8110490526687 sim_pfm: 249.72680141303377
episode: 224 training return: tensor(201.8095, device='cuda:0')
episode: 225 training return: tensor(209.9503, device='cuda:0')
episode: 226 training return: tensor(138.5034, device='cuda:0')
episode: 227 training return: tensor(249.6120, device='cuda:0')
epoch: 57 test_true_pfm: 3257.2896612033387 sim_pfm: 374.647668063757
episode: 228 training return: tensor(284.5093, device='cuda:0')
episode: 229 training return: tensor(295.0136, device='cuda:0')
episode: 230 training return: tensor(294.8125, device='cuda:0')
episode: 231 training return: tensor(-179.0612, device='cuda:0')
epoch: 58 test_true_pfm: 3549.9994755067078 sim_pfm: 268.42296018109
episode: 232 training return: tensor(338.8965, device='cuda:0')
episode: 233 training return: tensor(313.0458, device='cuda:0')
episode: 234 training return: tensor(183.8302, device='cuda:0')
episode: 235 training return: tensor(278.4881, device='cuda:0')
epoch: 59 test_true_pfm: 3078.6126821485 sim_pfm: 324.06203543394804
episode: 236 training return: tensor(377.4167, device='cuda:0')
episode: 237 training return: tensor(331.6719, device='cuda:0')
episode: 238 training return: tensor(196.4597, device='cuda:0')
episode: 239 training return: tensor(305.4539, device='cuda:0')
epoch: 60 test_true_pfm: 3547.0522983766514 sim_pfm: 370.0351110788276
episode: 240 training return: tensor(312.6353, device='cuda:0')
episode: 241 training return: tensor(370.3336, device='cuda:0')
episode: 242 training return: tensor(364.0011, device='cuda:0')
episode: 243 training return: tensor(279.3761, device='cuda:0')
epoch: 61 test_true_pfm: 3644.143401023473 sim_pfm: 418.4282446503639
episode: 244 training return: tensor(245.7315, device='cuda:0')
episode: 245 training return: tensor(267.9657, device='cuda:0')
episode: 246 training return: tensor(300.2640, device='cuda:0')
episode: 247 training return: tensor(294.3273, device='cuda:0')
epoch: 62 test_true_pfm: 3585.3765389765954 sim_pfm: 377.66222782235127
episode: 248 training return: tensor(254.0905, device='cuda:0')
episode: 249 training return: tensor(400.9753, device='cuda:0')
episode: 250 training return: tensor(40.5116, device='cuda:0')
episode: 251 training return: tensor(435.7326, device='cuda:0')
epoch: 63 test_true_pfm: 3565.3469067038654 sim_pfm: 342.9895795303358
episode: 252 training return: tensor(84.5326, device='cuda:0')
episode: 253 training return: tensor(289.3658, device='cuda:0')
episode: 254 training return: tensor(287.4606, device='cuda:0')
episode: 255 training return: tensor(371.9630, device='cuda:0')
epoch: 64 test_true_pfm: 3555.2673025869153 sim_pfm: 412.3868929012485
episode: 256 training return: tensor(361.3833, device='cuda:0')
episode: 257 training return: tensor(310.2911, device='cuda:0')
episode: 258 training return: tensor(371.8455, device='cuda:0')
episode: 259 training return: tensor(330.7519, device='cuda:0')
epoch: 65 test_true_pfm: 3080.104782630817 sim_pfm: 367.96843096146284
episode: 260 training return: tensor(199.9002, device='cuda:0')
episode: 261 training return: tensor(370.9065, device='cuda:0')
episode: 262 training return: tensor(344.5583, device='cuda:0')
episode: 263 training return: tensor(355.7019, device='cuda:0')
epoch: 66 test_true_pfm: 3507.5532203363546 sim_pfm: 135.79511929190872
episode: 264 training return: tensor(384.9793, device='cuda:0')
episode: 265 training return: tensor(343.1786, device='cuda:0')
episode: 266 training return: tensor(361.7231, device='cuda:0')
episode: 267 training return: tensor(359.5871, device='cuda:0')
epoch: 67 test_true_pfm: 3111.3975232527278 sim_pfm: 385.24807630308595
episode: 268 training return: tensor(321.8206, device='cuda:0')
episode: 269 training return: tensor(264.1849, device='cuda:0')
episode: 270 training return: tensor(-197.4817, device='cuda:0')
episode: 271 training return: tensor(415.1179, device='cuda:0')
epoch: 68 test_true_pfm: 3564.042912665893 sim_pfm: 196.65217074699467
episode: 272 training return: tensor(388.4922, device='cuda:0')
episode: 273 training return: tensor(13.6029, device='cuda:0')
episode: 274 training return: tensor(316.4149, device='cuda:0')
episode: 275 training return: tensor(276.5606, device='cuda:0')
epoch: 69 test_true_pfm: 3573.796890593652 sim_pfm: 321.3597370343244
episode: 276 training return: tensor(304.4294, device='cuda:0')
episode: 277 training return: tensor(234.4231, device='cuda:0')
episode: 278 training return: tensor(372.3331, device='cuda:0')
episode: 279 training return: tensor(420.4521, device='cuda:0')
epoch: 70 test_true_pfm: 3611.518866550532 sim_pfm: 219.84857440671962
episode: 280 training return: tensor(382.5018, device='cuda:0')
episode: 281 training return: tensor(285.9792, device='cuda:0')
episode: 282 training return: tensor(312.5919, device='cuda:0')
episode: 283 training return: tensor(294.3049, device='cuda:0')
epoch: 71 test_true_pfm: 3511.7624916488116 sim_pfm: 310.34970492872526
episode: 284 training return: tensor(318.0260, device='cuda:0')
episode: 285 training return: tensor(-222.2732, device='cuda:0')
episode: 286 training return: tensor(414.4485, device='cuda:0')
episode: 287 training return: tensor(58.8735, device='cuda:0')
epoch: 72 test_true_pfm: 2033.3791477106304 sim_pfm: 191.2693143541304
episode: 288 training return: tensor(378.3815, device='cuda:0')
episode: 289 training return: tensor(362.7957, device='cuda:0')
episode: 290 training return: tensor(-112.8050, device='cuda:0')
episode: 291 training return: tensor(109.0334, device='cuda:0')
epoch: 73 test_true_pfm: 3504.044262695636 sim_pfm: 390.67934739308356
episode: 292 training return: tensor(339.9428, device='cuda:0')
episode: 293 training return: tensor(351.7120, device='cuda:0')
episode: 294 training return: tensor(253.3739, device='cuda:0')
episode: 295 training return: tensor(236.6366, device='cuda:0')
epoch: 74 test_true_pfm: 3553.195867157951 sim_pfm: 383.28193166828714
episode: 296 training return: tensor(330.4053, device='cuda:0')
episode: 297 training return: tensor(262.6660, device='cuda:0')
episode: 298 training return: tensor(329.4394, device='cuda:0')
episode: 299 training return: tensor(14.9927, device='cuda:0')
epoch: 75 test_true_pfm: 3494.7509105918657 sim_pfm: 180.6141867119004
episode: 300 training return: tensor(330.0390, device='cuda:0')
episode: 301 training return: tensor(209.6089, device='cuda:0')
episode: 302 training return: tensor(349.8444, device='cuda:0')
episode: 303 training return: tensor(265.9043, device='cuda:0')
epoch: 76 test_true_pfm: 3153.495122065317 sim_pfm: 422.80676645585726
episode: 304 training return: tensor(-84.7963, device='cuda:0')
episode: 305 training return: tensor(403.8121, device='cuda:0')
episode: 306 training return: tensor(268.6355, device='cuda:0')
episode: 307 training return: tensor(387.2901, device='cuda:0')
epoch: 77 test_true_pfm: 3509.9084905291334 sim_pfm: 361.5798647935929
episode: 308 training return: tensor(387.4236, device='cuda:0')
episode: 309 training return: tensor(386.7872, device='cuda:0')
episode: 310 training return: tensor(348.4050, device='cuda:0')
episode: 311 training return: tensor(255.9057, device='cuda:0')
epoch: 78 test_true_pfm: 3307.8496386150987 sim_pfm: 348.45942307065707
episode: 312 training return: tensor(412.7695, device='cuda:0')
episode: 313 training return: tensor(265.2238, device='cuda:0')
episode: 314 training return: tensor(304.7189, device='cuda:0')
episode: 315 training return: tensor(315.2577, device='cuda:0')
epoch: 79 test_true_pfm: 3534.4455297588247 sim_pfm: 223.41635493013504
episode: 316 training return: tensor(285.3708, device='cuda:0')
episode: 317 training return: tensor(348.3111, device='cuda:0')
episode: 318 training return: tensor(246.7487, device='cuda:0')
episode: 319 training return: tensor(222.9480, device='cuda:0')
epoch: 80 test_true_pfm: 3550.0184070378687 sim_pfm: 358.51355726548354
episode: 320 training return: tensor(358.8422, device='cuda:0')
episode: 321 training return: tensor(117.3993, device='cuda:0')
episode: 322 training return: tensor(285.7271, device='cuda:0')
episode: 323 training return: tensor(322.6161, device='cuda:0')
epoch: 81 test_true_pfm: 3526.1665809703322 sim_pfm: 350.3542635253495
episode: 324 training return: tensor(234.7904, device='cuda:0')
episode: 325 training return: tensor(468.2145, device='cuda:0')
episode: 326 training return: tensor(428.9795, device='cuda:0')
episode: 327 training return: tensor(269.2003, device='cuda:0')
epoch: 82 test_true_pfm: 3535.0096141068257 sim_pfm: 318.9032811239401
episode: 328 training return: tensor(178.0021, device='cuda:0')
episode: 329 training return: tensor(365.6339, device='cuda:0')
episode: 330 training return: tensor(223.1019, device='cuda:0')
episode: 331 training return: tensor(309.1265, device='cuda:0')
epoch: 83 test_true_pfm: 3517.410492313737 sim_pfm: 388.9329671505063
episode: 332 training return: tensor(302.2708, device='cuda:0')
episode: 333 training return: tensor(205.8051, device='cuda:0')
episode: 334 training return: tensor(239.7243, device='cuda:0')
episode: 335 training return: tensor(179.5758, device='cuda:0')
epoch: 84 test_true_pfm: 3501.506457376865 sim_pfm: 416.6858464938705
episode: 336 training return: tensor(368.1197, device='cuda:0')
episode: 337 training return: tensor(58.2733, device='cuda:0')
episode: 338 training return: tensor(369.2370, device='cuda:0')
episode: 339 training return: tensor(296.5896, device='cuda:0')
epoch: 85 test_true_pfm: 3550.908789121145 sim_pfm: 390.5351729871084
episode: 340 training return: tensor(380.3656, device='cuda:0')
episode: 341 training return: tensor(-177.8683, device='cuda:0')
episode: 342 training return: tensor(-6.3078, device='cuda:0')
episode: 343 training return: tensor(370.6434, device='cuda:0')
epoch: 86 test_true_pfm: 3546.8101386815265 sim_pfm: 224.56415080517763
episode: 344 training return: tensor(354.4666, device='cuda:0')
episode: 345 training return: tensor(355.6442, device='cuda:0')
episode: 346 training return: tensor(383.3526, device='cuda:0')
episode: 347 training return: tensor(310.2415, device='cuda:0')
epoch: 87 test_true_pfm: 3686.250360018467 sim_pfm: 333.2721927089151
episode: 348 training return: tensor(372.3515, device='cuda:0')
episode: 349 training return: tensor(-40.7585, device='cuda:0')
episode: 350 training return: tensor(269.5703, device='cuda:0')
episode: 351 training return: tensor(383.4253, device='cuda:0')
epoch: 88 test_true_pfm: 3186.5427052832542 sim_pfm: 354.7122514460546
episode: 352 training return: tensor(310.9847, device='cuda:0')
episode: 353 training return: tensor(350.3172, device='cuda:0')
episode: 354 training return: tensor(246.4513, device='cuda:0')
episode: 355 training return: tensor(279.0016, device='cuda:0')
epoch: 89 test_true_pfm: 2632.9057454819517 sim_pfm: 172.3064019891899
episode: 356 training return: tensor(398.7523, device='cuda:0')
episode: 357 training return: tensor(210.7951, device='cuda:0')
episode: 358 training return: tensor(318.9125, device='cuda:0')
episode: 359 training return: tensor(425.4843, device='cuda:0')
epoch: 90 test_true_pfm: 3349.3086358541022 sim_pfm: 439.04677915089997
episode: 360 training return: tensor(459.5268, device='cuda:0')
episode: 361 training return: tensor(385.4242, device='cuda:0')
episode: 362 training return: tensor(323.7011, device='cuda:0')
episode: 363 training return: tensor(328.0341, device='cuda:0')
epoch: 91 test_true_pfm: 3531.2595321718422 sim_pfm: 384.0019446570271
episode: 364 training return: tensor(365.9633, device='cuda:0')
episode: 365 training return: tensor(419.4168, device='cuda:0')
episode: 366 training return: tensor(384.0444, device='cuda:0')
episode: 367 training return: tensor(311.3071, device='cuda:0')
epoch: 92 test_true_pfm: 3579.2743188096424 sim_pfm: 225.41181234000638
episode: 368 training return: tensor(209.9727, device='cuda:0')
episode: 369 training return: tensor(408.9400, device='cuda:0')
episode: 370 training return: tensor(-47.8214, device='cuda:0')
episode: 371 training return: tensor(377.1638, device='cuda:0')
epoch: 93 test_true_pfm: 3363.336673108702 sim_pfm: 403.24289127370383
episode: 372 training return: tensor(205.7301, device='cuda:0')
episode: 373 training return: tensor(393.5850, device='cuda:0')
episode: 374 training return: tensor(294.2983, device='cuda:0')
episode: 375 training return: tensor(402.4863, device='cuda:0')
epoch: 94 test_true_pfm: 2984.9379278948413 sim_pfm: 399.525571264443
episode: 376 training return: tensor(365.7755, device='cuda:0')
episode: 377 training return: tensor(-61.0519, device='cuda:0')
episode: 378 training return: tensor(360.7498, device='cuda:0')
episode: 379 training return: tensor(313.4581, device='cuda:0')
epoch: 95 test_true_pfm: 3580.782496122351 sim_pfm: 279.2660006734659
episode: 380 training return: tensor(407.2577, device='cuda:0')
episode: 381 training return: tensor(368.7306, device='cuda:0')
episode: 382 training return: tensor(290.4261, device='cuda:0')
episode: 383 training return: tensor(395.2350, device='cuda:0')
epoch: 96 test_true_pfm: 3611.959605893406 sim_pfm: 295.9189019316885
episode: 384 training return: tensor(-104.3318, device='cuda:0')
episode: 385 training return: tensor(306.8878, device='cuda:0')
episode: 386 training return: tensor(387.3203, device='cuda:0')
episode: 387 training return: tensor(314.8566, device='cuda:0')
epoch: 97 test_true_pfm: 3516.4623936652574 sim_pfm: 357.1478542157759
episode: 388 training return: tensor(370.8603, device='cuda:0')
episode: 389 training return: tensor(363.5547, device='cuda:0')
episode: 390 training return: tensor(76.3167, device='cuda:0')
episode: 391 training return: tensor(327.3898, device='cuda:0')
epoch: 98 test_true_pfm: 3585.109472601951 sim_pfm: 409.6093277596713
episode: 392 training return: tensor(264.7334, device='cuda:0')
episode: 393 training return: tensor(293.9918, device='cuda:0')
episode: 394 training return: tensor(242.2050, device='cuda:0')
episode: 395 training return: tensor(428.4507, device='cuda:0')
epoch: 99 test_true_pfm: 3625.940310077696 sim_pfm: 388.3265084189479
episode: 396 training return: tensor(423.4872, device='cuda:0')
episode: 397 training return: tensor(340.8122, device='cuda:0')
episode: 398 training return: tensor(360.7420, device='cuda:0')
episode: 399 training return: tensor(297.1199, device='cuda:0')
epoch: 100 test_true_pfm: 3294.371031525428 sim_pfm: 285.9431074895547
episode: 400 training return: tensor(438.2722, device='cuda:0')
episode: 401 training return: tensor(239.3852, device='cuda:0')
episode: 402 training return: tensor(427.8538, device='cuda:0')
episode: 403 training return: tensor(320.1142, device='cuda:0')
epoch: 101 test_true_pfm: 3600.343445505303 sim_pfm: 419.30394125683233
episode: 404 training return: tensor(416.2726, device='cuda:0')
episode: 405 training return: tensor(351.6887, device='cuda:0')
episode: 406 training return: tensor(370.2207, device='cuda:0')
episode: 407 training return: tensor(389.0338, device='cuda:0')
epoch: 102 test_true_pfm: 3616.7121019836945 sim_pfm: 391.9130455817406
episode: 408 training return: tensor(232.1535, device='cuda:0')
episode: 409 training return: tensor(310.0081, device='cuda:0')
episode: 410 training return: tensor(332.3360, device='cuda:0')
episode: 411 training return: tensor(310.0057, device='cuda:0')
epoch: 103 test_true_pfm: 2501.252500063343 sim_pfm: 403.76673484708107
episode: 412 training return: tensor(283.1035, device='cuda:0')
episode: 413 training return: tensor(286.1431, device='cuda:0')
episode: 414 training return: tensor(379.9872, device='cuda:0')
episode: 415 training return: tensor(161.2334, device='cuda:0')
epoch: 104 test_true_pfm: 3608.604972268642 sim_pfm: 367.5801708834285
episode: 416 training return: tensor(361.8122, device='cuda:0')
episode: 417 training return: tensor(388.7877, device='cuda:0')
episode: 418 training return: tensor(174.4282, device='cuda:0')
episode: 419 training return: tensor(266.7249, device='cuda:0')
epoch: 105 test_true_pfm: 2933.1381758928005 sim_pfm: 93.17497135727899
episode: 420 training return: tensor(105.3069, device='cuda:0')
episode: 421 training return: tensor(291.9465, device='cuda:0')
episode: 422 training return: tensor(333.3260, device='cuda:0')
episode: 423 training return: tensor(241.9245, device='cuda:0')
epoch: 106 test_true_pfm: 2959.352023489219 sim_pfm: 181.06703134637792
episode: 424 training return: tensor(-172.3770, device='cuda:0')
episode: 425 training return: tensor(76.4690, device='cuda:0')
episode: 426 training return: tensor(370.8277, device='cuda:0')
episode: 427 training return: tensor(171.4191, device='cuda:0')
epoch: 107 test_true_pfm: 3637.3712628043795 sim_pfm: 262.09082343821257
episode: 428 training return: tensor(-20.2300, device='cuda:0')
episode: 429 training return: tensor(301.8586, device='cuda:0')
episode: 430 training return: tensor(318.0887, device='cuda:0')
episode: 431 training return: tensor(333.0601, device='cuda:0')
epoch: 108 test_true_pfm: 3617.1778758002583 sim_pfm: 437.2199410408696
episode: 432 training return: tensor(448.2354, device='cuda:0')
episode: 433 training return: tensor(323.4144, device='cuda:0')
episode: 434 training return: tensor(380.2896, device='cuda:0')
episode: 435 training return: tensor(303.2304, device='cuda:0')
epoch: 109 test_true_pfm: 3632.822561897865 sim_pfm: 228.13141923826575
episode: 436 training return: tensor(400.6585, device='cuda:0')
episode: 437 training return: tensor(414.3910, device='cuda:0')
episode: 438 training return: tensor(365.5463, device='cuda:0')
episode: 439 training return: tensor(345.3315, device='cuda:0')
epoch: 110 test_true_pfm: 3597.9605181834922 sim_pfm: 148.02308739201786
episode: 440 training return: tensor(408.1292, device='cuda:0')
episode: 441 training return: tensor(385.2478, device='cuda:0')
episode: 442 training return: tensor(0.6541, device='cuda:0')
episode: 443 training return: tensor(-27.1797, device='cuda:0')
epoch: 111 test_true_pfm: 3558.830742374959 sim_pfm: 313.75422591531725
episode: 444 training return: tensor(247.9014, device='cuda:0')
episode: 445 training return: tensor(337.6739, device='cuda:0')
episode: 446 training return: tensor(358.6135, device='cuda:0')
episode: 447 training return: tensor(360.5268, device='cuda:0')
epoch: 112 test_true_pfm: 3475.025224446516 sim_pfm: 150.86061992147006
episode: 448 training return: tensor(377.3231, device='cuda:0')
episode: 449 training return: tensor(300.3373, device='cuda:0')
episode: 450 training return: tensor(-49.0631, device='cuda:0')
episode: 451 training return: tensor(301.4004, device='cuda:0')
epoch: 113 test_true_pfm: 3458.716857430118 sim_pfm: 122.03680751167121
episode: 452 training return: tensor(-27.3681, device='cuda:0')
episode: 453 training return: tensor(44.3377, device='cuda:0')
episode: 454 training return: tensor(459.9925, device='cuda:0')
episode: 455 training return: tensor(411.5144, device='cuda:0')
epoch: 114 test_true_pfm: 3544.2018382976216 sim_pfm: 241.16549550378113
episode: 456 training return: tensor(416.0275, device='cuda:0')
episode: 457 training return: tensor(276.7577, device='cuda:0')
episode: 458 training return: tensor(335.0430, device='cuda:0')
episode: 459 training return: tensor(313.3536, device='cuda:0')
epoch: 115 test_true_pfm: 3536.242770665063 sim_pfm: 435.4810125011136
episode: 460 training return: tensor(399.7325, device='cuda:0')
episode: 461 training return: tensor(343.1702, device='cuda:0')
episode: 462 training return: tensor(250.1777, device='cuda:0')
episode: 463 training return: tensor(342.7787, device='cuda:0')
epoch: 116 test_true_pfm: 3567.5493470529677 sim_pfm: 368.41653657438775
episode: 464 training return: tensor(387.1592, device='cuda:0')
episode: 465 training return: tensor(366.1073, device='cuda:0')
episode: 466 training return: tensor(367.9474, device='cuda:0')
episode: 467 training return: tensor(331.2458, device='cuda:0')
epoch: 117 test_true_pfm: 3139.9795300459627 sim_pfm: 401.10615618286346
episode: 468 training return: tensor(329.6886, device='cuda:0')
episode: 469 training return: tensor(326.2592, device='cuda:0')
episode: 470 training return: tensor(299.5326, device='cuda:0')
episode: 471 training return: tensor(-92.9108, device='cuda:0')
epoch: 118 test_true_pfm: 3654.331861361244 sim_pfm: 397.05453846951906
episode: 472 training return: tensor(431.8329, device='cuda:0')
episode: 473 training return: tensor(353.0095, device='cuda:0')
episode: 474 training return: tensor(340.0370, device='cuda:0')
episode: 475 training return: tensor(412.1891, device='cuda:0')
epoch: 119 test_true_pfm: 3543.4051440290236 sim_pfm: 394.8213883684172
episode: 476 training return: tensor(458.4216, device='cuda:0')
episode: 477 training return: tensor(230.0996, device='cuda:0')
episode: 478 training return: tensor(-195.9975, device='cuda:0')
episode: 479 training return: tensor(280.1049, device='cuda:0')
epoch: 120 test_true_pfm: 3568.912513882106 sim_pfm: 410.3755769197596
episode: 480 training return: tensor(393.8731, device='cuda:0')
episode: 481 training return: tensor(401.4891, device='cuda:0')
episode: 482 training return: tensor(408.8710, device='cuda:0')
episode: 483 training return: tensor(361.2187, device='cuda:0')
epoch: 121 test_true_pfm: 3681.6750011629506 sim_pfm: 436.9694339571676
episode: 484 training return: tensor(399.1920, device='cuda:0')
episode: 485 training return: tensor(-28.8029, device='cuda:0')
episode: 486 training return: tensor(390.2173, device='cuda:0')
episode: 487 training return: tensor(365.9723, device='cuda:0')
epoch: 122 test_true_pfm: 3196.5265439977043 sim_pfm: -8.765321718063205
episode: 488 training return: tensor(325.0348, device='cuda:0')
episode: 489 training return: tensor(93.9694, device='cuda:0')
episode: 490 training return: tensor(348.6066, device='cuda:0')
episode: 491 training return: tensor(-124.5402, device='cuda:0')
epoch: 123 test_true_pfm: 3616.6959915586376 sim_pfm: 410.53282104500494
episode: 492 training return: tensor(328.0552, device='cuda:0')
episode: 493 training return: tensor(324.4079, device='cuda:0')
episode: 494 training return: tensor(408.0379, device='cuda:0')
episode: 495 training return: tensor(291.9840, device='cuda:0')
epoch: 124 test_true_pfm: 3560.355471105233 sim_pfm: 441.8480091262997
episode: 496 training return: tensor(333.1513, device='cuda:0')
episode: 497 training return: tensor(333.1175, device='cuda:0')
episode: 498 training return: tensor(289.7146, device='cuda:0')
episode: 499 training return: tensor(25.9434, device='cuda:0')
epoch: 125 test_true_pfm: 3601.914808308054 sim_pfm: 291.1862018002236
episode: 500 training return: tensor(316.6874, device='cuda:0')
episode: 501 training return: tensor(372.7780, device='cuda:0')
episode: 502 training return: tensor(-130.4225, device='cuda:0')
episode: 503 training return: tensor(350.8724, device='cuda:0')
epoch: 126 test_true_pfm: 3046.4794722785596 sim_pfm: 349.12881152580184
episode: 504 training return: tensor(-265.5800, device='cuda:0')
episode: 505 training return: tensor(313.3727, device='cuda:0')
episode: 506 training return: tensor(413.0567, device='cuda:0')
episode: 507 training return: tensor(298.5893, device='cuda:0')
epoch: 127 test_true_pfm: 2836.06826246591 sim_pfm: 187.588222873533
episode: 508 training return: tensor(351.9357, device='cuda:0')
episode: 509 training return: tensor(281.6497, device='cuda:0')
episode: 510 training return: tensor(-128.4944, device='cuda:0')
episode: 511 training return: tensor(336.4135, device='cuda:0')
epoch: 128 test_true_pfm: 3504.4250788546037 sim_pfm: 325.1868651617551
episode: 512 training return: tensor(296.4473, device='cuda:0')
episode: 513 training return: tensor(311.9043, device='cuda:0')
episode: 514 training return: tensor(427.7640, device='cuda:0')
episode: 515 training return: tensor(278.0125, device='cuda:0')
epoch: 129 test_true_pfm: 3331.4357090359813 sim_pfm: 384.16598852123326
episode: 516 training return: tensor(423.4031, device='cuda:0')
episode: 517 training return: tensor(208.6622, device='cuda:0')
episode: 518 training return: tensor(341.8026, device='cuda:0')
episode: 519 training return: tensor(468.7260, device='cuda:0')
epoch: 130 test_true_pfm: 3525.0631751053984 sim_pfm: 467.60182050530176
episode: 520 training return: tensor(13.8336, device='cuda:0')
episode: 521 training return: tensor(429.9266, device='cuda:0')
episode: 522 training return: tensor(311.7848, device='cuda:0')
episode: 523 training return: tensor(410.4335, device='cuda:0')
epoch: 131 test_true_pfm: 3331.505702830098 sim_pfm: 190.31606558316466
episode: 524 training return: tensor(413.1870, device='cuda:0')
episode: 525 training return: tensor(360.0146, device='cuda:0')
episode: 526 training return: tensor(281.2775, device='cuda:0')
episode: 527 training return: tensor(333.9145, device='cuda:0')
epoch: 132 test_true_pfm: 3617.097128535934 sim_pfm: 368.17534139500157
episode: 528 training return: tensor(465.2987, device='cuda:0')
episode: 529 training return: tensor(349.7675, device='cuda:0')
episode: 530 training return: tensor(109.6883, device='cuda:0')
episode: 531 training return: tensor(362.5635, device='cuda:0')
epoch: 133 test_true_pfm: 3526.034307691208 sim_pfm: 270.4549860598054
episode: 532 training return: tensor(299.5889, device='cuda:0')
episode: 533 training return: tensor(417.8360, device='cuda:0')
episode: 534 training return: tensor(355.1559, device='cuda:0')
episode: 535 training return: tensor(268.7320, device='cuda:0')
epoch: 134 test_true_pfm: 3584.2884572813077 sim_pfm: 461.8821263630525
episode: 536 training return: tensor(485.8617, device='cuda:0')
episode: 537 training return: tensor(393.2829, device='cuda:0')
episode: 538 training return: tensor(465.0127, device='cuda:0')
episode: 539 training return: tensor(106.0121, device='cuda:0')
epoch: 135 test_true_pfm: 3620.4268661218534 sim_pfm: 387.4775138381713
episode: 540 training return: tensor(411.9074, device='cuda:0')
episode: 541 training return: tensor(407.6207, device='cuda:0')
episode: 542 training return: tensor(424.6230, device='cuda:0')
episode: 543 training return: tensor(341.3426, device='cuda:0')
epoch: 136 test_true_pfm: 3192.002138109988 sim_pfm: 376.181671160118
episode: 544 training return: tensor(316.0523, device='cuda:0')
episode: 545 training return: tensor(396.8931, device='cuda:0')
episode: 546 training return: tensor(343.1332, device='cuda:0')
episode: 547 training return: tensor(164.5776, device='cuda:0')
epoch: 137 test_true_pfm: 3565.6405585999114 sim_pfm: 364.2984437290773
episode: 548 training return: tensor(-247.4403, device='cuda:0')
episode: 549 training return: tensor(453.0634, device='cuda:0')
episode: 550 training return: tensor(339.5107, device='cuda:0')
episode: 551 training return: tensor(267.9980, device='cuda:0')
epoch: 138 test_true_pfm: 3511.5503512919313 sim_pfm: 250.0164400296441
episode: 552 training return: tensor(408.6566, device='cuda:0')
episode: 553 training return: tensor(114.2215, device='cuda:0')
episode: 554 training return: tensor(436.5421, device='cuda:0')
episode: 555 training return: tensor(323.1682, device='cuda:0')
epoch: 139 test_true_pfm: 3526.7754546842193 sim_pfm: 392.1982104193109
episode: 556 training return: tensor(389.3965, device='cuda:0')
episode: 557 training return: tensor(359.2395, device='cuda:0')
episode: 558 training return: tensor(227.4904, device='cuda:0')
episode: 559 training return: tensor(274.1362, device='cuda:0')
epoch: 140 test_true_pfm: 3582.614363383174 sim_pfm: 269.84791737247724
episode: 560 training return: tensor(256.1501, device='cuda:0')
episode: 561 training return: tensor(397.3216, device='cuda:0')
episode: 562 training return: tensor(291.6475, device='cuda:0')
episode: 563 training return: tensor(362.9381, device='cuda:0')
epoch: 141 test_true_pfm: 3410.50757674828 sim_pfm: 442.7098494239035
episode: 564 training return: tensor(49.2185, device='cuda:0')
episode: 565 training return: tensor(372.0230, device='cuda:0')
episode: 566 training return: tensor(346.2440, device='cuda:0')
episode: 567 training return: tensor(-343.8652, device='cuda:0')
epoch: 142 test_true_pfm: 3553.47055011822 sim_pfm: 403.2850369577257
episode: 568 training return: tensor(438.8688, device='cuda:0')
episode: 569 training return: tensor(213.0903, device='cuda:0')
episode: 570 training return: tensor(300.6702, device='cuda:0')
episode: 571 training return: tensor(362.3929, device='cuda:0')
epoch: 143 test_true_pfm: 3594.259381377103 sim_pfm: 166.7001889171855
episode: 572 training return: tensor(274.0158, device='cuda:0')
episode: 573 training return: tensor(360.9825, device='cuda:0')
episode: 574 training return: tensor(386.0066, device='cuda:0')
episode: 575 training return: tensor(-82.6590, device='cuda:0')
epoch: 144 test_true_pfm: 3530.2118964961414 sim_pfm: 395.6459436015478
episode: 576 training return: tensor(-28.3423, device='cuda:0')
episode: 577 training return: tensor(276.6090, device='cuda:0')
episode: 578 training return: tensor(414.2927, device='cuda:0')
episode: 579 training return: tensor(400.8303, device='cuda:0')
epoch: 145 test_true_pfm: 3629.003780292002 sim_pfm: 410.6359134546947
episode: 580 training return: tensor(371.3865, device='cuda:0')
episode: 581 training return: tensor(322.1792, device='cuda:0')
episode: 582 training return: tensor(256.0236, device='cuda:0')
episode: 583 training return: tensor(398.4486, device='cuda:0')
epoch: 146 test_true_pfm: 3580.4883685802265 sim_pfm: 380.969316873583
episode: 584 training return: tensor(403.1467, device='cuda:0')
episode: 585 training return: tensor(331.9307, device='cuda:0')
episode: 586 training return: tensor(272.6388, device='cuda:0')
episode: 587 training return: tensor(301.5522, device='cuda:0')
epoch: 147 test_true_pfm: 3613.3668486686297 sim_pfm: 413.79621381188434
episode: 588 training return: tensor(397.2210, device='cuda:0')
episode: 589 training return: tensor(340.3926, device='cuda:0')
episode: 590 training return: tensor(344.5399, device='cuda:0')
episode: 591 training return: tensor(-239.0366, device='cuda:0')
epoch: 148 test_true_pfm: 3282.634545662922 sim_pfm: 360.647758086406
episode: 592 training return: tensor(346.1314, device='cuda:0')
episode: 593 training return: tensor(309.0140, device='cuda:0')
episode: 594 training return: tensor(399.4418, device='cuda:0')
episode: 595 training return: tensor(193.0240, device='cuda:0')
epoch: 149 test_true_pfm: 3151.9459059110413 sim_pfm: 340.5783013588904
episode: 596 training return: tensor(379.8163, device='cuda:0')
episode: 597 training return: tensor(313.6593, device='cuda:0')
episode: 598 training return: tensor(366.7968, device='cuda:0')
episode: 599 training return: tensor(345.3178, device='cuda:0')
epoch: 150 test_true_pfm: 3621.3873565818008 sim_pfm: 360.7376154775072
