['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '1', '--data', '3000', '--sub']
epoch: 0 training_loss 0.27245371155440806 test_loss: 0.10518134832382202
epoch: 1 training_loss 0.14229659534990788 test_loss: 0.08884567022323608
epoch: 2 training_loss 0.11868008520454168 test_loss: 0.08645277619361877
epoch: 3 training_loss 0.11209568049758672 test_loss: 0.07584241628646851
epoch: 4 training_loss 0.11102693490684032 test_loss: 0.06834219694137574
epoch: 5 training_loss 0.09375842146575451 test_loss: 0.07307952642440796
epoch: 6 training_loss 0.09762781787663698 test_loss: 0.07390822172164917
epoch: 7 training_loss 0.08553266702219844 test_loss: 0.06746222972869872
epoch: 8 training_loss 0.08505240332335234 test_loss: 0.06707005500793457
epoch: 9 training_loss 0.08607335437089204 test_loss: 0.06372116208076477
epoch: 10 training_loss 0.07895823218859732 test_loss: 0.07305902242660522
epoch: 11 training_loss 0.08563202539458871 test_loss: 0.07057698369026184
epoch: 12 training_loss 0.07421203680336476 test_loss: 0.07046186923980713
epoch: 13 training_loss 0.0749677750468254 test_loss: 0.07248148918151856
epoch: 14 training_loss 0.06890928369015455 test_loss: 0.08230180144309998
epoch: 15 training_loss 0.07085816590115428 test_loss: 0.07100830078125
epoch: 16 training_loss 0.06933249570429326 test_loss: 0.07156658172607422
epoch: 17 training_loss 0.06912906417623162 test_loss: 0.06999767422676087
epoch: 18 training_loss 0.06566236538812519 test_loss: 0.07638197541236877
epoch: 19 training_loss 0.0685964778251946 test_loss: 0.07372070550918579
epoch: 20 training_loss 0.07061996306292713 test_loss: 0.0741308093070984
epoch: 21 training_loss 0.06895860638469457 test_loss: 0.07121030688285827
epoch: 22 training_loss 0.05858934401068836 test_loss: 0.07332560420036316
epoch: 23 training_loss 0.05587341031059623 test_loss: 0.09210597276687622
epoch: 24 training_loss 0.05817566412501037 test_loss: 0.07987881898880005
epoch: 25 training_loss 0.055512155732139946 test_loss: 0.08829737305641175
epoch: 26 training_loss 0.06519056386779994 test_loss: 0.07259958386421203
epoch: 27 training_loss 0.05449430077336728 test_loss: 0.08437948226928711
epoch: 28 training_loss 0.05531738482415676 test_loss: 0.07902805209159851
epoch: 29 training_loss 0.05366429380141199 test_loss: 0.07787615060806274
epoch: 30 training_loss 0.057668886259198186 test_loss: 0.09161998629570008
epoch: 31 training_loss 0.052066401233896616 test_loss: 0.08282429575920106
epoch: 32 training_loss 0.04842800259590149 test_loss: 0.09138892292976379
epoch: 33 training_loss 0.04658113410696387 test_loss: 0.08641837239265442
epoch: 34 training_loss 0.04708978072740137 test_loss: 0.0931522786617279
epoch: 35 training_loss 0.057424529157578945 test_loss: 0.08782453536987304
epoch: 36 training_loss 0.049606032688170675 test_loss: 0.09322789907455445
epoch: 37 training_loss 0.04903949634172022 test_loss: 0.11114873886108398
epoch: 38 training_loss 0.053766734721139076 test_loss: 0.09900414943695068
epoch: 39 training_loss 0.05092188156209886 test_loss: 0.09544406533241272
epoch: 40 training_loss 0.046174213271588084 test_loss: 0.09063599705696106
epoch: 41 training_loss 0.04117684661410749 test_loss: 0.09367850422859192
epoch: 42 training_loss 0.04702047866769135 test_loss: 0.09115824103355408
epoch: 43 training_loss 0.043311205385252835 test_loss: 0.09404709935188293
epoch: 44 training_loss 0.041704351510852576 test_loss: 0.09152203202247619
epoch: 45 training_loss 0.045122305806726215 test_loss: 0.08622375130653381
epoch: 46 training_loss 0.04174424304859713 test_loss: 0.10490531921386718
epoch: 47 training_loss 0.037938258945941924 test_loss: 0.086896413564682
epoch: 48 training_loss 0.04189831243827939 test_loss: 0.10722165107727051
epoch: 49 training_loss 0.036627432778477666 test_loss: 0.09828796982765198
epoch: 50 training_loss 0.03541561924386769 test_loss: 0.10558592081069947
epoch: 51 training_loss 0.03934045938774943 test_loss: 0.09865339994430541
epoch: 52 training_loss 0.03579652952030301 test_loss: 0.0961119294166565
epoch: 53 training_loss 0.0338537217490375 test_loss: 0.08560796976089477
epoch: 54 training_loss 0.03269770339597017 test_loss: 0.10344547033309937
epoch: 55 training_loss 0.030332329208031295 test_loss: 0.09878491163253784
epoch: 56 training_loss 0.031059058024547993 test_loss: 0.11521264314651489
epoch: 57 training_loss 0.03998191413469612 test_loss: 0.10168629884719849
epoch: 58 training_loss 0.030198522764258086 test_loss: 0.10769721269607543
epoch: 59 training_loss 0.03090083169285208 test_loss: 0.10363669395446777
epoch: 60 training_loss 0.02668395232409239 test_loss: 0.10523433685302734
epoch: 61 training_loss 0.02723353345412761 test_loss: 0.09732102155685425
epoch: 62 training_loss 0.02705288880970329 test_loss: 0.11294291019439698
epoch: 63 training_loss 0.04185475350357592 test_loss: 0.1049451470375061
epoch: 64 training_loss 0.036208515129983423 test_loss: 0.0994712233543396
epoch: 65 training_loss 0.0296612838935107 test_loss: 0.1011924386024475
epoch: 66 training_loss 0.02808417425956577 test_loss: 0.11218560934066772
epoch: 67 training_loss 0.02948042252799496 test_loss: 0.11266238689422607
epoch: 68 training_loss 0.022876389920711518 test_loss: 0.10603026151657105
epoch: 69 training_loss 0.021497890623286366 test_loss: 0.11438517570495606
epoch: 70 training_loss 0.02237592628225684 test_loss: 0.10476722717285156
epoch: 71 training_loss 0.02619200006593019 test_loss: 0.10146433115005493
epoch: 72 training_loss 0.024529497167095543 test_loss: 0.11534126996994018
epoch: 73 training_loss 0.018344970617908983 test_loss: 0.10654611587524414
epoch: 74 training_loss 0.022983025177381933 test_loss: 0.10756430625915528
epoch: 75 training_loss 0.025463806851767003 test_loss: 0.09866947531700135
epoch: 76 training_loss 0.02894633139949292 test_loss: 0.10903687477111816
epoch: 77 training_loss 0.02282181159593165 test_loss: 0.10620976686477661
epoch: 78 training_loss 0.017860528007149698 test_loss: 0.10500195026397705
epoch: 79 training_loss 0.0214500324241817 test_loss: 0.11587733030319214
epoch: 80 training_loss 0.024292405175510793 test_loss: 0.1040252685546875
epoch: 81 training_loss 0.022399863391183317 test_loss: 0.10405268669128417
epoch: 82 training_loss 0.023615909474901855 test_loss: 0.1048973560333252
epoch: 83 training_loss 0.015854447460733355 test_loss: 0.11454472541809083
epoch: 84 training_loss 0.02776162100257352 test_loss: 0.11892653703689575
epoch: 85 training_loss 0.021305049126967786 test_loss: 0.10968408584594727
epoch: 86 training_loss 0.01496820081025362 test_loss: 0.10578299760818481
epoch: 87 training_loss 0.026199147989973425 test_loss: 0.11243695020675659
epoch: 88 training_loss 0.01626321714371443 test_loss: 0.11083152294158935
epoch: 89 training_loss 0.015789423184469343 test_loss: 0.10980254411697388
epoch: 90 training_loss 0.01591701459372416 test_loss: 0.11294031143188477
epoch: 91 training_loss 0.01669791670748964 test_loss: 0.12318894863128663
epoch: 92 training_loss 0.013863111431710422 test_loss: 0.11291879415512085
epoch: 93 training_loss 0.014553512227721513 test_loss: 0.11508723497390747
epoch: 94 training_loss 0.014240127105731517 test_loss: 0.11120671033859253
epoch: 95 training_loss 0.015643123309127985 test_loss: 0.12930134534835816
epoch: 96 training_loss 0.013089061114005745 test_loss: 0.1239992618560791
epoch: 97 training_loss 0.013072492075152695 test_loss: 0.11249217987060547
epoch: 98 training_loss 0.0149330427008681 test_loss: 0.12611559629440308
epoch: 99 training_loss 0.01412015750305727 test_loss: 0.11614571809768677
epoch: 100 training_loss 0.012877750697080047 test_loss: 0.10990438461303711
epoch: 101 training_loss 0.011824746527709066 test_loss: 0.10616211891174317
epoch: 102 training_loss 0.026862361219245942 test_loss: 0.13642791509628296
epoch: 103 training_loss 0.026888993009924887 test_loss: 0.11843801736831665
epoch: 104 training_loss 0.014442088208161294 test_loss: 0.11044634580612182
epoch: 105 training_loss 0.012182778813876211 test_loss: 0.11037286520004272
epoch: 106 training_loss 0.010664520126301796 test_loss: 0.1116517186164856
epoch: 107 training_loss 0.011009009301196783 test_loss: 0.11679834127426147
epoch: 108 training_loss 0.008380889660911635 test_loss: 0.10983600616455078
epoch: 109 training_loss 0.009157285187393427 test_loss: 0.11481614112854004
epoch: 110 training_loss 0.010977329480228946 test_loss: 0.11915228366851807
epoch: 111 training_loss 0.012897262789774686 test_loss: 0.11743155717849732
epoch: 112 training_loss 0.014959262360353023 test_loss: 0.11025017499923706
epoch: 113 training_loss 0.01441815573722124 test_loss: 0.12663933038711547
epoch: 114 training_loss 0.010358614488504827 test_loss: 0.13479143381118774
epoch: 115 training_loss 0.007637982587330044 test_loss: 0.13568140268325807
epoch: 116 training_loss 0.007845157524570823 test_loss: 0.12288203239440917
epoch: 117 training_loss 0.006366495523834601 test_loss: 0.13785041570663453
epoch: 118 training_loss 0.017287997198291122 test_loss: 0.13360984325408937
epoch: 119 training_loss 0.024823631180915982 test_loss: 0.13615109920501708
epoch: 120 training_loss 0.013194618299603462 test_loss: 0.11892261505126953
epoch: 121 training_loss 0.007913077332777902 test_loss: 0.12856930494308472
epoch: 122 training_loss 0.006051195183536038 test_loss: 0.12445529699325561
epoch: 123 training_loss 0.007005113043123856 test_loss: 0.13256293535232544
epoch: 124 training_loss 0.007472585000796244 test_loss: 0.13223676681518554
epoch: 125 training_loss 0.006155780222034082 test_loss: 0.13043173551559448
epoch: 126 training_loss 0.005959447120549157 test_loss: 0.14053151607513428
epoch: 127 training_loss 0.00643333088606596 test_loss: 0.13267229795455932
epoch: 128 training_loss 0.005221119172638282 test_loss: 0.13515846729278563
epoch: 129 training_loss 0.005531724024331197 test_loss: 0.13621531724929808
epoch: 130 training_loss 0.006509651155793108 test_loss: 0.14086273908615113
epoch: 131 training_loss 0.020033428724855185 test_loss: 0.16870101690292358
epoch: 132 training_loss 0.045196831282228234 test_loss: 0.15206551551818848
epoch: 133 training_loss 0.048092428669333456 test_loss: 0.12089190483093262
epoch: 134 training_loss 0.016479002812411638 test_loss: 0.1266953706741333
epoch: 135 training_loss 0.010868980411905795 test_loss: 0.11798336505889892
epoch: 136 training_loss 0.009431117317872122 test_loss: 0.12257922887802124
epoch: 137 training_loss 0.008096207163762302 test_loss: 0.13101736307144166
epoch: 138 training_loss 0.007048988392925821 test_loss: 0.13462499380111695
epoch: 139 training_loss 0.008040245248703287 test_loss: 0.13826065063476561
epoch: 140 training_loss 0.0074642262724228204 test_loss: 0.14324653148651123
epoch: 141 training_loss 0.006836122806416825 test_loss: 0.1383517861366272
epoch: 142 training_loss 0.0067900607187766585 test_loss: 0.13828577995300292
epoch: 143 training_loss 0.005756395402131602 test_loss: 0.14007939100265504
epoch: 144 training_loss 0.006596113120904193 test_loss: 0.14365763664245607
epoch: 145 training_loss 0.0046403523569460954 test_loss: 0.14354608058929444
epoch: 146 training_loss 0.004829501888016239 test_loss: 0.14489407539367677
epoch: 147 training_loss 0.004271424820180982 test_loss: 0.14884334802627563
epoch: 148 training_loss 0.004574820200214162 test_loss: 0.15726189613342284
epoch: 149 training_loss 0.0044539986259769645 test_loss: 0.1528424620628357
epoch: 0 training_loss 36.1281078338623 test_loss: 8.832887268066406
epoch: 1 training_loss 15.776506471633912 test_loss: 6.101909637451172
epoch: 2 training_loss 12.142507371902466 test_loss: 4.980299377441407
epoch: 3 training_loss 9.933538913726807 test_loss: 4.3789104461669925
epoch: 4 training_loss 8.785140323638917 test_loss: 3.9544143676757812
epoch: 5 training_loss 7.935300335884095 test_loss: 3.6880138397216795
epoch: 6 training_loss 7.245448589324951 test_loss: 3.470275115966797
epoch: 7 training_loss 6.773751173019409 test_loss: 3.285183334350586
epoch: 8 training_loss 6.38882010936737 test_loss: 3.1036775588989256
epoch: 9 training_loss 6.092507524490356 test_loss: 2.9807723999023437
epoch: 10 training_loss 5.797223839759827 test_loss: 2.9025876998901365
epoch: 11 training_loss 5.57710572719574 test_loss: 2.7390743255615235
epoch: 12 training_loss 5.30608407497406 test_loss: 2.6735986709594726
epoch: 13 training_loss 5.127469017505645 test_loss: 2.6061302185058595
epoch: 14 training_loss 4.956419327259064 test_loss: 2.5553117752075196
epoch: 15 training_loss 4.67942880153656 test_loss: 2.46600399017334
epoch: 16 training_loss 4.612846374511719 test_loss: 2.3868459701538085
epoch: 17 training_loss 4.493904433250427 test_loss: 2.3571924209594726
epoch: 18 training_loss 4.276060631275177 test_loss: 2.2989858627319335
epoch: 19 training_loss 4.287600064277649 test_loss: 2.252627372741699
epoch: 20 training_loss 4.101087985038757 test_loss: 2.24670352935791
epoch: 21 training_loss 4.025167679786682 test_loss: 2.165551948547363
epoch: 22 training_loss 3.9141774463653562 test_loss: 2.1786256790161134
epoch: 23 training_loss 3.8782394623756407 test_loss: 2.090310478210449
epoch: 24 training_loss 3.7488741779327395 test_loss: 2.0231348037719727
epoch: 25 training_loss 3.655112376213074 test_loss: 2.006112480163574
epoch: 26 training_loss 3.6592070770263674 test_loss: 1.990750503540039
epoch: 27 training_loss 3.5773613023757935 test_loss: 1.9487953186035156
epoch: 28 training_loss 3.5260906863212584 test_loss: 1.9316192626953126
epoch: 29 training_loss 3.4876329255104066 test_loss: 1.9031526565551757
epoch: 30 training_loss 3.434020001888275 test_loss: 1.877904510498047
epoch: 31 training_loss 3.327200517654419 test_loss: 1.8527379989624024
epoch: 32 training_loss 3.312843396663666 test_loss: 1.8370140075683594
epoch: 33 training_loss 3.2686830353736878 test_loss: 1.8037029266357423
epoch: 34 training_loss 3.20885826587677 test_loss: 1.794843864440918
epoch: 35 training_loss 3.1548536467552184 test_loss: 1.771331787109375
epoch: 36 training_loss 3.1661122250556946 test_loss: 1.745269203186035
epoch: 37 training_loss 3.11264666557312 test_loss: 1.718940544128418
epoch: 38 training_loss 3.117206678390503 test_loss: 1.743028450012207
epoch: 39 training_loss 2.9987335181236268 test_loss: 1.7332517623901367
epoch: 40 training_loss 2.9361701369285584 test_loss: 1.720574188232422
epoch: 41 training_loss 3.013235909938812 test_loss: 1.7081819534301759
epoch: 42 training_loss 2.909364171028137 test_loss: 1.6597339630126953
epoch: 43 training_loss 2.8882555198669433 test_loss: 1.663212203979492
epoch: 44 training_loss 2.889663429260254 test_loss: 1.6523656845092773
epoch: 45 training_loss 2.8312613487243654 test_loss: 1.6381511688232422
epoch: 46 training_loss 2.7824698066711426 test_loss: 1.6065258026123046
epoch: 47 training_loss 2.8513552260398867 test_loss: 1.599799346923828
epoch: 48 training_loss 2.8148984575271605 test_loss: 1.5855406761169433
epoch: 49 training_loss 2.751383512020111 test_loss: 1.591396427154541
epoch: 50 training_loss 2.7479289865493772 test_loss: 1.5649078369140625
epoch: 51 training_loss 2.791743538379669 test_loss: 1.5783513069152832
epoch: 52 training_loss 2.7213204216957094 test_loss: 1.5521127700805664
epoch: 53 training_loss 2.6875943672657012 test_loss: 1.530409049987793
epoch: 54 training_loss 2.7168229246139526 test_loss: 1.5647658348083495
epoch: 55 training_loss 2.6734339594841003 test_loss: 1.5191814422607421
epoch: 56 training_loss 2.6686417078971862 test_loss: 1.504769515991211
epoch: 57 training_loss 2.589221669435501 test_loss: 1.49674654006958
epoch: 58 training_loss 2.580242350101471 test_loss: 1.5158239364624024
epoch: 59 training_loss 2.564716444015503 test_loss: 1.505908489227295
epoch: 60 training_loss 2.5065845346450804 test_loss: 1.5114274978637696
epoch: 61 training_loss 2.613896517753601 test_loss: 1.4866415023803712
epoch: 62 training_loss 2.563537838459015 test_loss: 1.4664068222045898
epoch: 63 training_loss 2.5020524573326113 test_loss: 1.479815673828125
epoch: 64 training_loss 2.5253784430027006 test_loss: 1.4920016288757325
epoch: 65 training_loss 2.456934221982956 test_loss: 1.4615569114685059
epoch: 66 training_loss 2.4474489164352415 test_loss: 1.4684489250183106
epoch: 67 training_loss 2.4696282935142517 test_loss: 1.452834701538086
epoch: 68 training_loss 2.4577673864364624 test_loss: 1.434736442565918
epoch: 69 training_loss 2.4392749321460725 test_loss: 1.450742816925049
epoch: 70 training_loss 2.3969406092166903 test_loss: 1.4368963241577148
epoch: 71 training_loss 2.407826375961304 test_loss: 1.4319986343383788
epoch: 72 training_loss 2.4153082752227784 test_loss: 1.410714340209961
epoch: 73 training_loss 2.3749405014514924 test_loss: 1.4143194198608398
epoch: 74 training_loss 2.340553669929504 test_loss: 1.4249253273010254
epoch: 75 training_loss 2.378754128217697 test_loss: 1.4222833633422851
epoch: 76 training_loss 2.382892587184906 test_loss: 1.4261764526367187
epoch: 77 training_loss 2.3512529921531677 test_loss: 1.382573699951172
epoch: 78 training_loss 2.3394624185562134 test_loss: 1.417293643951416
epoch: 79 training_loss 2.3182450985908507 test_loss: 1.3973702430725097
epoch: 80 training_loss 2.311816246509552 test_loss: 1.3836856842041017
epoch: 81 training_loss 2.273911494016647 test_loss: 1.3601093292236328
epoch: 82 training_loss 2.2860626327991485 test_loss: 1.3912677764892578
epoch: 83 training_loss 2.271603652238846 test_loss: 1.3790328979492188
epoch: 84 training_loss 2.252636501789093 test_loss: 1.363710308074951
epoch: 85 training_loss 2.2546184349060057 test_loss: 1.34481782913208
epoch: 86 training_loss 2.231797994375229 test_loss: 1.3505084037780761
epoch: 87 training_loss 2.22432333111763 test_loss: 1.341891384124756
epoch: 88 training_loss 2.2132672452926636 test_loss: 1.3455231666564942
epoch: 89 training_loss 2.205847762823105 test_loss: 1.3489596366882324
epoch: 90 training_loss 2.22911337018013 test_loss: 1.3381830215454102
epoch: 91 training_loss 2.2003943395614622 test_loss: 1.3482882499694824
epoch: 92 training_loss 2.225184323787689 test_loss: 1.3556662559509278
epoch: 93 training_loss 2.2498100638389587 test_loss: 1.3599418640136718
epoch: 94 training_loss 2.2413402700424196 test_loss: 1.337214183807373
epoch: 95 training_loss 2.186805522441864 test_loss: 1.3344364166259766
epoch: 96 training_loss 2.153761063814163 test_loss: 1.3128720283508302
epoch: 97 training_loss 2.1581380021572114 test_loss: 1.3260515213012696
epoch: 98 training_loss 2.12051250576973 test_loss: 1.326526165008545
epoch: 99 training_loss 2.1836406219005586 test_loss: 1.3216117858886718
epoch: 100 training_loss 2.133519265651703 test_loss: 1.309134578704834
epoch: 101 training_loss 2.1354047012329103 test_loss: 1.3063715934753417
epoch: 102 training_loss 2.076807954311371 test_loss: 1.2998554229736328
epoch: 103 training_loss 2.1084646928310393 test_loss: 1.2946287155151368
epoch: 104 training_loss 2.1417045545578004 test_loss: 1.307709312438965
epoch: 105 training_loss 2.0900737380981447 test_loss: 1.294820213317871
epoch: 106 training_loss 2.151304497718811 test_loss: 1.3101258277893066
epoch: 107 training_loss 2.103197511434555 test_loss: 1.3135130882263184
epoch: 108 training_loss 2.0813523936271667 test_loss: 1.3017765998840332
epoch: 109 training_loss 2.0593250048160554 test_loss: 1.2979212760925294
epoch: 110 training_loss 2.0756381905078887 test_loss: 1.291556453704834
epoch: 111 training_loss 2.0526055455207826 test_loss: 1.278915786743164
epoch: 112 training_loss 2.062913180589676 test_loss: 1.2906414031982423
epoch: 113 training_loss 2.0504202532768248 test_loss: 1.2704815864562988
epoch: 114 training_loss 2.040394310951233 test_loss: 1.2763748168945312
epoch: 115 training_loss 2.073829106092453 test_loss: 1.2983282089233399
epoch: 116 training_loss 2.0511635839939117 test_loss: 1.2622148513793945
epoch: 117 training_loss 2.034890559911728 test_loss: 1.2780653953552246
epoch: 118 training_loss 2.0623786067962646 test_loss: 1.2712698936462403
epoch: 119 training_loss 2.0314602947235105 test_loss: 1.252924346923828
epoch: 120 training_loss 2.0879995501041413 test_loss: 1.3010569572448731
epoch: 121 training_loss 2.026351372003555 test_loss: 1.2888782501220704
epoch: 122 training_loss 2.0401481771469117 test_loss: 1.280898094177246
epoch: 123 training_loss 2.014136027097702 test_loss: 1.2542583465576171
epoch: 124 training_loss 2.021209477186203 test_loss: 1.2607455253601074
epoch: 125 training_loss 1.979358698129654 test_loss: 1.2560612678527832
epoch: 126 training_loss 2.0273288214206695 test_loss: 1.2730352401733398
epoch: 127 training_loss 1.9933344614505768 test_loss: 1.270134162902832
epoch: 128 training_loss 1.9591967594623565 test_loss: 1.2448522567749023
epoch: 129 training_loss 2.0162827265262604 test_loss: 1.2598931312561035
epoch: 130 training_loss 1.996976854801178 test_loss: 1.237251377105713
epoch: 131 training_loss 1.9602567183971404 test_loss: 1.2433595657348633
epoch: 132 training_loss 1.974434621334076 test_loss: 1.2394798278808594
epoch: 133 training_loss 1.9653034734725952 test_loss: 1.2625473976135253
epoch: 134 training_loss 1.9401919519901276 test_loss: 1.2424031257629395
epoch: 135 training_loss 1.9943450748920442 test_loss: 1.2503896713256837
epoch: 136 training_loss 1.9721132659912108 test_loss: 1.2383108139038086
epoch: 137 training_loss 1.926178343296051 test_loss: 1.2439387321472168
epoch: 138 training_loss 1.9473133325576781 test_loss: 1.2423602104187013
epoch: 139 training_loss 1.942081003189087 test_loss: 1.2242653846740723
epoch: 140 training_loss 1.9405758273601532 test_loss: 1.2354678153991698
epoch: 141 training_loss 1.8924223625659942 test_loss: 1.2377324104309082
epoch: 142 training_loss 1.9555178415775298 test_loss: 1.2346536636352539
epoch: 143 training_loss 1.9454886496067048 test_loss: 1.2527352333068849
epoch: 144 training_loss 1.970439635515213 test_loss: 1.242223072052002
epoch: 145 training_loss 1.9166783046722413 test_loss: 1.2226117134094239
epoch: 146 training_loss 1.905167180299759 test_loss: 1.2368733406066894
epoch: 147 training_loss 1.9384942638874054 test_loss: 1.2190842628479004
epoch: 148 training_loss 1.9500108528137208 test_loss: 1.226776123046875
epoch: 149 training_loss 1.905213395357132 test_loss: 1.2344822883605957
2590.173401927564
episode: 0 training return: tensor(215.3345, device='cuda:0')
episode: 1 training return: tensor(-31.2899, device='cuda:0')
episode: 2 training return: tensor(-269.5125, device='cuda:0')
episode: 3 training return: tensor(122.4379, device='cuda:0')
epoch: 1 test_true_pfm: 2776.7367950888656 sim_pfm: -14.876992219379948
episode: 4 training return: tensor(146.7819, device='cuda:0')
episode: 5 training return: tensor(-329.9916, device='cuda:0')
episode: 6 training return: tensor(266.4419, device='cuda:0')
episode: 7 training return: tensor(42.3105, device='cuda:0')
epoch: 2 test_true_pfm: 2499.0559294800355 sim_pfm: 54.71960142785489
episode: 8 training return: tensor(-197.7344, device='cuda:0')
episode: 9 training return: tensor(-276.5594, device='cuda:0')
episode: 10 training return: tensor(-334.2539, device='cuda:0')
episode: 11 training return: tensor(192.3680, device='cuda:0')
epoch: 3 test_true_pfm: 3165.1356909597403 sim_pfm: -7.108626543389012
episode: 12 training return: tensor(-127.6644, device='cuda:0')
episode: 13 training return: tensor(-168.8610, device='cuda:0')
episode: 14 training return: tensor(189.5948, device='cuda:0')
episode: 15 training return: tensor(-249.9800, device='cuda:0')
epoch: 4 test_true_pfm: 2475.0343280765333 sim_pfm: -88.2986011383861
episode: 16 training return: tensor(-238.2433, device='cuda:0')
episode: 17 training return: tensor(-246.4827, device='cuda:0')
episode: 18 training return: tensor(40.7346, device='cuda:0')
episode: 19 training return: tensor(-271.4374, device='cuda:0')
epoch: 5 test_true_pfm: 2676.3775333082353 sim_pfm: -134.09199457572927
episode: 20 training return: tensor(266.7374, device='cuda:0')
episode: 21 training return: tensor(144.7970, device='cuda:0')
episode: 22 training return: tensor(213.4275, device='cuda:0')
episode: 23 training return: tensor(-141.8008, device='cuda:0')
epoch: 6 test_true_pfm: 2991.3711353738713 sim_pfm: 196.2233696123973
episode: 24 training return: tensor(294.0505, device='cuda:0')
episode: 25 training return: tensor(72.7939, device='cuda:0')
episode: 26 training return: tensor(150.0037, device='cuda:0')
episode: 27 training return: tensor(-83.8645, device='cuda:0')
epoch: 7 test_true_pfm: 3269.144064432962 sim_pfm: 197.6467840373322
episode: 28 training return: tensor(-187.1475, device='cuda:0')
episode: 29 training return: tensor(-203.0761, device='cuda:0')
episode: 30 training return: tensor(-57.4937, device='cuda:0')
episode: 31 training return: tensor(-339.4103, device='cuda:0')
epoch: 8 test_true_pfm: 3245.9179995347076 sim_pfm: 227.01051088009262
episode: 32 training return: tensor(-271.0952, device='cuda:0')
episode: 33 training return: tensor(-117.5685, device='cuda:0')
episode: 34 training return: tensor(-171.9195, device='cuda:0')
episode: 35 training return: tensor(133.9191, device='cuda:0')
epoch: 9 test_true_pfm: 2753.1488612203866 sim_pfm: 221.88225250378795
episode: 36 training return: tensor(180.8561, device='cuda:0')
episode: 37 training return: tensor(254.6498, device='cuda:0')
episode: 38 training return: tensor(-71.2659, device='cuda:0')
episode: 39 training return: tensor(187.9225, device='cuda:0')
epoch: 10 test_true_pfm: 3353.162700305375 sim_pfm: 23.305476267628062
episode: 40 training return: tensor(-168.8049, device='cuda:0')
episode: 41 training return: tensor(-80.6086, device='cuda:0')
episode: 42 training return: tensor(-95.1722, device='cuda:0')
episode: 43 training return: tensor(105.2405, device='cuda:0')
epoch: 11 test_true_pfm: 2061.3369310320154 sim_pfm: 122.29459374580377
episode: 44 training return: tensor(193.8391, device='cuda:0')
episode: 45 training return: tensor(-88.2819, device='cuda:0')
episode: 46 training return: tensor(170.1514, device='cuda:0')
episode: 47 training return: tensor(281.8900, device='cuda:0')
epoch: 12 test_true_pfm: 2533.202579150356 sim_pfm: 215.396978113257
episode: 48 training return: tensor(387.0558, device='cuda:0')
episode: 49 training return: tensor(-154.8153, device='cuda:0')
episode: 50 training return: tensor(106.0270, device='cuda:0')
episode: 51 training return: tensor(172.8478, device='cuda:0')
epoch: 13 test_true_pfm: 3448.375802631255 sim_pfm: 134.99623359330386
episode: 52 training return: tensor(126.8734, device='cuda:0')
episode: 53 training return: tensor(11.8196, device='cuda:0')
episode: 54 training return: tensor(178.7756, device='cuda:0')
episode: 55 training return: tensor(-198.9523, device='cuda:0')
epoch: 14 test_true_pfm: 3357.6670588466477 sim_pfm: 176.96015876038777
episode: 56 training return: tensor(41.7680, device='cuda:0')
episode: 57 training return: tensor(-252.9084, device='cuda:0')
episode: 58 training return: tensor(-233.4757, device='cuda:0')
episode: 59 training return: tensor(-210.3963, device='cuda:0')
epoch: 15 test_true_pfm: 3445.5812098424285 sim_pfm: 46.15137800369606
episode: 60 training return: tensor(145.8964, device='cuda:0')
episode: 61 training return: tensor(221.3639, device='cuda:0')
episode: 62 training return: tensor(-12.2420, device='cuda:0')
episode: 63 training return: tensor(176.2260, device='cuda:0')
epoch: 16 test_true_pfm: 3443.8831636882537 sim_pfm: 114.5831755810262
episode: 64 training return: tensor(66.3679, device='cuda:0')
episode: 65 training return: tensor(217.1559, device='cuda:0')
episode: 66 training return: tensor(204.4717, device='cuda:0')
episode: 67 training return: tensor(215.7929, device='cuda:0')
epoch: 17 test_true_pfm: 3469.860068224864 sim_pfm: 109.57135223742807
episode: 68 training return: tensor(244.6472, device='cuda:0')
episode: 69 training return: tensor(48.1698, device='cuda:0')
episode: 70 training return: tensor(-37.2038, device='cuda:0')
episode: 71 training return: tensor(151.2256, device='cuda:0')
epoch: 18 test_true_pfm: 3214.5485554871316 sim_pfm: 197.325738304576
episode: 72 training return: tensor(236.1404, device='cuda:0')
episode: 73 training return: tensor(-208.5762, device='cuda:0')
episode: 74 training return: tensor(156.2744, device='cuda:0')
episode: 75 training return: tensor(-253.0389, device='cuda:0')
epoch: 19 test_true_pfm: 3456.077469273938 sim_pfm: 69.19775846188229
episode: 76 training return: tensor(153.6562, device='cuda:0')
episode: 77 training return: tensor(204.8142, device='cuda:0')
episode: 78 training return: tensor(157.0344, device='cuda:0')
episode: 79 training return: tensor(-439.6410, device='cuda:0')
epoch: 20 test_true_pfm: 3430.8992245537606 sim_pfm: 108.46530533444214
episode: 80 training return: tensor(-176.5515, device='cuda:0')
episode: 81 training return: tensor(-55.6511, device='cuda:0')
episode: 82 training return: tensor(280.6210, device='cuda:0')
episode: 83 training return: tensor(-11.6024, device='cuda:0')
epoch: 21 test_true_pfm: 2845.0216139959794 sim_pfm: 186.9901538033349
episode: 84 training return: tensor(-424.1313, device='cuda:0')
episode: 85 training return: tensor(238.3500, device='cuda:0')
episode: 86 training return: tensor(169.8306, device='cuda:0')
episode: 87 training return: tensor(220.9152, device='cuda:0')
epoch: 22 test_true_pfm: 2956.113093347578 sim_pfm: 145.81076701940037
episode: 88 training return: tensor(212.1125, device='cuda:0')
episode: 89 training return: tensor(98.8862, device='cuda:0')
episode: 90 training return: tensor(254.9627, device='cuda:0')
episode: 91 training return: tensor(-43.0335, device='cuda:0')
epoch: 23 test_true_pfm: 2831.3339545497097 sim_pfm: 199.36185845492096
episode: 92 training return: tensor(270.3220, device='cuda:0')
episode: 93 training return: tensor(35.4730, device='cuda:0')
episode: 94 training return: tensor(-154.3826, device='cuda:0')
episode: 95 training return: tensor(213.5699, device='cuda:0')
epoch: 24 test_true_pfm: 3369.948026276777 sim_pfm: 169.4703362309083
episode: 96 training return: tensor(159.1725, device='cuda:0')
episode: 97 training return: tensor(-330.1097, device='cuda:0')
episode: 98 training return: tensor(-73.8644, device='cuda:0')
episode: 99 training return: tensor(212.5132, device='cuda:0')
epoch: 25 test_true_pfm: 3139.2342475961836 sim_pfm: -1.1508957451151218
episode: 100 training return: tensor(140.6194, device='cuda:0')
episode: 101 training return: tensor(239.7492, device='cuda:0')
episode: 102 training return: tensor(89.8878, device='cuda:0')
episode: 103 training return: tensor(169.5349, device='cuda:0')
epoch: 26 test_true_pfm: 2966.484049648008 sim_pfm: 98.33678268633473
episode: 104 training return: tensor(78.4193, device='cuda:0')
episode: 105 training return: tensor(158.7805, device='cuda:0')
episode: 106 training return: tensor(223.3750, device='cuda:0')
episode: 107 training return: tensor(162.6372, device='cuda:0')
epoch: 27 test_true_pfm: 3056.822311186965 sim_pfm: 180.04070112401192
episode: 108 training return: tensor(190.0023, device='cuda:0')
episode: 109 training return: tensor(253.4439, device='cuda:0')
episode: 110 training return: tensor(174.9845, device='cuda:0')
episode: 111 training return: tensor(197.4656, device='cuda:0')
epoch: 28 test_true_pfm: 3424.027367785675 sim_pfm: 109.31815930699425
episode: 112 training return: tensor(237.9678, device='cuda:0')
episode: 113 training return: tensor(335.6953, device='cuda:0')
episode: 114 training return: tensor(-238.4382, device='cuda:0')
episode: 115 training return: tensor(287.3805, device='cuda:0')
epoch: 29 test_true_pfm: 3438.7086959495887 sim_pfm: 329.2299917769269
episode: 116 training return: tensor(-63.6324, device='cuda:0')
episode: 117 training return: tensor(267.6230, device='cuda:0')
episode: 118 training return: tensor(213.4429, device='cuda:0')
episode: 119 training return: tensor(260.6362, device='cuda:0')
epoch: 30 test_true_pfm: 3394.7465933745025 sim_pfm: 6.310194436538343
episode: 120 training return: tensor(255.3346, device='cuda:0')
episode: 121 training return: tensor(134.7474, device='cuda:0')
episode: 122 training return: tensor(178.1735, device='cuda:0')
episode: 123 training return: tensor(-208.3053, device='cuda:0')
epoch: 31 test_true_pfm: 3465.0452639533128 sim_pfm: 191.50847966937968
episode: 124 training return: tensor(42.4536, device='cuda:0')
episode: 125 training return: tensor(244.9458, device='cuda:0')
episode: 126 training return: tensor(349.2119, device='cuda:0')
episode: 127 training return: tensor(-317.0101, device='cuda:0')
epoch: 32 test_true_pfm: 2311.1153578358617 sim_pfm: 274.1940438312595
episode: 128 training return: tensor(209.2737, device='cuda:0')
episode: 129 training return: tensor(-26.6154, device='cuda:0')
episode: 130 training return: tensor(140.7453, device='cuda:0')
episode: 131 training return: tensor(220.6764, device='cuda:0')
epoch: 33 test_true_pfm: 3137.075410141899 sim_pfm: 263.050864116832
episode: 132 training return: tensor(208.5676, device='cuda:0')
episode: 133 training return: tensor(-347.4246, device='cuda:0')
episode: 134 training return: tensor(-211.6730, device='cuda:0')
episode: 135 training return: tensor(156.7494, device='cuda:0')
epoch: 34 test_true_pfm: 3348.07917741334 sim_pfm: 276.53437175420305
episode: 136 training return: tensor(181.6275, device='cuda:0')
episode: 137 training return: tensor(344.4476, device='cuda:0')
episode: 138 training return: tensor(97.9997, device='cuda:0')
episode: 139 training return: tensor(207.5485, device='cuda:0')
epoch: 35 test_true_pfm: 3413.644697817697 sim_pfm: 305.9032492520443
episode: 140 training return: tensor(274.6873, device='cuda:0')
episode: 141 training return: tensor(254.4391, device='cuda:0')
episode: 142 training return: tensor(-246.2293, device='cuda:0')
episode: 143 training return: tensor(123.8658, device='cuda:0')
epoch: 36 test_true_pfm: 3166.0749797463322 sim_pfm: 235.5300253018504
episode: 144 training return: tensor(-258.7071, device='cuda:0')
episode: 145 training return: tensor(207.2937, device='cuda:0')
episode: 146 training return: tensor(158.0918, device='cuda:0')
episode: 147 training return: tensor(332.7126, device='cuda:0')
epoch: 37 test_true_pfm: 3231.6588391287137 sim_pfm: 102.9342009514415
episode: 148 training return: tensor(289.4174, device='cuda:0')
episode: 149 training return: tensor(186.0377, device='cuda:0')
episode: 150 training return: tensor(171.5152, device='cuda:0')
episode: 151 training return: tensor(335.1263, device='cuda:0')
epoch: 38 test_true_pfm: 3449.337330152395 sim_pfm: 284.4756250984113
episode: 152 training return: tensor(207.5192, device='cuda:0')
episode: 153 training return: tensor(289.5607, device='cuda:0')
episode: 154 training return: tensor(226.3603, device='cuda:0')
episode: 155 training return: tensor(216.3862, device='cuda:0')
epoch: 39 test_true_pfm: 3530.2203085781603 sim_pfm: 290.76400309530436
episode: 156 training return: tensor(-207.8962, device='cuda:0')
episode: 157 training return: tensor(146.1358, device='cuda:0')
episode: 158 training return: tensor(229.0463, device='cuda:0')
episode: 159 training return: tensor(220.5994, device='cuda:0')
epoch: 40 test_true_pfm: 3488.5268525479855 sim_pfm: 147.2611012958611
episode: 160 training return: tensor(218.1766, device='cuda:0')
episode: 161 training return: tensor(154.8144, device='cuda:0')
episode: 162 training return: tensor(184.6192, device='cuda:0')
episode: 163 training return: tensor(197.6247, device='cuda:0')
epoch: 41 test_true_pfm: 3424.2946939833314 sim_pfm: 239.03421352226482
episode: 164 training return: tensor(170.1364, device='cuda:0')
episode: 165 training return: tensor(125.5441, device='cuda:0')
episode: 166 training return: tensor(304.8743, device='cuda:0')
episode: 167 training return: tensor(250.0739, device='cuda:0')
epoch: 42 test_true_pfm: 3462.283120394912 sim_pfm: 203.03719944212935
episode: 168 training return: tensor(154.3538, device='cuda:0')
episode: 169 training return: tensor(266.4422, device='cuda:0')
episode: 170 training return: tensor(251.3446, device='cuda:0')
episode: 171 training return: tensor(-95.6600, device='cuda:0')
epoch: 43 test_true_pfm: 3449.2872349922886 sim_pfm: 206.41037013490373
episode: 172 training return: tensor(135.6932, device='cuda:0')
episode: 173 training return: tensor(188.0350, device='cuda:0')
episode: 174 training return: tensor(306.4790, device='cuda:0')
episode: 175 training return: tensor(260.8864, device='cuda:0')
epoch: 44 test_true_pfm: 3459.907995430573 sim_pfm: 248.432115286026
episode: 176 training return: tensor(165.9468, device='cuda:0')
episode: 177 training return: tensor(285.7899, device='cuda:0')
episode: 178 training return: tensor(212.9000, device='cuda:0')
episode: 179 training return: tensor(237.0692, device='cuda:0')
epoch: 45 test_true_pfm: 3399.134730197691 sim_pfm: 189.98416366540673
episode: 180 training return: tensor(263.7913, device='cuda:0')
episode: 181 training return: tensor(128.9615, device='cuda:0')
episode: 182 training return: tensor(205.3418, device='cuda:0')
episode: 183 training return: tensor(248.1544, device='cuda:0')
epoch: 46 test_true_pfm: 3512.263931557902 sim_pfm: 278.84918656518374
episode: 184 training return: tensor(334.0931, device='cuda:0')
episode: 185 training return: tensor(-326.8625, device='cuda:0')
episode: 186 training return: tensor(265.3500, device='cuda:0')
episode: 187 training return: tensor(217.9514, device='cuda:0')
epoch: 47 test_true_pfm: 3400.824176128459 sim_pfm: 275.30351833502453
episode: 188 training return: tensor(163.2442, device='cuda:0')
episode: 189 training return: tensor(-136.9296, device='cuda:0')
episode: 190 training return: tensor(237.9212, device='cuda:0')
episode: 191 training return: tensor(258.9536, device='cuda:0')
epoch: 48 test_true_pfm: 3382.1488368846526 sim_pfm: 266.1151684963843
episode: 192 training return: tensor(306.8564, device='cuda:0')
episode: 193 training return: tensor(62.2654, device='cuda:0')
episode: 194 training return: tensor(262.7166, device='cuda:0')
episode: 195 training return: tensor(251.2876, device='cuda:0')
epoch: 49 test_true_pfm: 3458.7594464202393 sim_pfm: 255.7440051450394
episode: 196 training return: tensor(115.9802, device='cuda:0')
episode: 197 training return: tensor(286.6481, device='cuda:0')
episode: 198 training return: tensor(207.8153, device='cuda:0')
episode: 199 training return: tensor(238.0289, device='cuda:0')
epoch: 50 test_true_pfm: 3409.903979934907 sim_pfm: 184.57213333349014
episode: 200 training return: tensor(323.7805, device='cuda:0')
episode: 201 training return: tensor(244.1140, device='cuda:0')
episode: 202 training return: tensor(95.4515, device='cuda:0')
episode: 203 training return: tensor(238.2457, device='cuda:0')
epoch: 51 test_true_pfm: 3401.6114683067663 sim_pfm: 236.33693820800787
episode: 204 training return: tensor(302.2829, device='cuda:0')
episode: 205 training return: tensor(192.3611, device='cuda:0')
episode: 206 training return: tensor(258.7220, device='cuda:0')
episode: 207 training return: tensor(223.5335, device='cuda:0')
epoch: 52 test_true_pfm: 3417.1541751667487 sim_pfm: 253.88816005568756
episode: 208 training return: tensor(208.9322, device='cuda:0')
episode: 209 training return: tensor(134.1333, device='cuda:0')
episode: 210 training return: tensor(191.4396, device='cuda:0')
episode: 211 training return: tensor(127.4279, device='cuda:0')
epoch: 53 test_true_pfm: 3433.892574895457 sim_pfm: 254.439981451617
episode: 212 training return: tensor(227.3729, device='cuda:0')
episode: 213 training return: tensor(291.5024, device='cuda:0')
episode: 214 training return: tensor(128.7298, device='cuda:0')
episode: 215 training return: tensor(257.0998, device='cuda:0')
epoch: 54 test_true_pfm: 3482.105506508906 sim_pfm: 264.6643798446069
episode: 216 training return: tensor(141.8428, device='cuda:0')
episode: 217 training return: tensor(201.6369, device='cuda:0')
episode: 218 training return: tensor(294.1375, device='cuda:0')
episode: 219 training return: tensor(257.3248, device='cuda:0')
epoch: 55 test_true_pfm: 3464.5645440061685 sim_pfm: 306.1071608766409
episode: 220 training return: tensor(352.5822, device='cuda:0')
episode: 221 training return: tensor(221.5526, device='cuda:0')
episode: 222 training return: tensor(231.8544, device='cuda:0')
episode: 223 training return: tensor(246.7767, device='cuda:0')
epoch: 56 test_true_pfm: 3473.380090362292 sim_pfm: 317.83960466857144
episode: 224 training return: tensor(143.2902, device='cuda:0')
episode: 225 training return: tensor(267.5423, device='cuda:0')
episode: 226 training return: tensor(231.8224, device='cuda:0')
episode: 227 training return: tensor(221.5396, device='cuda:0')
epoch: 57 test_true_pfm: 3501.107915963574 sim_pfm: 279.4564125177761
episode: 228 training return: tensor(223.2442, device='cuda:0')
episode: 229 training return: tensor(266.9565, device='cuda:0')
episode: 230 training return: tensor(161.7986, device='cuda:0')
episode: 231 training return: tensor(187.4731, device='cuda:0')
epoch: 58 test_true_pfm: 3434.267486667695 sim_pfm: 302.36182740167715
episode: 232 training return: tensor(203.1373, device='cuda:0')
episode: 233 training return: tensor(170.5184, device='cuda:0')
episode: 234 training return: tensor(199.2358, device='cuda:0')
episode: 235 training return: tensor(142.3133, device='cuda:0')
epoch: 59 test_true_pfm: 3444.067492549919 sim_pfm: 307.8359812410393
episode: 236 training return: tensor(172.9772, device='cuda:0')
episode: 237 training return: tensor(150.4130, device='cuda:0')
episode: 238 training return: tensor(314.7695, device='cuda:0')
episode: 239 training return: tensor(291.5507, device='cuda:0')
epoch: 60 test_true_pfm: 3355.4616925083988 sim_pfm: 205.9323849394374
episode: 240 training return: tensor(181.0833, device='cuda:0')
episode: 241 training return: tensor(98.5756, device='cuda:0')
episode: 242 training return: tensor(207.5945, device='cuda:0')
episode: 243 training return: tensor(275.8658, device='cuda:0')
epoch: 61 test_true_pfm: 3442.955236298157 sim_pfm: 304.3018031887477
episode: 244 training return: tensor(125.9630, device='cuda:0')
episode: 245 training return: tensor(207.8770, device='cuda:0')
episode: 246 training return: tensor(221.1174, device='cuda:0')
episode: 247 training return: tensor(332.3399, device='cuda:0')
epoch: 62 test_true_pfm: 3488.283560994308 sim_pfm: 278.3986563809837
episode: 248 training return: tensor(216.2451, device='cuda:0')
episode: 249 training return: tensor(183.4668, device='cuda:0')
episode: 250 training return: tensor(251.4607, device='cuda:0')
episode: 251 training return: tensor(121.3237, device='cuda:0')
epoch: 63 test_true_pfm: 3401.019200865719 sim_pfm: 139.5433081255469
episode: 252 training return: tensor(262.6565, device='cuda:0')
episode: 253 training return: tensor(177.4852, device='cuda:0')
episode: 254 training return: tensor(239.9549, device='cuda:0')
episode: 255 training return: tensor(167.3946, device='cuda:0')
epoch: 64 test_true_pfm: 3374.964083607094 sim_pfm: 203.5737795132154
episode: 256 training return: tensor(238.3231, device='cuda:0')
episode: 257 training return: tensor(132.0136, device='cuda:0')
episode: 258 training return: tensor(204.4296, device='cuda:0')
episode: 259 training return: tensor(240.9332, device='cuda:0')
epoch: 65 test_true_pfm: 3402.487788045586 sim_pfm: 212.0628235032976
episode: 260 training return: tensor(111.0374, device='cuda:0')
episode: 261 training return: tensor(262.8848, device='cuda:0')
episode: 262 training return: tensor(245.9881, device='cuda:0')
episode: 263 training return: tensor(237.8021, device='cuda:0')
epoch: 66 test_true_pfm: 3462.279907193315 sim_pfm: 330.4574285292183
episode: 264 training return: tensor(169.5932, device='cuda:0')
episode: 265 training return: tensor(266.0060, device='cuda:0')
episode: 266 training return: tensor(290.3636, device='cuda:0')
episode: 267 training return: tensor(70.4897, device='cuda:0')
epoch: 67 test_true_pfm: 3367.468441641293 sim_pfm: 291.0991295204537
episode: 268 training return: tensor(216.9920, device='cuda:0')
episode: 269 training return: tensor(257.5119, device='cuda:0')
episode: 270 training return: tensor(233.8107, device='cuda:0')
episode: 271 training return: tensor(98.8014, device='cuda:0')
epoch: 68 test_true_pfm: 3486.0252224727496 sim_pfm: 246.97099673651005
episode: 272 training return: tensor(165.3458, device='cuda:0')
episode: 273 training return: tensor(212.4974, device='cuda:0')
episode: 274 training return: tensor(158.0133, device='cuda:0')
episode: 275 training return: tensor(196.2864, device='cuda:0')
epoch: 69 test_true_pfm: 3506.3414325331228 sim_pfm: 291.58453082848183
episode: 276 training return: tensor(200.3936, device='cuda:0')
episode: 277 training return: tensor(57.1783, device='cuda:0')
episode: 278 training return: tensor(245.2940, device='cuda:0')
episode: 279 training return: tensor(-294.1679, device='cuda:0')
epoch: 70 test_true_pfm: 3436.0567733422517 sim_pfm: 188.29936962836655
episode: 280 training return: tensor(265.6034, device='cuda:0')
episode: 281 training return: tensor(262.2036, device='cuda:0')
episode: 282 training return: tensor(308.3504, device='cuda:0')
episode: 283 training return: tensor(-140.8650, device='cuda:0')
epoch: 71 test_true_pfm: 3439.052853463833 sim_pfm: 131.79523307057875
episode: 284 training return: tensor(235.1830, device='cuda:0')
episode: 285 training return: tensor(194.2768, device='cuda:0')
episode: 286 training return: tensor(330.5471, device='cuda:0')
episode: 287 training return: tensor(-318.7924, device='cuda:0')
epoch: 72 test_true_pfm: 3502.771348219703 sim_pfm: 277.3399845154102
episode: 288 training return: tensor(239.5052, device='cuda:0')
episode: 289 training return: tensor(214.1038, device='cuda:0')
episode: 290 training return: tensor(193.1557, device='cuda:0')
episode: 291 training return: tensor(305.4850, device='cuda:0')
epoch: 73 test_true_pfm: 3475.4235577927006 sim_pfm: 202.45581072658146
episode: 292 training return: tensor(342.6238, device='cuda:0')
episode: 293 training return: tensor(286.8468, device='cuda:0')
episode: 294 training return: tensor(110.0816, device='cuda:0')
episode: 295 training return: tensor(240.9817, device='cuda:0')
epoch: 74 test_true_pfm: 3415.5364567481975 sim_pfm: 299.72781435225625
episode: 296 training return: tensor(168.6634, device='cuda:0')
episode: 297 training return: tensor(267.1972, device='cuda:0')
episode: 298 training return: tensor(267.8542, device='cuda:0')
episode: 299 training return: tensor(200.3862, device='cuda:0')
epoch: 75 test_true_pfm: 3142.6120100514545 sim_pfm: 333.49667916450807
episode: 300 training return: tensor(269.1009, device='cuda:0')
episode: 301 training return: tensor(235.7855, device='cuda:0')
episode: 302 training return: tensor(307.7362, device='cuda:0')
episode: 303 training return: tensor(-209.8197, device='cuda:0')
epoch: 76 test_true_pfm: 3424.37461901057 sim_pfm: 220.24373859896636
episode: 304 training return: tensor(222.0499, device='cuda:0')
episode: 305 training return: tensor(248.1523, device='cuda:0')
episode: 306 training return: tensor(220.5669, device='cuda:0')
episode: 307 training return: tensor(251.9999, device='cuda:0')
epoch: 77 test_true_pfm: 3323.909466505554 sim_pfm: 294.49752337730024
episode: 308 training return: tensor(292.8844, device='cuda:0')
episode: 309 training return: tensor(332.6231, device='cuda:0')
episode: 310 training return: tensor(234.3512, device='cuda:0')
episode: 311 training return: tensor(112.7164, device='cuda:0')
epoch: 78 test_true_pfm: 3458.228892678439 sim_pfm: 207.4699347469917
episode: 312 training return: tensor(291.3487, device='cuda:0')
episode: 313 training return: tensor(178.4374, device='cuda:0')
episode: 314 training return: tensor(222.3701, device='cuda:0')
episode: 315 training return: tensor(208.3957, device='cuda:0')
epoch: 79 test_true_pfm: 3444.031142483335 sim_pfm: 300.90636320996174
episode: 316 training return: tensor(179.9622, device='cuda:0')
episode: 317 training return: tensor(272.7686, device='cuda:0')
episode: 318 training return: tensor(243.5882, device='cuda:0')
episode: 319 training return: tensor(191.7349, device='cuda:0')
epoch: 80 test_true_pfm: 3475.76640349301 sim_pfm: 300.7671227400618
episode: 320 training return: tensor(-175.8670, device='cuda:0')
episode: 321 training return: tensor(249.1653, device='cuda:0')
episode: 322 training return: tensor(159.6437, device='cuda:0')
episode: 323 training return: tensor(232.5891, device='cuda:0')
epoch: 81 test_true_pfm: 3500.125275621756 sim_pfm: 313.15472801728174
episode: 324 training return: tensor(195.4451, device='cuda:0')
episode: 325 training return: tensor(49.6255, device='cuda:0')
episode: 326 training return: tensor(256.3310, device='cuda:0')
episode: 327 training return: tensor(304.4189, device='cuda:0')
epoch: 82 test_true_pfm: 3502.860948070378 sim_pfm: 349.0713547640674
episode: 328 training return: tensor(-76.2147, device='cuda:0')
episode: 329 training return: tensor(170.6709, device='cuda:0')
episode: 330 training return: tensor(204.1705, device='cuda:0')
episode: 331 training return: tensor(305.2532, device='cuda:0')
epoch: 83 test_true_pfm: 3505.961449052766 sim_pfm: 318.7616953261119
episode: 332 training return: tensor(296.4628, device='cuda:0')
episode: 333 training return: tensor(52.1142, device='cuda:0')
episode: 334 training return: tensor(258.6526, device='cuda:0')
episode: 335 training return: tensor(253.7322, device='cuda:0')
epoch: 84 test_true_pfm: 3485.399479388912 sim_pfm: 352.5690485580126
episode: 336 training return: tensor(210.4059, device='cuda:0')
episode: 337 training return: tensor(260.8820, device='cuda:0')
episode: 338 training return: tensor(184.4230, device='cuda:0')
episode: 339 training return: tensor(300.2951, device='cuda:0')
epoch: 85 test_true_pfm: 3468.210489040368 sim_pfm: 348.12834843507153
episode: 340 training return: tensor(221.8652, device='cuda:0')
episode: 341 training return: tensor(185.2438, device='cuda:0')
episode: 342 training return: tensor(213.2500, device='cuda:0')
episode: 343 training return: tensor(236.0383, device='cuda:0')
epoch: 86 test_true_pfm: 3403.548328127217 sim_pfm: 309.27476113590336
episode: 344 training return: tensor(259.4724, device='cuda:0')
episode: 345 training return: tensor(304.3689, device='cuda:0')
episode: 346 training return: tensor(221.7061, device='cuda:0')
episode: 347 training return: tensor(208.2174, device='cuda:0')
epoch: 87 test_true_pfm: 3484.395292006558 sim_pfm: 351.3731671126249
episode: 348 training return: tensor(237.6905, device='cuda:0')
episode: 349 training return: tensor(267.1837, device='cuda:0')
episode: 350 training return: tensor(249.4823, device='cuda:0')
episode: 351 training return: tensor(283.0894, device='cuda:0')
epoch: 88 test_true_pfm: 3445.7537282717167 sim_pfm: 316.76913275862654
episode: 352 training return: tensor(284.5804, device='cuda:0')
episode: 353 training return: tensor(249.5856, device='cuda:0')
episode: 354 training return: tensor(224.6293, device='cuda:0')
episode: 355 training return: tensor(253.4236, device='cuda:0')
epoch: 89 test_true_pfm: 3464.1154921304665 sim_pfm: 300.12858662131475
episode: 356 training return: tensor(255.3688, device='cuda:0')
episode: 357 training return: tensor(190.2386, device='cuda:0')
episode: 358 training return: tensor(326.2067, device='cuda:0')
episode: 359 training return: tensor(323.2336, device='cuda:0')
epoch: 90 test_true_pfm: 3438.173568815564 sim_pfm: 293.0899418513388
episode: 360 training return: tensor(310.5743, device='cuda:0')
episode: 361 training return: tensor(321.0990, device='cuda:0')
episode: 362 training return: tensor(316.4071, device='cuda:0')
episode: 363 training return: tensor(179.8117, device='cuda:0')
epoch: 91 test_true_pfm: 3456.66892029608 sim_pfm: 294.3785274106194
episode: 364 training return: tensor(178.5278, device='cuda:0')
episode: 365 training return: tensor(267.9435, device='cuda:0')
episode: 366 training return: tensor(331.6628, device='cuda:0')
episode: 367 training return: tensor(265.5612, device='cuda:0')
epoch: 92 test_true_pfm: 3478.313327076196 sim_pfm: 276.7615563031286
episode: 368 training return: tensor(253.4956, device='cuda:0')
episode: 369 training return: tensor(193.1230, device='cuda:0')
episode: 370 training return: tensor(323.7642, device='cuda:0')
episode: 371 training return: tensor(268.5904, device='cuda:0')
epoch: 93 test_true_pfm: 3481.9700011556347 sim_pfm: 318.51256819531164
episode: 372 training return: tensor(227.4217, device='cuda:0')
episode: 373 training return: tensor(268.0088, device='cuda:0')
episode: 374 training return: tensor(219.1016, device='cuda:0')
episode: 375 training return: tensor(197.9295, device='cuda:0')
epoch: 94 test_true_pfm: 3494.8199082182637 sim_pfm: 199.3372841242623
episode: 376 training return: tensor(235.4298, device='cuda:0')
episode: 377 training return: tensor(209.4823, device='cuda:0')
episode: 378 training return: tensor(372.1603, device='cuda:0')
episode: 379 training return: tensor(248.5995, device='cuda:0')
epoch: 95 test_true_pfm: 3476.0338929174904 sim_pfm: 254.90658276563045
episode: 380 training return: tensor(305.5625, device='cuda:0')
episode: 381 training return: tensor(317.9774, device='cuda:0')
episode: 382 training return: tensor(181.3770, device='cuda:0')
episode: 383 training return: tensor(289.0396, device='cuda:0')
epoch: 96 test_true_pfm: 3487.5859486987915 sim_pfm: 205.06853638034468
episode: 384 training return: tensor(289.4622, device='cuda:0')
episode: 385 training return: tensor(296.1110, device='cuda:0')
episode: 386 training return: tensor(158.3900, device='cuda:0')
episode: 387 training return: tensor(285.6366, device='cuda:0')
epoch: 97 test_true_pfm: 3130.8093581433827 sim_pfm: 283.68950367492897
episode: 388 training return: tensor(200.5251, device='cuda:0')
episode: 389 training return: tensor(219.3520, device='cuda:0')
episode: 390 training return: tensor(232.0013, device='cuda:0')
episode: 391 training return: tensor(239.7977, device='cuda:0')
epoch: 98 test_true_pfm: 3428.144457269965 sim_pfm: 264.22700725938194
episode: 392 training return: tensor(238.5064, device='cuda:0')
episode: 393 training return: tensor(194.8649, device='cuda:0')
episode: 394 training return: tensor(288.8558, device='cuda:0')
episode: 395 training return: tensor(226.8614, device='cuda:0')
epoch: 99 test_true_pfm: 3453.8051318365565 sim_pfm: 367.92895514437504
episode: 396 training return: tensor(-320.2406, device='cuda:0')
episode: 397 training return: tensor(141.9364, device='cuda:0')
episode: 398 training return: tensor(239.8897, device='cuda:0')
episode: 399 training return: tensor(264.5775, device='cuda:0')
epoch: 100 test_true_pfm: 3469.911770105438 sim_pfm: 216.96337978025744
episode: 400 training return: tensor(331.3578, device='cuda:0')
episode: 401 training return: tensor(256.9748, device='cuda:0')
episode: 402 training return: tensor(348.4366, device='cuda:0')
episode: 403 training return: tensor(226.5530, device='cuda:0')
epoch: 101 test_true_pfm: 3495.1254995355507 sim_pfm: 242.87197379059702
episode: 404 training return: tensor(362.1078, device='cuda:0')
episode: 405 training return: tensor(185.3075, device='cuda:0')
episode: 406 training return: tensor(246.5880, device='cuda:0')
episode: 407 training return: tensor(264.5195, device='cuda:0')
epoch: 102 test_true_pfm: 3497.139829310359 sim_pfm: 309.6472986045119
episode: 408 training return: tensor(222.5477, device='cuda:0')
episode: 409 training return: tensor(314.6500, device='cuda:0')
episode: 410 training return: tensor(297.9474, device='cuda:0')
episode: 411 training return: tensor(352.1946, device='cuda:0')
epoch: 103 test_true_pfm: 3465.3667355516577 sim_pfm: 294.52980332776013
episode: 412 training return: tensor(260.8221, device='cuda:0')
episode: 413 training return: tensor(203.6054, device='cuda:0')
episode: 414 training return: tensor(311.6943, device='cuda:0')
episode: 415 training return: tensor(223.1799, device='cuda:0')
epoch: 104 test_true_pfm: 3502.314827049873 sim_pfm: 313.58395782881416
episode: 416 training return: tensor(301.7933, device='cuda:0')
episode: 417 training return: tensor(252.2337, device='cuda:0')
episode: 418 training return: tensor(228.7718, device='cuda:0')
episode: 419 training return: tensor(348.8084, device='cuda:0')
epoch: 105 test_true_pfm: 3448.9669424019526 sim_pfm: 236.34046426190375
episode: 420 training return: tensor(265.9399, device='cuda:0')
episode: 421 training return: tensor(250.0749, device='cuda:0')
episode: 422 training return: tensor(282.0153, device='cuda:0')
episode: 423 training return: tensor(287.5216, device='cuda:0')
epoch: 106 test_true_pfm: 3456.2313904895655 sim_pfm: 324.1704362282374
episode: 424 training return: tensor(267.6848, device='cuda:0')
episode: 425 training return: tensor(-346.7393, device='cuda:0')
episode: 426 training return: tensor(204.6137, device='cuda:0')
episode: 427 training return: tensor(189.7761, device='cuda:0')
epoch: 107 test_true_pfm: 3455.763565174928 sim_pfm: 303.8125454416925
episode: 428 training return: tensor(295.3538, device='cuda:0')
episode: 429 training return: tensor(269.8972, device='cuda:0')
episode: 430 training return: tensor(309.6219, device='cuda:0')
episode: 431 training return: tensor(305.1968, device='cuda:0')
epoch: 108 test_true_pfm: 3490.7085151271526 sim_pfm: 321.6769958578516
episode: 432 training return: tensor(257.9859, device='cuda:0')
episode: 433 training return: tensor(310.0207, device='cuda:0')
episode: 434 training return: tensor(139.3565, device='cuda:0')
episode: 435 training return: tensor(313.3634, device='cuda:0')
epoch: 109 test_true_pfm: 3454.6420454998333 sim_pfm: 285.23831036077655
episode: 436 training return: tensor(286.3531, device='cuda:0')
episode: 437 training return: tensor(298.6673, device='cuda:0')
episode: 438 training return: tensor(251.5611, device='cuda:0')
episode: 439 training return: tensor(242.0466, device='cuda:0')
epoch: 110 test_true_pfm: 3456.97508988699 sim_pfm: 273.923119697293
episode: 440 training return: tensor(267.3070, device='cuda:0')
episode: 441 training return: tensor(167.0492, device='cuda:0')
episode: 442 training return: tensor(224.8624, device='cuda:0')
episode: 443 training return: tensor(322.4693, device='cuda:0')
epoch: 111 test_true_pfm: 3480.675782308123 sim_pfm: 304.6576055836825
episode: 444 training return: tensor(303.1261, device='cuda:0')
episode: 445 training return: tensor(251.3499, device='cuda:0')
episode: 446 training return: tensor(289.3844, device='cuda:0')
episode: 447 training return: tensor(395.3454, device='cuda:0')
epoch: 112 test_true_pfm: 3505.4366543019974 sim_pfm: 293.8098669430474
episode: 448 training return: tensor(216.2627, device='cuda:0')
episode: 449 training return: tensor(308.3647, device='cuda:0')
episode: 450 training return: tensor(300.4520, device='cuda:0')
episode: 451 training return: tensor(255.2163, device='cuda:0')
epoch: 113 test_true_pfm: 3439.9096829172327 sim_pfm: 280.4440469820499
episode: 452 training return: tensor(337.0529, device='cuda:0')
episode: 453 training return: tensor(290.6021, device='cuda:0')
episode: 454 training return: tensor(170.2143, device='cuda:0')
episode: 455 training return: tensor(342.8675, device='cuda:0')
epoch: 114 test_true_pfm: 3492.9901402909054 sim_pfm: 316.36716015155736
episode: 456 training return: tensor(265.2197, device='cuda:0')
episode: 457 training return: tensor(219.3967, device='cuda:0')
episode: 458 training return: tensor(260.5011, device='cuda:0')
episode: 459 training return: tensor(190.5560, device='cuda:0')
epoch: 115 test_true_pfm: 3500.1731878041332 sim_pfm: 289.2416345170932
episode: 460 training return: tensor(286.8255, device='cuda:0')
episode: 461 training return: tensor(281.0650, device='cuda:0')
episode: 462 training return: tensor(296.0685, device='cuda:0')
episode: 463 training return: tensor(215.2741, device='cuda:0')
epoch: 116 test_true_pfm: 3449.886454611997 sim_pfm: 297.0512026860961
episode: 464 training return: tensor(281.1927, device='cuda:0')
episode: 465 training return: tensor(220.1557, device='cuda:0')
episode: 466 training return: tensor(418.3374, device='cuda:0')
episode: 467 training return: tensor(287.4729, device='cuda:0')
epoch: 117 test_true_pfm: 3457.365231362363 sim_pfm: 340.18627087733086
episode: 468 training return: tensor(311.2448, device='cuda:0')
episode: 469 training return: tensor(295.1565, device='cuda:0')
episode: 470 training return: tensor(314.8759, device='cuda:0')
episode: 471 training return: tensor(312.4209, device='cuda:0')
epoch: 118 test_true_pfm: 3464.7348846039094 sim_pfm: 302.1039306722717
episode: 472 training return: tensor(-358.8384, device='cuda:0')
episode: 473 training return: tensor(283.6967, device='cuda:0')
episode: 474 training return: tensor(265.0208, device='cuda:0')
episode: 475 training return: tensor(207.1657, device='cuda:0')
epoch: 119 test_true_pfm: 3520.2462612766017 sim_pfm: 330.3627701374547
episode: 476 training return: tensor(224.1913, device='cuda:0')
episode: 477 training return: tensor(294.1919, device='cuda:0')
episode: 478 training return: tensor(256.3745, device='cuda:0')
episode: 479 training return: tensor(235.2630, device='cuda:0')
epoch: 120 test_true_pfm: 3454.6493790032837 sim_pfm: 280.78301237108343
episode: 480 training return: tensor(256.4868, device='cuda:0')
episode: 481 training return: tensor(265.0911, device='cuda:0')
episode: 482 training return: tensor(149.4642, device='cuda:0')
episode: 483 training return: tensor(249.6060, device='cuda:0')
epoch: 121 test_true_pfm: 3504.3526522341776 sim_pfm: 327.16477683745325
episode: 484 training return: tensor(255.0343, device='cuda:0')
episode: 485 training return: tensor(240.0503, device='cuda:0')
episode: 486 training return: tensor(343.4340, device='cuda:0')
episode: 487 training return: tensor(240.1384, device='cuda:0')
epoch: 122 test_true_pfm: 3476.432840417743 sim_pfm: 274.80737592978403
episode: 488 training return: tensor(292.6490, device='cuda:0')
episode: 489 training return: tensor(237.7021, device='cuda:0')
episode: 490 training return: tensor(298.1096, device='cuda:0')
episode: 491 training return: tensor(297.3684, device='cuda:0')
epoch: 123 test_true_pfm: 3455.9615833408393 sim_pfm: 275.41197168880416
episode: 492 training return: tensor(241.8428, device='cuda:0')
episode: 493 training return: tensor(318.8102, device='cuda:0')
episode: 494 training return: tensor(237.1008, device='cuda:0')
episode: 495 training return: tensor(243.5691, device='cuda:0')
epoch: 124 test_true_pfm: 3443.896232837868 sim_pfm: 274.3702271763662
episode: 496 training return: tensor(257.6293, device='cuda:0')
episode: 497 training return: tensor(234.3271, device='cuda:0')
episode: 498 training return: tensor(317.9565, device='cuda:0')
episode: 499 training return: tensor(255.6595, device='cuda:0')
epoch: 125 test_true_pfm: 3500.6168621748016 sim_pfm: 333.06029721763724
episode: 500 training return: tensor(281.0174, device='cuda:0')
episode: 501 training return: tensor(233.4551, device='cuda:0')
episode: 502 training return: tensor(301.4829, device='cuda:0')
episode: 503 training return: tensor(250.0884, device='cuda:0')
epoch: 126 test_true_pfm: 3499.7428066483717 sim_pfm: 329.69641944182996
episode: 504 training return: tensor(251.2858, device='cuda:0')
episode: 505 training return: tensor(167.7732, device='cuda:0')
episode: 506 training return: tensor(395.7435, device='cuda:0')
episode: 507 training return: tensor(310.0112, device='cuda:0')
epoch: 127 test_true_pfm: 3483.909018759399 sim_pfm: 318.24823424157995
episode: 508 training return: tensor(366.9240, device='cuda:0')
episode: 509 training return: tensor(321.3731, device='cuda:0')
episode: 510 training return: tensor(270.7841, device='cuda:0')
episode: 511 training return: tensor(381.6756, device='cuda:0')
epoch: 128 test_true_pfm: 3473.7902483910925 sim_pfm: 320.3821986614882
episode: 512 training return: tensor(343.4103, device='cuda:0')
episode: 513 training return: tensor(256.2502, device='cuda:0')
episode: 514 training return: tensor(284.5272, device='cuda:0')
episode: 515 training return: tensor(261.8432, device='cuda:0')
epoch: 129 test_true_pfm: 3470.8168516470687 sim_pfm: 321.3788137702504
episode: 516 training return: tensor(233.9001, device='cuda:0')
episode: 517 training return: tensor(289.6406, device='cuda:0')
episode: 518 training return: tensor(321.9091, device='cuda:0')
episode: 519 training return: tensor(253.6205, device='cuda:0')
epoch: 130 test_true_pfm: 3450.5509374078792 sim_pfm: 307.09232581519365
episode: 520 training return: tensor(298.6552, device='cuda:0')
episode: 521 training return: tensor(272.6844, device='cuda:0')
episode: 522 training return: tensor(307.0490, device='cuda:0')
episode: 523 training return: tensor(-47.8241, device='cuda:0')
epoch: 131 test_true_pfm: 3300.5151186331386 sim_pfm: 296.7439122208161
episode: 524 training return: tensor(287.5939, device='cuda:0')
episode: 525 training return: tensor(366.3956, device='cuda:0')
episode: 526 training return: tensor(271.0557, device='cuda:0')
episode: 527 training return: tensor(236.5657, device='cuda:0')
epoch: 132 test_true_pfm: 3467.9263300679195 sim_pfm: 340.7553353291005
episode: 528 training return: tensor(322.5184, device='cuda:0')
episode: 529 training return: tensor(215.0510, device='cuda:0')
episode: 530 training return: tensor(394.8366, device='cuda:0')
episode: 531 training return: tensor(249.6421, device='cuda:0')
epoch: 133 test_true_pfm: 3503.026286790621 sim_pfm: 356.950905422845
episode: 532 training return: tensor(267.6999, device='cuda:0')
episode: 533 training return: tensor(285.3096, device='cuda:0')
episode: 534 training return: tensor(173.2136, device='cuda:0')
episode: 535 training return: tensor(368.5588, device='cuda:0')
epoch: 134 test_true_pfm: 3428.328071417712 sim_pfm: 297.96381412960665
episode: 536 training return: tensor(247.7905, device='cuda:0')
episode: 537 training return: tensor(286.1586, device='cuda:0')
episode: 538 training return: tensor(302.3444, device='cuda:0')
episode: 539 training return: tensor(335.8230, device='cuda:0')
epoch: 135 test_true_pfm: 3527.1975805246097 sim_pfm: 298.0914825869647
episode: 540 training return: tensor(272.4741, device='cuda:0')
episode: 541 training return: tensor(253.7092, device='cuda:0')
episode: 542 training return: tensor(269.6919, device='cuda:0')
episode: 543 training return: tensor(168.5195, device='cuda:0')
epoch: 136 test_true_pfm: 3486.48304157904 sim_pfm: 280.6647160438394
episode: 544 training return: tensor(307.5361, device='cuda:0')
episode: 545 training return: tensor(297.4666, device='cuda:0')
episode: 546 training return: tensor(342.4067, device='cuda:0')
episode: 547 training return: tensor(307.9343, device='cuda:0')
epoch: 137 test_true_pfm: 3532.6730286778907 sim_pfm: 326.75851546031964
episode: 548 training return: tensor(272.3011, device='cuda:0')
episode: 549 training return: tensor(297.3901, device='cuda:0')
episode: 550 training return: tensor(285.7326, device='cuda:0')
episode: 551 training return: tensor(279.6412, device='cuda:0')
epoch: 138 test_true_pfm: 3470.485255032587 sim_pfm: 335.17174271069234
episode: 552 training return: tensor(263.2920, device='cuda:0')
episode: 553 training return: tensor(298.7722, device='cuda:0')
episode: 554 training return: tensor(337.1774, device='cuda:0')
episode: 555 training return: tensor(258.6969, device='cuda:0')
epoch: 139 test_true_pfm: 3542.8747201939036 sim_pfm: 368.5579917306216
episode: 556 training return: tensor(294.7955, device='cuda:0')
episode: 557 training return: tensor(305.0163, device='cuda:0')
episode: 558 training return: tensor(263.4737, device='cuda:0')
episode: 559 training return: tensor(252.8542, device='cuda:0')
epoch: 140 test_true_pfm: 3501.6656672794857 sim_pfm: 317.51009065688896
episode: 560 training return: tensor(279.6288, device='cuda:0')
episode: 561 training return: tensor(235.4369, device='cuda:0')
episode: 562 training return: tensor(281.3255, device='cuda:0')
episode: 563 training return: tensor(328.5801, device='cuda:0')
epoch: 141 test_true_pfm: 3479.98925812927 sim_pfm: 309.1697663946543
episode: 564 training return: tensor(219.4528, device='cuda:0')
episode: 565 training return: tensor(294.8786, device='cuda:0')
episode: 566 training return: tensor(264.3494, device='cuda:0')
episode: 567 training return: tensor(299.3120, device='cuda:0')
epoch: 142 test_true_pfm: 3492.5850256944796 sim_pfm: 195.45356474704263
episode: 568 training return: tensor(271.3439, device='cuda:0')
episode: 569 training return: tensor(302.3340, device='cuda:0')
episode: 570 training return: tensor(365.1234, device='cuda:0')
episode: 571 training return: tensor(333.1295, device='cuda:0')
epoch: 143 test_true_pfm: 3471.012616530763 sim_pfm: 298.84701206174213
episode: 572 training return: tensor(338.7756, device='cuda:0')
episode: 573 training return: tensor(326.6841, device='cuda:0')
episode: 574 training return: tensor(290.7710, device='cuda:0')
episode: 575 training return: tensor(215.5932, device='cuda:0')
epoch: 144 test_true_pfm: 3508.672613020089 sim_pfm: 338.9959644084059
episode: 576 training return: tensor(325.3394, device='cuda:0')
episode: 577 training return: tensor(226.7900, device='cuda:0')
episode: 578 training return: tensor(173.0121, device='cuda:0')
episode: 579 training return: tensor(300.0528, device='cuda:0')
epoch: 145 test_true_pfm: 3505.0293542381237 sim_pfm: 298.21040397679707
episode: 580 training return: tensor(261.3502, device='cuda:0')
episode: 581 training return: tensor(289.8441, device='cuda:0')
episode: 582 training return: tensor(358.3509, device='cuda:0')
episode: 583 training return: tensor(295.5694, device='cuda:0')
epoch: 146 test_true_pfm: 3458.4391124120666 sim_pfm: 290.29998538338504
episode: 584 training return: tensor(236.3503, device='cuda:0')
episode: 585 training return: tensor(256.0007, device='cuda:0')
episode: 586 training return: tensor(171.3430, device='cuda:0')
episode: 587 training return: tensor(212.1597, device='cuda:0')
epoch: 147 test_true_pfm: 3471.8353045104127 sim_pfm: 288.19569983460434
episode: 588 training return: tensor(256.3900, device='cuda:0')
episode: 589 training return: tensor(378.7599, device='cuda:0')
episode: 590 training return: tensor(202.8581, device='cuda:0')
episode: 591 training return: tensor(293.5715, device='cuda:0')
epoch: 148 test_true_pfm: 3215.2812539955794 sim_pfm: 316.34869850166916
episode: 592 training return: tensor(316.9371, device='cuda:0')
episode: 593 training return: tensor(295.4734, device='cuda:0')
episode: 594 training return: tensor(290.8494, device='cuda:0')
episode: 595 training return: tensor(42.7621, device='cuda:0')
epoch: 149 test_true_pfm: 3462.756499506558 sim_pfm: 318.15113607165404
episode: 596 training return: tensor(243.5886, device='cuda:0')
episode: 597 training return: tensor(254.3619, device='cuda:0')
episode: 598 training return: tensor(255.0804, device='cuda:0')
episode: 599 training return: tensor(250.6673, device='cuda:0')
epoch: 150 test_true_pfm: 3466.264194108795 sim_pfm: 328.12082150574616
