['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '3', '--data', '3000', '--sub']
epoch: 0 training_loss 0.24438111521303654 test_loss: 0.08001519441604614
epoch: 1 training_loss 0.1418052228540182 test_loss: 0.07838844060897827
epoch: 2 training_loss 0.11714851681143046 test_loss: 0.06640208959579467
epoch: 3 training_loss 0.1163367747142911 test_loss: 0.07274333834648132
epoch: 4 training_loss 0.09413043979555369 test_loss: 0.07208017110824586
epoch: 5 training_loss 0.09775172801688313 test_loss: 0.07845517992973328
epoch: 6 training_loss 0.10080402841791511 test_loss: 0.07241862416267394
epoch: 7 training_loss 0.0882303818129003 test_loss: 0.08302092552185059
epoch: 8 training_loss 0.07982081664726139 test_loss: 0.07856670022010803
epoch: 9 training_loss 0.08027058228850364 test_loss: 0.0714220404624939
epoch: 10 training_loss 0.0802834647987038 test_loss: 0.07956618666648865
epoch: 11 training_loss 0.08650551777333021 test_loss: 0.09417209029197693
epoch: 12 training_loss 0.07586661150678992 test_loss: 0.07798106670379638
epoch: 13 training_loss 0.0772359244339168 test_loss: 0.08359003067016602
epoch: 14 training_loss 0.07795757957734167 test_loss: 0.08451275825500489
epoch: 15 training_loss 0.07634165173396468 test_loss: 0.07923692464828491
epoch: 16 training_loss 0.07356382567435503 test_loss: 0.07808172106742858
epoch: 17 training_loss 0.06782209601253271 test_loss: 0.0812563955783844
epoch: 18 training_loss 0.06417532859370112 test_loss: 0.08669698834419251
epoch: 19 training_loss 0.0705108685977757 test_loss: 0.08076896071434021
epoch: 20 training_loss 0.0668015943840146 test_loss: 0.07704285979270935
epoch: 21 training_loss 0.06350671919062734 test_loss: 0.08163385987281799
epoch: 22 training_loss 0.05954811594448984 test_loss: 0.08916367292404175
epoch: 23 training_loss 0.06374373219907284 test_loss: 0.08938686847686768
epoch: 24 training_loss 0.05668105756863952 test_loss: 0.09162997603416442
epoch: 25 training_loss 0.06862694337032735 test_loss: 0.09052765965461732
epoch: 26 training_loss 0.05690083676949143 test_loss: 0.08743886351585388
epoch: 27 training_loss 0.057536797393113376 test_loss: 0.08589280843734741
epoch: 28 training_loss 0.052567071383818983 test_loss: 0.09296907782554627
epoch: 29 training_loss 0.05624760717153549 test_loss: 0.09371238946914673
epoch: 30 training_loss 0.05959295843727887 test_loss: 0.09112417101860046
epoch: 31 training_loss 0.05617259244434535 test_loss: 0.09499507546424865
epoch: 32 training_loss 0.05566734883002937 test_loss: 0.09909061789512634
epoch: 33 training_loss 0.05652111309580505 test_loss: 0.09650691151618958
epoch: 34 training_loss 0.056557581424713135 test_loss: 0.09417636394500732
epoch: 35 training_loss 0.05102965662255883 test_loss: 0.09564561247825623
epoch: 36 training_loss 0.05208472864702344 test_loss: 0.10022662878036499
epoch: 37 training_loss 0.0466782440431416 test_loss: 0.09280633330345153
epoch: 38 training_loss 0.04785299095325172 test_loss: 0.09792867302894592
epoch: 39 training_loss 0.043581033097580074 test_loss: 0.09523443579673767
epoch: 40 training_loss 0.04589221584610641 test_loss: 0.1186947226524353
epoch: 41 training_loss 0.05097106281202286 test_loss: 0.13216649293899535
epoch: 42 training_loss 0.05308534782379866 test_loss: 0.1048123598098755
epoch: 43 training_loss 0.04766347768250853 test_loss: 0.10850245952606201
epoch: 44 training_loss 0.04973764552734792 test_loss: 0.11603115797042847
epoch: 45 training_loss 0.04563529192004353 test_loss: 0.1024783968925476
epoch: 46 training_loss 0.03809309462085366 test_loss: 0.11928017139434814
epoch: 47 training_loss 0.04067600514739752 test_loss: 0.10723775625228882
epoch: 48 training_loss 0.046092986864969133 test_loss: 0.10806218385696412
epoch: 49 training_loss 0.03791588712949306 test_loss: 0.10909037590026856
epoch: 50 training_loss 0.04597804814577103 test_loss: 0.11456329822540283
epoch: 51 training_loss 0.039482674980536106 test_loss: 0.11392194032669067
epoch: 52 training_loss 0.04239616233855486 test_loss: 0.11603991985321045
epoch: 53 training_loss 0.04017240405548364 test_loss: 0.11682606935501098
epoch: 54 training_loss 0.033426811704412106 test_loss: 0.10718690156936646
epoch: 55 training_loss 0.030938552198931574 test_loss: 0.12047761678695679
epoch: 56 training_loss 0.03581228287890553 test_loss: 0.11628047227859498
epoch: 57 training_loss 0.032839871444739405 test_loss: 0.11100703477859497
epoch: 58 training_loss 0.03783516494557262 test_loss: 0.1173168420791626
epoch: 59 training_loss 0.03591051368508488 test_loss: 0.1147623062133789
epoch: 60 training_loss 0.032493224125355484 test_loss: 0.1161620020866394
epoch: 61 training_loss 0.028538151639513672 test_loss: 0.10767501592636108
epoch: 62 training_loss 0.03225895829964429 test_loss: 0.10644385814666749
epoch: 63 training_loss 0.03591633707750588 test_loss: 0.1323036789894104
epoch: 64 training_loss 0.038610562537796794 test_loss: 0.1084479570388794
epoch: 65 training_loss 0.032640223153866825 test_loss: 0.11260408163070679
epoch: 66 training_loss 0.032744574858807024 test_loss: 0.1127633810043335
epoch: 67 training_loss 0.030854080552235246 test_loss: 0.1257834553718567
epoch: 68 training_loss 0.027649643588811158 test_loss: 0.112815523147583
epoch: 69 training_loss 0.027281548278406263 test_loss: 0.12003936767578124
epoch: 70 training_loss 0.025412687663920224 test_loss: 0.12428778409957886
epoch: 71 training_loss 0.025154695704113692 test_loss: 0.1296954870223999
epoch: 72 training_loss 0.028863452703226357 test_loss: 0.12826796770095825
epoch: 73 training_loss 0.023757354035042227 test_loss: 0.10968459844589233
epoch: 74 training_loss 0.02486747196409851 test_loss: 0.12309420108795166
epoch: 75 training_loss 0.02675429962342605 test_loss: 0.12383083105087281
epoch: 76 training_loss 0.021699830039869993 test_loss: 0.13271430730819703
epoch: 77 training_loss 0.020742736696265637 test_loss: 0.1286397933959961
epoch: 78 training_loss 0.027369787245988847 test_loss: 0.11050747632980347
epoch: 79 training_loss 0.022925864653661848 test_loss: 0.11950597763061524
epoch: 80 training_loss 0.03067938258871436 test_loss: 0.10630764961242675
epoch: 81 training_loss 0.028307484523393215 test_loss: 0.11555713415145874
epoch: 82 training_loss 0.02267977869254537 test_loss: 0.11772527694702148
epoch: 83 training_loss 0.024232349721714855 test_loss: 0.12932173013687134
epoch: 84 training_loss 0.025016457962337882 test_loss: 0.1330300450325012
epoch: 85 training_loss 0.023600864834152163 test_loss: 0.12886489629745485
epoch: 86 training_loss 0.018434851544443517 test_loss: 0.12923527956008912
epoch: 87 training_loss 0.020624208773951977 test_loss: 0.1283332109451294
epoch: 88 training_loss 0.0260256393160671 test_loss: 0.14403659105300903
epoch: 89 training_loss 0.020239963796921074 test_loss: 0.13536810874938965
epoch: 90 training_loss 0.016497300469782202 test_loss: 0.13145934343338012
epoch: 91 training_loss 0.019482608367688954 test_loss: 0.14936174154281617
epoch: 92 training_loss 0.020611378434114157 test_loss: 0.11637799739837647
epoch: 93 training_loss 0.01583301080390811 test_loss: 0.12817118167877198
epoch: 94 training_loss 0.021761249024420976 test_loss: 0.1338430404663086
epoch: 95 training_loss 0.018941438670735805 test_loss: 0.1402498722076416
epoch: 96 training_loss 0.014328550652135164 test_loss: 0.12571107149124144
epoch: 97 training_loss 0.0159586561890319 test_loss: 0.14063037633895875
epoch: 98 training_loss 0.019968228156212717 test_loss: 0.1348915219306946
epoch: 99 training_loss 0.018367998441681264 test_loss: 0.12479195594787598
epoch: 100 training_loss 0.015468036816455423 test_loss: 0.13584736585617066
epoch: 101 training_loss 0.028588439049199225 test_loss: 0.15676608085632324
epoch: 102 training_loss 0.017466685730032624 test_loss: 0.13290724754333497
epoch: 103 training_loss 0.014301631387788803 test_loss: 0.13751859664916993
epoch: 104 training_loss 0.014763340333011 test_loss: 0.13614219427108765
epoch: 105 training_loss 0.013970024134032429 test_loss: 0.12565762996673585
epoch: 106 training_loss 0.016277267010882498 test_loss: 0.13511152267456056
epoch: 107 training_loss 0.018720938868355005 test_loss: 0.14492766857147216
epoch: 108 training_loss 0.017818089488428085 test_loss: 0.13058067560195924
epoch: 109 training_loss 0.013224068502895535 test_loss: 0.1401672124862671
epoch: 110 training_loss 0.011228140798048116 test_loss: 0.14221798181533812
epoch: 111 training_loss 0.010361800973769278 test_loss: 0.13691290616989135
epoch: 112 training_loss 0.011184039730578662 test_loss: 0.13629846572875975
epoch: 113 training_loss 0.010195032823830844 test_loss: 0.13346381187438966
epoch: 114 training_loss 0.013826476981630548 test_loss: 0.13821545839309693
epoch: 115 training_loss 0.010669796660076827 test_loss: 0.1410103440284729
epoch: 116 training_loss 0.01750873057404533 test_loss: 0.14789090156555176
epoch: 117 training_loss 0.010692305392585694 test_loss: 0.1365967273712158
epoch: 118 training_loss 0.0096103580086492 test_loss: 0.12849782705307006
epoch: 119 training_loss 0.008687811602139845 test_loss: 0.13951876163482665
epoch: 120 training_loss 0.012362601070199162 test_loss: 0.15962742567062377
epoch: 121 training_loss 0.024220899776555598 test_loss: 0.16792141199111937
epoch: 122 training_loss 0.047950483807362616 test_loss: 0.12573692798614503
epoch: 123 training_loss 0.02202566918451339 test_loss: 0.14050716161727905
epoch: 124 training_loss 0.01296596477739513 test_loss: 0.12481818199157715
epoch: 125 training_loss 0.010975073946174235 test_loss: 0.13162657022476196
epoch: 126 training_loss 0.011363074907567352 test_loss: 0.1473685622215271
epoch: 127 training_loss 0.010802401426481083 test_loss: 0.1390506386756897
epoch: 128 training_loss 0.010656080765184015 test_loss: 0.14863530397415162
epoch: 129 training_loss 0.008558588733430952 test_loss: 0.13693637847900392
epoch: 130 training_loss 0.007948084690142423 test_loss: 0.15005193948745726
epoch: 131 training_loss 0.00692276980727911 test_loss: 0.1426316738128662
epoch: 132 training_loss 0.014055232849204912 test_loss: 0.13935225009918212
epoch: 133 training_loss 0.013410131291020661 test_loss: 0.13393054008483887
epoch: 134 training_loss 0.010219615653622895 test_loss: 0.14311769008636474
epoch: 135 training_loss 0.009056219978374429 test_loss: 0.13764842748641967
epoch: 136 training_loss 0.008507547390181572 test_loss: 0.13461716175079347
epoch: 137 training_loss 0.013399200092535465 test_loss: 0.15383855104446412
epoch: 138 training_loss 0.012712992582819424 test_loss: 0.14866976737976073
epoch: 139 training_loss 0.017847313124220818 test_loss: 0.1371203899383545
epoch: 140 training_loss 0.008442010935395957 test_loss: 0.13666253089904784
epoch: 141 training_loss 0.007052226456580684 test_loss: 0.13363709449768066
epoch: 142 training_loss 0.006325171934440732 test_loss: 0.14753642082214355
epoch: 143 training_loss 0.00759942565462552 test_loss: 0.13954098224639894
epoch: 144 training_loss 0.006068828427232802 test_loss: 0.14322881698608397
epoch: 145 training_loss 0.004506776777561754 test_loss: 0.14363222122192382
epoch: 146 training_loss 0.0069687471864745025 test_loss: 0.13868231773376466
epoch: 147 training_loss 0.005402953851735219 test_loss: 0.1416995644569397
epoch: 148 training_loss 0.0046937871223781255 test_loss: 0.14266483783721923
epoch: 149 training_loss 0.005746877287747338 test_loss: 0.14338326454162598
epoch: 0 training_loss 42.76378400802612 test_loss: 10.443008422851562
epoch: 1 training_loss 17.40403120994568 test_loss: 6.875447082519531
epoch: 2 training_loss 12.939927606582641 test_loss: 5.431181716918945
epoch: 3 training_loss 10.589691677093505 test_loss: 4.627545547485352
epoch: 4 training_loss 9.179566020965575 test_loss: 4.1695556640625
epoch: 5 training_loss 8.36167887687683 test_loss: 3.848170852661133
epoch: 6 training_loss 7.672239356040954 test_loss: 3.6769100189208985
epoch: 7 training_loss 7.163575897216797 test_loss: 3.417733383178711
epoch: 8 training_loss 6.728949885368348 test_loss: 3.230826187133789
epoch: 9 training_loss 6.478294763565064 test_loss: 3.114348030090332
epoch: 10 training_loss 6.126925358772278 test_loss: 2.980513572692871
epoch: 11 training_loss 5.888634557723999 test_loss: 2.8521600723266602
epoch: 12 training_loss 5.725882487297058 test_loss: 2.7507532119750975
epoch: 13 training_loss 5.451724617481232 test_loss: 2.6627481460571287
epoch: 14 training_loss 5.257455797195434 test_loss: 2.6000595092773438
epoch: 15 training_loss 4.912464406490326 test_loss: 2.5035762786865234
epoch: 16 training_loss 4.76494959115982 test_loss: 2.4500537872314454
epoch: 17 training_loss 4.739408657550812 test_loss: 2.358183479309082
epoch: 18 training_loss 4.604436869621277 test_loss: 2.3165679931640626
epoch: 19 training_loss 4.436874923706054 test_loss: 2.2710437774658203
epoch: 20 training_loss 4.307421443462371 test_loss: 2.2609724044799804
epoch: 21 training_loss 4.188450138568879 test_loss: 2.1643703460693358
epoch: 22 training_loss 4.096073834896088 test_loss: 2.1322980880737306
epoch: 23 training_loss 4.0440561079978945 test_loss: 2.1218929290771484
epoch: 24 training_loss 4.034903287887573 test_loss: 2.063379096984863
epoch: 25 training_loss 3.876747965812683 test_loss: 2.033428764343262
epoch: 26 training_loss 3.8241802883148193 test_loss: 2.037295341491699
epoch: 27 training_loss 3.758858096599579 test_loss: 1.9554634094238281
epoch: 28 training_loss 3.70288033246994 test_loss: 1.9325017929077148
epoch: 29 training_loss 3.6126568031311037 test_loss: 1.9305286407470703
epoch: 30 training_loss 3.5877025651931764 test_loss: 1.8974441528320312
epoch: 31 training_loss 3.5417440509796143 test_loss: 1.8588788986206055
epoch: 32 training_loss 3.4008053946495056 test_loss: 1.8441253662109376
epoch: 33 training_loss 3.4258864665031434 test_loss: 1.8588218688964844
epoch: 34 training_loss 3.374782667160034 test_loss: 1.7945846557617187
epoch: 35 training_loss 3.3511760425567627 test_loss: 1.7787025451660157
epoch: 36 training_loss 3.2473016667366026 test_loss: 1.766646957397461
epoch: 37 training_loss 3.2379246592521667 test_loss: 1.760892677307129
epoch: 38 training_loss 3.1443086433410645 test_loss: 1.7599832534790039
epoch: 39 training_loss 3.2359130001068115 test_loss: 1.7652889251708985
epoch: 40 training_loss 3.1889635038375856 test_loss: 1.7324722290039063
epoch: 41 training_loss 3.1383676171302795 test_loss: 1.7453855514526366
epoch: 42 training_loss 3.051257464885712 test_loss: 1.708806037902832
epoch: 43 training_loss 3.114128029346466 test_loss: 1.66815242767334
epoch: 44 training_loss 3.025562973022461 test_loss: 1.6819890975952148
epoch: 45 training_loss 2.967534370422363 test_loss: 1.675351905822754
epoch: 46 training_loss 3.030601840019226 test_loss: 1.6402908325195313
epoch: 47 training_loss 2.959758212566376 test_loss: 1.6393648147583009
epoch: 48 training_loss 2.933121311664581 test_loss: 1.6397592544555664
epoch: 49 training_loss 2.8938099765777587 test_loss: 1.6112581253051759
epoch: 50 training_loss 2.906681897640228 test_loss: 1.6324947357177735
epoch: 51 training_loss 2.8878887581825254 test_loss: 1.6226819992065429
epoch: 52 training_loss 2.829126014709473 test_loss: 1.5999625205993653
epoch: 53 training_loss 2.81878342628479 test_loss: 1.5758821487426757
epoch: 54 training_loss 2.802039361000061 test_loss: 1.5770919799804688
epoch: 55 training_loss 2.732061858177185 test_loss: 1.5674935340881349
epoch: 56 training_loss 2.7328263759613036 test_loss: 1.585959529876709
epoch: 57 training_loss 2.7346680188179016 test_loss: 1.5699666023254395
epoch: 58 training_loss 2.7099105739593505 test_loss: 1.540229034423828
epoch: 59 training_loss 2.714955632686615 test_loss: 1.534139919281006
epoch: 60 training_loss 2.703545093536377 test_loss: 1.5458158493041991
epoch: 61 training_loss 2.687959561347961 test_loss: 1.54376220703125
epoch: 62 training_loss 2.6190196669101713 test_loss: 1.5107138633728028
epoch: 63 training_loss 2.6453733158111574 test_loss: 1.5132458686828614
epoch: 64 training_loss 2.691886270046234 test_loss: 1.5315719604492188
epoch: 65 training_loss 2.647690818309784 test_loss: 1.5156538963317872
epoch: 66 training_loss 2.6703490018844604 test_loss: 1.5171202659606933
epoch: 67 training_loss 2.594512629508972 test_loss: 1.4949010848999023
epoch: 68 training_loss 2.5741622030735014 test_loss: 1.496320915222168
epoch: 69 training_loss 2.596036260128021 test_loss: 1.4996597290039062
epoch: 70 training_loss 2.572936009168625 test_loss: 1.4935482025146485
epoch: 71 training_loss 2.598054654598236 test_loss: 1.4644559860229491
epoch: 72 training_loss 2.550305116176605 test_loss: 1.4710158348083495
epoch: 73 training_loss 2.5479627621173857 test_loss: 1.4619595527648925
epoch: 74 training_loss 2.5867003238201143 test_loss: 1.4753600120544434
epoch: 75 training_loss 2.5385014700889585 test_loss: 1.4590158462524414
epoch: 76 training_loss 2.527011661529541 test_loss: 1.4502023696899413
epoch: 77 training_loss 2.4489388847351075 test_loss: 1.4240083694458008
epoch: 78 training_loss 2.4615367341041563 test_loss: 1.4349032402038575
epoch: 79 training_loss 2.4968419790267946 test_loss: 1.4302339553833008
epoch: 80 training_loss 2.4451006972789764 test_loss: 1.42824125289917
epoch: 81 training_loss 2.4663324999809264 test_loss: 1.4162041664123535
epoch: 82 training_loss 2.4387498247623443 test_loss: 1.4225814819335938
epoch: 83 training_loss 2.3944823503494264 test_loss: 1.4159618377685548
epoch: 84 training_loss 2.425186276435852 test_loss: 1.417354202270508
epoch: 85 training_loss 2.420516219139099 test_loss: 1.4306618690490722
epoch: 86 training_loss 2.4491045594215395 test_loss: 1.4214722633361816
epoch: 87 training_loss 2.4005772030353545 test_loss: 1.429927444458008
epoch: 88 training_loss 2.3647678005695343 test_loss: 1.4031020164489747
epoch: 89 training_loss 2.3628329920768736 test_loss: 1.4102545738220216
epoch: 90 training_loss 2.3330596125125886 test_loss: 1.4209976196289062
epoch: 91 training_loss 2.3960965740680695 test_loss: 1.417379665374756
epoch: 92 training_loss 2.437350986003876 test_loss: 1.4274541854858398
epoch: 93 training_loss 2.3790261971950533 test_loss: 1.3657835960388183
epoch: 94 training_loss 2.3195438635349275 test_loss: 1.3953641891479491
epoch: 95 training_loss 2.3230471777915955 test_loss: 1.3789495468139648
epoch: 96 training_loss 2.3131940114498137 test_loss: 1.369616413116455
epoch: 97 training_loss 2.3228886115550993 test_loss: 1.3731779098510741
epoch: 98 training_loss 2.304493955373764 test_loss: 1.3639504432678222
epoch: 99 training_loss 2.3476392495632172 test_loss: 1.3667071342468262
epoch: 100 training_loss 2.2927447628974913 test_loss: 1.3612850189208985
epoch: 101 training_loss 2.284879915714264 test_loss: 1.3728859901428223
epoch: 102 training_loss 2.2691681671142576 test_loss: 1.3686031341552733
epoch: 103 training_loss 2.290433588027954 test_loss: 1.3481504440307617
epoch: 104 training_loss 2.253918571472168 test_loss: 1.3331089973449708
epoch: 105 training_loss 2.278155914545059 test_loss: 1.338383102416992
epoch: 106 training_loss 2.2605395901203154 test_loss: 1.3690918922424316
epoch: 107 training_loss 2.24356596827507 test_loss: 1.361515426635742
epoch: 108 training_loss 2.2128975415229797 test_loss: 1.334242057800293
epoch: 109 training_loss 2.2539134871959687 test_loss: 1.357602596282959
epoch: 110 training_loss 2.212340646982193 test_loss: 1.3354570388793945
epoch: 111 training_loss 2.241039571762085 test_loss: 1.3469488143920898
epoch: 112 training_loss 2.2225350773334505 test_loss: 1.340183162689209
epoch: 113 training_loss 2.2461014330387115 test_loss: 1.3552847862243653
epoch: 114 training_loss 2.224493157863617 test_loss: 1.339950180053711
epoch: 115 training_loss 2.2291646432876586 test_loss: 1.3428217887878418
epoch: 116 training_loss 2.2424718177318574 test_loss: 1.3346860885620118
epoch: 117 training_loss 2.2062601339817047 test_loss: 1.3206924438476562
epoch: 118 training_loss 2.17198135137558 test_loss: 1.3307181358337403
epoch: 119 training_loss 2.227589863538742 test_loss: 1.3280815124511718
epoch: 120 training_loss 2.165923396348953 test_loss: 1.327289867401123
epoch: 121 training_loss 2.1784450829029085 test_loss: 1.3191905975341798
epoch: 122 training_loss 2.143324365615845 test_loss: 1.3243335723876952
epoch: 123 training_loss 2.1662625813484193 test_loss: 1.317052459716797
epoch: 124 training_loss 2.1693916451931 test_loss: 1.3303541183471679
epoch: 125 training_loss 2.1889264619350435 test_loss: 1.3178345680236816
epoch: 126 training_loss 2.148202314376831 test_loss: 1.2990211486816405
epoch: 127 training_loss 2.1707533407211304 test_loss: 1.3253184318542481
epoch: 128 training_loss 2.1382328748703 test_loss: 1.305610466003418
epoch: 129 training_loss 2.141721781492233 test_loss: 1.3184978485107421
epoch: 130 training_loss 2.12249795794487 test_loss: 1.311141014099121
epoch: 131 training_loss 2.1999593532085417 test_loss: 1.2871675491333008
epoch: 132 training_loss 2.0773651826381685 test_loss: 1.291041374206543
epoch: 133 training_loss 2.12074321269989 test_loss: 1.3306102752685547
epoch: 134 training_loss 2.134228721857071 test_loss: 1.3129636764526367
epoch: 135 training_loss 2.136589356660843 test_loss: 1.3022132873535157
epoch: 136 training_loss 2.103653017282486 test_loss: 1.2854498863220214
epoch: 137 training_loss 2.1005683553218844 test_loss: 1.294645118713379
epoch: 138 training_loss 2.097602729797363 test_loss: 1.2848388671875
epoch: 139 training_loss 2.1437756562232972 test_loss: 1.282664966583252
epoch: 140 training_loss 2.073061900138855 test_loss: 1.2920354843139648
epoch: 141 training_loss 2.0615097773075104 test_loss: 1.2887639999389648
epoch: 142 training_loss 2.109441328048706 test_loss: 1.2807616233825683
epoch: 143 training_loss 2.052916533946991 test_loss: 1.2822925567626953
epoch: 144 training_loss 2.091473823785782 test_loss: 1.3075264930725097
epoch: 145 training_loss 2.121263896226883 test_loss: 1.3362482070922852
epoch: 146 training_loss 2.081974912881851 test_loss: 1.2796077728271484
epoch: 147 training_loss 2.0791237282752992 test_loss: 1.2634781837463378
epoch: 148 training_loss 2.0701515996456146 test_loss: 1.2967938423156737
epoch: 149 training_loss 2.0944072139263152 test_loss: 1.2759024620056152
2782.8339104263973
episode: 0 training return: tensor(169.1690, device='cuda:0')
episode: 1 training return: tensor(-176.9197, device='cuda:0')
episode: 2 training return: tensor(181.1396, device='cuda:0')
episode: 3 training return: tensor(65.2805, device='cuda:0')
epoch: 1 test_true_pfm: 2258.293730872864 sim_pfm: 24.82067798897818
episode: 4 training return: tensor(-289.2036, device='cuda:0')
episode: 5 training return: tensor(136.3467, device='cuda:0')
episode: 6 training return: tensor(-108.8192, device='cuda:0')
episode: 7 training return: tensor(-238.9111, device='cuda:0')
epoch: 2 test_true_pfm: 3087.1391070769146 sim_pfm: 164.24119431030704
episode: 8 training return: tensor(-39.0900, device='cuda:0')
episode: 9 training return: tensor(-98.9424, device='cuda:0')
episode: 10 training return: tensor(208.2749, device='cuda:0')
episode: 11 training return: tensor(-196.7512, device='cuda:0')
epoch: 3 test_true_pfm: 2830.9989443895734 sim_pfm: 20.209297016554046
episode: 12 training return: tensor(-337.9440, device='cuda:0')
episode: 13 training return: tensor(-46.1686, device='cuda:0')
episode: 14 training return: tensor(27.5208, device='cuda:0')
episode: 15 training return: tensor(323.2994, device='cuda:0')
epoch: 4 test_true_pfm: 2701.446296669281 sim_pfm: 70.87526814750163
episode: 16 training return: tensor(160.1063, device='cuda:0')
episode: 17 training return: tensor(116.8526, device='cuda:0')
episode: 18 training return: tensor(-344.5469, device='cuda:0')
episode: 19 training return: tensor(-167.5505, device='cuda:0')
epoch: 5 test_true_pfm: 1698.3780614775212 sim_pfm: 73.2166412261043
episode: 20 training return: tensor(-91.3159, device='cuda:0')
episode: 21 training return: tensor(-427.2621, device='cuda:0')
episode: 22 training return: tensor(89.9407, device='cuda:0')
episode: 23 training return: tensor(265.8266, device='cuda:0')
epoch: 6 test_true_pfm: 2891.125079077445 sim_pfm: 106.5182894456666
episode: 24 training return: tensor(222.7564, device='cuda:0')
episode: 25 training return: tensor(-362.8782, device='cuda:0')
episode: 26 training return: tensor(145.7558, device='cuda:0')
episode: 27 training return: tensor(168.8311, device='cuda:0')
epoch: 7 test_true_pfm: 2706.604990552211 sim_pfm: 37.34045492780084
episode: 28 training return: tensor(-166.0079, device='cuda:0')
episode: 29 training return: tensor(-277.5258, device='cuda:0')
episode: 30 training return: tensor(389.9454, device='cuda:0')
episode: 31 training return: tensor(317.1297, device='cuda:0')
epoch: 8 test_true_pfm: 2235.974728025274 sim_pfm: -56.82370428572176
episode: 32 training return: tensor(-1.7327, device='cuda:0')
episode: 33 training return: tensor(220.2913, device='cuda:0')
episode: 34 training return: tensor(158.9811, device='cuda:0')
episode: 35 training return: tensor(149.0090, device='cuda:0')
epoch: 9 test_true_pfm: 2282.026680899516 sim_pfm: 59.16796999991251
episode: 36 training return: tensor(215.7356, device='cuda:0')
episode: 37 training return: tensor(257.9865, device='cuda:0')
episode: 38 training return: tensor(-212.4744, device='cuda:0')
episode: 39 training return: tensor(-285.8633, device='cuda:0')
epoch: 10 test_true_pfm: 3210.3113616853593 sim_pfm: -89.77309454769905
episode: 40 training return: tensor(-366.5787, device='cuda:0')
episode: 41 training return: tensor(-210.2786, device='cuda:0')
episode: 42 training return: tensor(-137.2414, device='cuda:0')
episode: 43 training return: tensor(269.2612, device='cuda:0')
epoch: 11 test_true_pfm: 3062.514401500139 sim_pfm: 104.12746262837511
episode: 44 training return: tensor(-169.5344, device='cuda:0')
episode: 45 training return: tensor(-325.7568, device='cuda:0')
episode: 46 training return: tensor(-219.1671, device='cuda:0')
episode: 47 training return: tensor(-138.6874, device='cuda:0')
epoch: 12 test_true_pfm: 2511.3590166260815 sim_pfm: -37.42499422000643
episode: 48 training return: tensor(233.7267, device='cuda:0')
episode: 49 training return: tensor(-216.8453, device='cuda:0')
episode: 50 training return: tensor(-180.1465, device='cuda:0')
episode: 51 training return: tensor(14.5524, device='cuda:0')
epoch: 13 test_true_pfm: 2761.436673945354 sim_pfm: -122.27965626345637
episode: 52 training return: tensor(-332.7783, device='cuda:0')
episode: 53 training return: tensor(-130.5612, device='cuda:0')
episode: 54 training return: tensor(-347.8366, device='cuda:0')
episode: 55 training return: tensor(158.4022, device='cuda:0')
epoch: 14 test_true_pfm: 2596.936211720127 sim_pfm: -396.08833035673405
episode: 56 training return: tensor(242.2609, device='cuda:0')
episode: 57 training return: tensor(285.2709, device='cuda:0')
episode: 58 training return: tensor(130.0513, device='cuda:0')
episode: 59 training return: tensor(223.2247, device='cuda:0')
epoch: 15 test_true_pfm: 2774.7934914134676 sim_pfm: 28.180336359267432
episode: 60 training return: tensor(155.5220, device='cuda:0')
episode: 61 training return: tensor(-308.1711, device='cuda:0')
episode: 62 training return: tensor(134.9891, device='cuda:0')
episode: 63 training return: tensor(-67.6677, device='cuda:0')
epoch: 16 test_true_pfm: 3085.7074047421315 sim_pfm: 203.10104361206564
episode: 64 training return: tensor(121.5310, device='cuda:0')
episode: 65 training return: tensor(140.2128, device='cuda:0')
episode: 66 training return: tensor(457.5346, device='cuda:0')
episode: 67 training return: tensor(252.8566, device='cuda:0')
epoch: 17 test_true_pfm: 2908.8723528529717 sim_pfm: 8.1131410333716
episode: 68 training return: tensor(143.0827, device='cuda:0')
episode: 69 training return: tensor(-254.1181, device='cuda:0')
episode: 70 training return: tensor(-513.1734, device='cuda:0')
episode: 71 training return: tensor(242.5676, device='cuda:0')
epoch: 18 test_true_pfm: 2738.461033358606 sim_pfm: -66.47121238568798
episode: 72 training return: tensor(100.8520, device='cuda:0')
episode: 73 training return: tensor(244.2434, device='cuda:0')
episode: 74 training return: tensor(280.4457, device='cuda:0')
episode: 75 training return: tensor(-181.5891, device='cuda:0')
epoch: 19 test_true_pfm: 3439.8433451526816 sim_pfm: 225.61457841520314
episode: 76 training return: tensor(48.0126, device='cuda:0')
episode: 77 training return: tensor(-254.4647, device='cuda:0')
episode: 78 training return: tensor(262.7267, device='cuda:0')
episode: 79 training return: tensor(41.3564, device='cuda:0')
epoch: 20 test_true_pfm: 2911.4934649890456 sim_pfm: 120.48650270952687
episode: 80 training return: tensor(244.1471, device='cuda:0')
episode: 81 training return: tensor(343.8361, device='cuda:0')
episode: 82 training return: tensor(-19.8457, device='cuda:0')
episode: 83 training return: tensor(182.7964, device='cuda:0')
epoch: 21 test_true_pfm: 3402.2085060074946 sim_pfm: 210.7569040413946
episode: 84 training return: tensor(299.7601, device='cuda:0')
episode: 85 training return: tensor(116.3146, device='cuda:0')
episode: 86 training return: tensor(147.1446, device='cuda:0')
episode: 87 training return: tensor(190.7426, device='cuda:0')
epoch: 22 test_true_pfm: 3071.49979499302 sim_pfm: 90.20662280037261
episode: 88 training return: tensor(228.5137, device='cuda:0')
episode: 89 training return: tensor(380.3978, device='cuda:0')
episode: 90 training return: tensor(-128.5020, device='cuda:0')
episode: 91 training return: tensor(83.7335, device='cuda:0')
epoch: 23 test_true_pfm: 3409.3378701537017 sim_pfm: 194.42130618681162
episode: 92 training return: tensor(314.1680, device='cuda:0')
episode: 93 training return: tensor(190.0090, device='cuda:0')
episode: 94 training return: tensor(-4.1506, device='cuda:0')
episode: 95 training return: tensor(280.0925, device='cuda:0')
epoch: 24 test_true_pfm: 3232.752531289014 sim_pfm: 214.73004478677953
episode: 96 training return: tensor(150.5754, device='cuda:0')
episode: 97 training return: tensor(190.6238, device='cuda:0')
episode: 98 training return: tensor(223.2839, device='cuda:0')
episode: 99 training return: tensor(290.6378, device='cuda:0')
epoch: 25 test_true_pfm: 3488.709093780846 sim_pfm: 204.2085931096808
episode: 100 training return: tensor(304.8046, device='cuda:0')
episode: 101 training return: tensor(267.4109, device='cuda:0')
episode: 102 training return: tensor(35.5028, device='cuda:0')
episode: 103 training return: tensor(216.0691, device='cuda:0')
epoch: 26 test_true_pfm: 3367.5060327488295 sim_pfm: 224.52700146336187
episode: 104 training return: tensor(157.5187, device='cuda:0')
episode: 105 training return: tensor(113.3739, device='cuda:0')
episode: 106 training return: tensor(301.0255, device='cuda:0')
episode: 107 training return: tensor(217.2436, device='cuda:0')
epoch: 27 test_true_pfm: 3407.742005002207 sim_pfm: 287.76141671444447
episode: 108 training return: tensor(180.2295, device='cuda:0')
episode: 109 training return: tensor(302.9804, device='cuda:0')
episode: 110 training return: tensor(-174.0329, device='cuda:0')
episode: 111 training return: tensor(154.6635, device='cuda:0')
epoch: 28 test_true_pfm: 3193.218893879306 sim_pfm: 235.9285601444717
episode: 112 training return: tensor(298.0392, device='cuda:0')
episode: 113 training return: tensor(-368.0848, device='cuda:0')
episode: 114 training return: tensor(166.4914, device='cuda:0')
episode: 115 training return: tensor(334.1807, device='cuda:0')
epoch: 29 test_true_pfm: 3444.389807074322 sim_pfm: 332.81681960894883
episode: 116 training return: tensor(270.6471, device='cuda:0')
episode: 117 training return: tensor(176.8722, device='cuda:0')
episode: 118 training return: tensor(331.2657, device='cuda:0')
episode: 119 training return: tensor(284.1410, device='cuda:0')
epoch: 30 test_true_pfm: 3460.6804792460566 sim_pfm: 312.06336090704036
episode: 120 training return: tensor(211.1622, device='cuda:0')
episode: 121 training return: tensor(103.6916, device='cuda:0')
episode: 122 training return: tensor(257.2409, device='cuda:0')
episode: 123 training return: tensor(274.2902, device='cuda:0')
epoch: 31 test_true_pfm: 3375.0219180410118 sim_pfm: 314.3566729015438
episode: 124 training return: tensor(260.9364, device='cuda:0')
episode: 125 training return: tensor(-290.5174, device='cuda:0')
episode: 126 training return: tensor(-280.3914, device='cuda:0')
episode: 127 training return: tensor(234.6890, device='cuda:0')
epoch: 32 test_true_pfm: 3285.2673083684363 sim_pfm: 288.4901748612853
episode: 128 training return: tensor(256.3757, device='cuda:0')
episode: 129 training return: tensor(182.5956, device='cuda:0')
episode: 130 training return: tensor(267.2247, device='cuda:0')
episode: 131 training return: tensor(179.8829, device='cuda:0')
epoch: 33 test_true_pfm: 3418.9518831900095 sim_pfm: 318.59055844003643
episode: 132 training return: tensor(62.7515, device='cuda:0')
episode: 133 training return: tensor(276.1049, device='cuda:0')
episode: 134 training return: tensor(328.4091, device='cuda:0')
episode: 135 training return: tensor(274.6132, device='cuda:0')
epoch: 34 test_true_pfm: 3082.8987314937317 sim_pfm: 331.19168071687454
episode: 136 training return: tensor(261.7053, device='cuda:0')
episode: 137 training return: tensor(49.0896, device='cuda:0')
episode: 138 training return: tensor(-80.3116, device='cuda:0')
episode: 139 training return: tensor(376.6656, device='cuda:0')
epoch: 35 test_true_pfm: 3409.9879196709862 sim_pfm: 337.4428556140435
episode: 140 training return: tensor(223.0090, device='cuda:0')
episode: 141 training return: tensor(318.8574, device='cuda:0')
episode: 142 training return: tensor(4.6799, device='cuda:0')
episode: 143 training return: tensor(294.1172, device='cuda:0')
epoch: 36 test_true_pfm: 3455.913016536859 sim_pfm: 294.2577208573736
episode: 144 training return: tensor(264.8733, device='cuda:0')
episode: 145 training return: tensor(-208.0675, device='cuda:0')
episode: 146 training return: tensor(268.1555, device='cuda:0')
episode: 147 training return: tensor(36.0420, device='cuda:0')
epoch: 37 test_true_pfm: 3361.0402149576566 sim_pfm: 253.51541891436986
episode: 148 training return: tensor(304.0622, device='cuda:0')
episode: 149 training return: tensor(169.2555, device='cuda:0')
episode: 150 training return: tensor(181.9372, device='cuda:0')
episode: 151 training return: tensor(248.5147, device='cuda:0')
epoch: 38 test_true_pfm: 3451.6979091528697 sim_pfm: 312.5521324752481
episode: 152 training return: tensor(240.1590, device='cuda:0')
episode: 153 training return: tensor(251.0986, device='cuda:0')
episode: 154 training return: tensor(367.7596, device='cuda:0')
episode: 155 training return: tensor(316.8148, device='cuda:0')
epoch: 39 test_true_pfm: 3448.8434922053452 sim_pfm: 385.7559959224697
episode: 156 training return: tensor(181.6036, device='cuda:0')
episode: 157 training return: tensor(-17.4200, device='cuda:0')
episode: 158 training return: tensor(14.4664, device='cuda:0')
episode: 159 training return: tensor(241.3459, device='cuda:0')
epoch: 40 test_true_pfm: 3511.516560032783 sim_pfm: 173.33260383469556
episode: 160 training return: tensor(79.9104, device='cuda:0')
episode: 161 training return: tensor(273.6608, device='cuda:0')
episode: 162 training return: tensor(404.6655, device='cuda:0')
episode: 163 training return: tensor(256.0541, device='cuda:0')
epoch: 41 test_true_pfm: 3488.432699120722 sim_pfm: 274.8237398439087
episode: 164 training return: tensor(292.3258, device='cuda:0')
episode: 165 training return: tensor(338.7368, device='cuda:0')
episode: 166 training return: tensor(295.8338, device='cuda:0')
episode: 167 training return: tensor(264.6689, device='cuda:0')
epoch: 42 test_true_pfm: 3460.198697785354 sim_pfm: 271.36932546495035
episode: 168 training return: tensor(427.3561, device='cuda:0')
episode: 169 training return: tensor(306.0741, device='cuda:0')
episode: 170 training return: tensor(304.5154, device='cuda:0')
episode: 171 training return: tensor(405.9267, device='cuda:0')
epoch: 43 test_true_pfm: 3453.761169069366 sim_pfm: 243.13199756610751
episode: 172 training return: tensor(368.1401, device='cuda:0')
episode: 173 training return: tensor(-218.8262, device='cuda:0')
episode: 174 training return: tensor(-241.0703, device='cuda:0')
episode: 175 training return: tensor(97.7820, device='cuda:0')
epoch: 44 test_true_pfm: 3158.510384675086 sim_pfm: 98.49186407499171
episode: 176 training return: tensor(275.2592, device='cuda:0')
episode: 177 training return: tensor(303.5974, device='cuda:0')
episode: 178 training return: tensor(288.6798, device='cuda:0')
episode: 179 training return: tensor(-247.2451, device='cuda:0')
epoch: 45 test_true_pfm: 3490.2009333740752 sim_pfm: 296.073960636073
episode: 180 training return: tensor(-392.6418, device='cuda:0')
episode: 181 training return: tensor(203.2147, device='cuda:0')
episode: 182 training return: tensor(347.9505, device='cuda:0')
episode: 183 training return: tensor(-355.5010, device='cuda:0')
epoch: 46 test_true_pfm: 3038.70620181826 sim_pfm: 324.4147749088006
episode: 184 training return: tensor(229.9560, device='cuda:0')
episode: 185 training return: tensor(51.7376, device='cuda:0')
episode: 186 training return: tensor(300.2185, device='cuda:0')
episode: 187 training return: tensor(371.8861, device='cuda:0')
epoch: 47 test_true_pfm: 3007.153666315208 sim_pfm: 311.0600081688608
episode: 188 training return: tensor(266.7549, device='cuda:0')
episode: 189 training return: tensor(383.5673, device='cuda:0')
episode: 190 training return: tensor(-340.9351, device='cuda:0')
episode: 191 training return: tensor(217.9070, device='cuda:0')
epoch: 48 test_true_pfm: 3466.342489539264 sim_pfm: 312.7908186339191
episode: 192 training return: tensor(256.5260, device='cuda:0')
episode: 193 training return: tensor(162.4341, device='cuda:0')
episode: 194 training return: tensor(180.0821, device='cuda:0')
episode: 195 training return: tensor(325.6131, device='cuda:0')
epoch: 49 test_true_pfm: 3189.7904709808795 sim_pfm: 289.46655696378247
episode: 196 training return: tensor(242.4155, device='cuda:0')
episode: 197 training return: tensor(236.8208, device='cuda:0')
episode: 198 training return: tensor(384.7294, device='cuda:0')
episode: 199 training return: tensor(387.7078, device='cuda:0')
epoch: 50 test_true_pfm: 3455.848276783135 sim_pfm: 230.37084632903375
episode: 200 training return: tensor(291.1883, device='cuda:0')
episode: 201 training return: tensor(260.8574, device='cuda:0')
episode: 202 training return: tensor(410.5352, device='cuda:0')
episode: 203 training return: tensor(203.7843, device='cuda:0')
epoch: 51 test_true_pfm: 3536.7111746892588 sim_pfm: 293.68317719345214
episode: 204 training return: tensor(36.2195, device='cuda:0')
episode: 205 training return: tensor(422.1046, device='cuda:0')
episode: 206 training return: tensor(338.9175, device='cuda:0')
episode: 207 training return: tensor(-64.5445, device='cuda:0')
epoch: 52 test_true_pfm: 3486.1150717597397 sim_pfm: 274.2378665424476
episode: 208 training return: tensor(353.1656, device='cuda:0')
episode: 209 training return: tensor(313.3618, device='cuda:0')
episode: 210 training return: tensor(225.8564, device='cuda:0')
episode: 211 training return: tensor(284.7894, device='cuda:0')
epoch: 53 test_true_pfm: 3084.6386459449573 sim_pfm: 178.1440020527807
episode: 212 training return: tensor(272.9388, device='cuda:0')
episode: 213 training return: tensor(292.1916, device='cuda:0')
episode: 214 training return: tensor(-258.3797, device='cuda:0')
episode: 215 training return: tensor(31.5850, device='cuda:0')
epoch: 54 test_true_pfm: 3236.2992731734653 sim_pfm: 273.38411949621513
episode: 216 training return: tensor(-68.7157, device='cuda:0')
episode: 217 training return: tensor(449.3717, device='cuda:0')
episode: 218 training return: tensor(387.0508, device='cuda:0')
episode: 219 training return: tensor(-77.9546, device='cuda:0')
epoch: 55 test_true_pfm: 2982.334005195546 sim_pfm: 144.18376758765467
episode: 220 training return: tensor(-427.6892, device='cuda:0')
episode: 221 training return: tensor(337.7359, device='cuda:0')
episode: 222 training return: tensor(53.3249, device='cuda:0')
episode: 223 training return: tensor(281.2363, device='cuda:0')
epoch: 56 test_true_pfm: 3199.752436301362 sim_pfm: 276.4982472814542
episode: 224 training return: tensor(322.7337, device='cuda:0')
episode: 225 training return: tensor(247.0007, device='cuda:0')
episode: 226 training return: tensor(308.8702, device='cuda:0')
episode: 227 training return: tensor(313.3163, device='cuda:0')
epoch: 57 test_true_pfm: 3426.370969106151 sim_pfm: 259.6055011000717
episode: 228 training return: tensor(327.0331, device='cuda:0')
episode: 229 training return: tensor(73.1854, device='cuda:0')
episode: 230 training return: tensor(315.6245, device='cuda:0')
episode: 231 training return: tensor(141.6465, device='cuda:0')
epoch: 58 test_true_pfm: 3460.6463316245845 sim_pfm: 300.0037089627779
episode: 232 training return: tensor(314.9806, device='cuda:0')
episode: 233 training return: tensor(226.8824, device='cuda:0')
episode: 234 training return: tensor(385.3383, device='cuda:0')
episode: 235 training return: tensor(415.6102, device='cuda:0')
epoch: 59 test_true_pfm: 3506.563338943152 sim_pfm: 321.6050936138393
episode: 236 training return: tensor(445.0586, device='cuda:0')
episode: 237 training return: tensor(298.0709, device='cuda:0')
episode: 238 training return: tensor(289.7590, device='cuda:0')
episode: 239 training return: tensor(135.9349, device='cuda:0')
epoch: 60 test_true_pfm: 3452.26719841153 sim_pfm: 329.0789627119763
episode: 240 training return: tensor(209.7705, device='cuda:0')
episode: 241 training return: tensor(351.5288, device='cuda:0')
episode: 242 training return: tensor(369.2624, device='cuda:0')
episode: 243 training return: tensor(171.9399, device='cuda:0')
epoch: 61 test_true_pfm: 3009.5002890016617 sim_pfm: 255.1218985702629
episode: 244 training return: tensor(230.1794, device='cuda:0')
episode: 245 training return: tensor(338.3670, device='cuda:0')
episode: 246 training return: tensor(313.8334, device='cuda:0')
episode: 247 training return: tensor(64.9568, device='cuda:0')
epoch: 62 test_true_pfm: 3427.891357876107 sim_pfm: 171.8765763178235
episode: 248 training return: tensor(154.1539, device='cuda:0')
episode: 249 training return: tensor(214.5759, device='cuda:0')
episode: 250 training return: tensor(380.2344, device='cuda:0')
episode: 251 training return: tensor(432.6760, device='cuda:0')
epoch: 63 test_true_pfm: 3478.5201979746857 sim_pfm: 306.23713869428803
episode: 252 training return: tensor(366.6396, device='cuda:0')
episode: 253 training return: tensor(389.6620, device='cuda:0')
episode: 254 training return: tensor(359.4709, device='cuda:0')
episode: 255 training return: tensor(-378.5520, device='cuda:0')
epoch: 64 test_true_pfm: 3482.379046309778 sim_pfm: 221.71422750572674
episode: 256 training return: tensor(274.6120, device='cuda:0')
episode: 257 training return: tensor(334.2126, device='cuda:0')
episode: 258 training return: tensor(-94.6273, device='cuda:0')
episode: 259 training return: tensor(-260.3952, device='cuda:0')
epoch: 65 test_true_pfm: 3287.488066638622 sim_pfm: -35.27133010710046
episode: 260 training return: tensor(309.6443, device='cuda:0')
episode: 261 training return: tensor(347.5451, device='cuda:0')
episode: 262 training return: tensor(285.6246, device='cuda:0')
episode: 263 training return: tensor(-252.6417, device='cuda:0')
epoch: 66 test_true_pfm: 3311.82399887742 sim_pfm: 317.53507362767897
episode: 264 training return: tensor(345.2792, device='cuda:0')
episode: 265 training return: tensor(389.8572, device='cuda:0')
episode: 266 training return: tensor(323.8072, device='cuda:0')
episode: 267 training return: tensor(278.7779, device='cuda:0')
epoch: 67 test_true_pfm: 3522.424095034252 sim_pfm: 348.96657727858593
episode: 268 training return: tensor(269.9418, device='cuda:0')
episode: 269 training return: tensor(-342.5312, device='cuda:0')
episode: 270 training return: tensor(53.0133, device='cuda:0')
episode: 271 training return: tensor(301.9616, device='cuda:0')
epoch: 68 test_true_pfm: 3518.031496282098 sim_pfm: 306.88659907294397
episode: 272 training return: tensor(275.0649, device='cuda:0')
episode: 273 training return: tensor(309.3482, device='cuda:0')
episode: 274 training return: tensor(380.9094, device='cuda:0')
episode: 275 training return: tensor(309.6489, device='cuda:0')
epoch: 69 test_true_pfm: 3473.9366357349445 sim_pfm: 313.4136006303791
episode: 276 training return: tensor(223.0138, device='cuda:0')
episode: 277 training return: tensor(329.6562, device='cuda:0')
episode: 278 training return: tensor(293.5086, device='cuda:0')
episode: 279 training return: tensor(380.3077, device='cuda:0')
epoch: 70 test_true_pfm: 3466.0206610950722 sim_pfm: 317.6434044247629
episode: 280 training return: tensor(444.7691, device='cuda:0')
episode: 281 training return: tensor(159.7256, device='cuda:0')
episode: 282 training return: tensor(77.5299, device='cuda:0')
episode: 283 training return: tensor(383.8900, device='cuda:0')
epoch: 71 test_true_pfm: 3476.70493283044 sim_pfm: 313.6024664401775
episode: 284 training return: tensor(365.6937, device='cuda:0')
episode: 285 training return: tensor(438.6218, device='cuda:0')
episode: 286 training return: tensor(-163.5318, device='cuda:0')
episode: 287 training return: tensor(403.9700, device='cuda:0')
epoch: 72 test_true_pfm: 3527.097782021532 sim_pfm: 339.1941051193571
episode: 288 training return: tensor(507.3662, device='cuda:0')
episode: 289 training return: tensor(326.6222, device='cuda:0')
episode: 290 training return: tensor(348.6335, device='cuda:0')
episode: 291 training return: tensor(308.2878, device='cuda:0')
epoch: 73 test_true_pfm: 3466.031841194382 sim_pfm: 271.27465619363164
episode: 292 training return: tensor(189.3590, device='cuda:0')
episode: 293 training return: tensor(364.8550, device='cuda:0')
episode: 294 training return: tensor(284.3893, device='cuda:0')
episode: 295 training return: tensor(334.9754, device='cuda:0')
epoch: 74 test_true_pfm: 3467.662931693187 sim_pfm: 258.3199177107502
episode: 296 training return: tensor(335.0399, device='cuda:0')
episode: 297 training return: tensor(189.8986, device='cuda:0')
episode: 298 training return: tensor(316.7263, device='cuda:0')
episode: 299 training return: tensor(225.3649, device='cuda:0')
epoch: 75 test_true_pfm: 3459.8111324837314 sim_pfm: 350.2759762345813
episode: 300 training return: tensor(70.9666, device='cuda:0')
episode: 301 training return: tensor(261.0089, device='cuda:0')
episode: 302 training return: tensor(380.8156, device='cuda:0')
episode: 303 training return: tensor(375.2796, device='cuda:0')
epoch: 76 test_true_pfm: 3123.2653181918317 sim_pfm: 356.24480647657765
episode: 304 training return: tensor(398.2523, device='cuda:0')
episode: 305 training return: tensor(238.8225, device='cuda:0')
episode: 306 training return: tensor(333.4799, device='cuda:0')
episode: 307 training return: tensor(220.9352, device='cuda:0')
epoch: 77 test_true_pfm: 3505.1047807692007 sim_pfm: 312.86654862676124
episode: 308 training return: tensor(386.6228, device='cuda:0')
episode: 309 training return: tensor(249.2229, device='cuda:0')
episode: 310 training return: tensor(394.3051, device='cuda:0')
episode: 311 training return: tensor(382.8626, device='cuda:0')
epoch: 78 test_true_pfm: 3506.6987708822876 sim_pfm: 408.97074872420245
episode: 312 training return: tensor(252.5352, device='cuda:0')
episode: 313 training return: tensor(360.2826, device='cuda:0')
episode: 314 training return: tensor(335.9276, device='cuda:0')
episode: 315 training return: tensor(312.2166, device='cuda:0')
epoch: 79 test_true_pfm: 3542.235870475068 sim_pfm: 310.93787501738797
episode: 316 training return: tensor(122.4403, device='cuda:0')
episode: 317 training return: tensor(-80.1628, device='cuda:0')
episode: 318 training return: tensor(15.8944, device='cuda:0')
episode: 319 training return: tensor(336.0479, device='cuda:0')
epoch: 80 test_true_pfm: 3228.4528000357313 sim_pfm: 327.5868524563654
episode: 320 training return: tensor(156.9701, device='cuda:0')
episode: 321 training return: tensor(440.1418, device='cuda:0')
episode: 322 training return: tensor(291.8671, device='cuda:0')
episode: 323 training return: tensor(261.0739, device='cuda:0')
epoch: 81 test_true_pfm: 3478.1681549245677 sim_pfm: 345.4458097757306
episode: 324 training return: tensor(354.5663, device='cuda:0')
episode: 325 training return: tensor(329.2750, device='cuda:0')
episode: 326 training return: tensor(388.6710, device='cuda:0')
episode: 327 training return: tensor(-24.9235, device='cuda:0')
epoch: 82 test_true_pfm: 3525.3042712319316 sim_pfm: 161.71886111587324
episode: 328 training return: tensor(294.5900, device='cuda:0')
episode: 329 training return: tensor(391.0234, device='cuda:0')
episode: 330 training return: tensor(432.6628, device='cuda:0')
episode: 331 training return: tensor(352.2080, device='cuda:0')
epoch: 83 test_true_pfm: 3345.826417613122 sim_pfm: 347.19878947214846
episode: 332 training return: tensor(186.2390, device='cuda:0')
episode: 333 training return: tensor(299.6601, device='cuda:0')
episode: 334 training return: tensor(230.1449, device='cuda:0')
episode: 335 training return: tensor(395.9150, device='cuda:0')
epoch: 84 test_true_pfm: 3515.2245668188975 sim_pfm: 274.0410832425987
episode: 336 training return: tensor(256.9504, device='cuda:0')
episode: 337 training return: tensor(275.3447, device='cuda:0')
episode: 338 training return: tensor(263.8376, device='cuda:0')
episode: 339 training return: tensor(315.2468, device='cuda:0')
epoch: 85 test_true_pfm: 3463.7647709276084 sim_pfm: 175.15545930878338
episode: 340 training return: tensor(338.4839, device='cuda:0')
episode: 341 training return: tensor(243.9117, device='cuda:0')
episode: 342 training return: tensor(314.3260, device='cuda:0')
episode: 343 training return: tensor(250.2413, device='cuda:0')
epoch: 86 test_true_pfm: 3432.085284542079 sim_pfm: 375.50799034685286
episode: 344 training return: tensor(427.2437, device='cuda:0')
episode: 345 training return: tensor(330.4268, device='cuda:0')
episode: 346 training return: tensor(337.2546, device='cuda:0')
episode: 347 training return: tensor(368.3946, device='cuda:0')
epoch: 87 test_true_pfm: 3527.7022293299037 sim_pfm: 363.8470799878026
episode: 348 training return: tensor(348.1066, device='cuda:0')
episode: 349 training return: tensor(-70.5523, device='cuda:0')
episode: 350 training return: tensor(-92.2284, device='cuda:0')
episode: 351 training return: tensor(391.2012, device='cuda:0')
epoch: 88 test_true_pfm: 3441.493474596659 sim_pfm: 255.88328258138304
episode: 352 training return: tensor(344.6222, device='cuda:0')
episode: 353 training return: tensor(-412.6659, device='cuda:0')
episode: 354 training return: tensor(393.0110, device='cuda:0')
episode: 355 training return: tensor(333.1816, device='cuda:0')
epoch: 89 test_true_pfm: 3482.3426809230546 sim_pfm: 362.85985032609705
episode: 356 training return: tensor(91.7454, device='cuda:0')
episode: 357 training return: tensor(416.5293, device='cuda:0')
episode: 358 training return: tensor(397.8870, device='cuda:0')
episode: 359 training return: tensor(-30.5522, device='cuda:0')
epoch: 90 test_true_pfm: 3477.5253668656474 sim_pfm: 348.3260721602516
episode: 360 training return: tensor(193.1691, device='cuda:0')
episode: 361 training return: tensor(316.1292, device='cuda:0')
episode: 362 training return: tensor(259.4410, device='cuda:0')
episode: 363 training return: tensor(269.4004, device='cuda:0')
epoch: 91 test_true_pfm: 3548.934066415897 sim_pfm: 455.66955390412477
episode: 364 training return: tensor(330.5641, device='cuda:0')
episode: 365 training return: tensor(325.4405, device='cuda:0')
episode: 366 training return: tensor(392.3143, device='cuda:0')
episode: 367 training return: tensor(311.1555, device='cuda:0')
epoch: 92 test_true_pfm: 3487.6986644172225 sim_pfm: 364.7862045445945
episode: 368 training return: tensor(388.9552, device='cuda:0')
episode: 369 training return: tensor(345.9470, device='cuda:0')
episode: 370 training return: tensor(386.2695, device='cuda:0')
episode: 371 training return: tensor(403.7785, device='cuda:0')
epoch: 93 test_true_pfm: 3577.8074957789345 sim_pfm: 384.06372797477525
episode: 372 training return: tensor(336.9825, device='cuda:0')
episode: 373 training return: tensor(300.9504, device='cuda:0')
episode: 374 training return: tensor(339.4516, device='cuda:0')
episode: 375 training return: tensor(368.6249, device='cuda:0')
epoch: 94 test_true_pfm: 3523.927508718269 sim_pfm: 233.05968833396523
episode: 376 training return: tensor(311.6634, device='cuda:0')
episode: 377 training return: tensor(261.8192, device='cuda:0')
episode: 378 training return: tensor(213.0341, device='cuda:0')
episode: 379 training return: tensor(238.5822, device='cuda:0')
epoch: 95 test_true_pfm: 3352.3528341938872 sim_pfm: 400.0971798269505
episode: 380 training return: tensor(497.6892, device='cuda:0')
episode: 381 training return: tensor(321.5781, device='cuda:0')
episode: 382 training return: tensor(371.0376, device='cuda:0')
episode: 383 training return: tensor(219.7295, device='cuda:0')
epoch: 96 test_true_pfm: 3646.078305967402 sim_pfm: 115.60859210285707
episode: 384 training return: tensor(356.6085, device='cuda:0')
episode: 385 training return: tensor(349.0296, device='cuda:0')
episode: 386 training return: tensor(-210.6854, device='cuda:0')
episode: 387 training return: tensor(376.8187, device='cuda:0')
epoch: 97 test_true_pfm: 3294.5440835452428 sim_pfm: 399.9180609585795
episode: 388 training return: tensor(383.1577, device='cuda:0')
episode: 389 training return: tensor(194.1490, device='cuda:0')
episode: 390 training return: tensor(320.3826, device='cuda:0')
episode: 391 training return: tensor(385.5477, device='cuda:0')
epoch: 98 test_true_pfm: 3586.3954165428695 sim_pfm: 369.375346389085
episode: 392 training return: tensor(-181.1184, device='cuda:0')
episode: 393 training return: tensor(408.3903, device='cuda:0')
episode: 394 training return: tensor(283.4580, device='cuda:0')
episode: 395 training return: tensor(463.4945, device='cuda:0')
epoch: 99 test_true_pfm: 3490.1686669774676 sim_pfm: 298.93027945716557
episode: 396 training return: tensor(288.2837, device='cuda:0')
episode: 397 training return: tensor(228.9612, device='cuda:0')
episode: 398 training return: tensor(234.6940, device='cuda:0')
episode: 399 training return: tensor(309.6380, device='cuda:0')
epoch: 100 test_true_pfm: 3590.35651909274 sim_pfm: 362.69958027108805
episode: 400 training return: tensor(416.1929, device='cuda:0')
episode: 401 training return: tensor(244.6116, device='cuda:0')
episode: 402 training return: tensor(331.9785, device='cuda:0')
episode: 403 training return: tensor(341.6060, device='cuda:0')
epoch: 101 test_true_pfm: 3439.2135504835496 sim_pfm: 3.9914670189318713
episode: 404 training return: tensor(249.5238, device='cuda:0')
episode: 405 training return: tensor(275.9397, device='cuda:0')
episode: 406 training return: tensor(399.0576, device='cuda:0')
episode: 407 training return: tensor(318.3778, device='cuda:0')
epoch: 102 test_true_pfm: 3521.5050711030435 sim_pfm: 395.90401567549753
episode: 408 training return: tensor(178.9880, device='cuda:0')
episode: 409 training return: tensor(426.1600, device='cuda:0')
episode: 410 training return: tensor(381.2648, device='cuda:0')
episode: 411 training return: tensor(142.8383, device='cuda:0')
epoch: 103 test_true_pfm: 3521.3295133344322 sim_pfm: 466.9120331412414
episode: 412 training return: tensor(321.7297, device='cuda:0')
episode: 413 training return: tensor(438.3093, device='cuda:0')
episode: 414 training return: tensor(359.2912, device='cuda:0')
episode: 415 training return: tensor(278.4629, device='cuda:0')
epoch: 104 test_true_pfm: 3529.7289919249656 sim_pfm: 346.6219441261589
episode: 416 training return: tensor(347.8170, device='cuda:0')
episode: 417 training return: tensor(377.5150, device='cuda:0')
episode: 418 training return: tensor(349.2835, device='cuda:0')
episode: 419 training return: tensor(373.4556, device='cuda:0')
epoch: 105 test_true_pfm: 3063.3184949469155 sim_pfm: 412.18131761338253
episode: 420 training return: tensor(387.7029, device='cuda:0')
episode: 421 training return: tensor(301.8233, device='cuda:0')
episode: 422 training return: tensor(278.7083, device='cuda:0')
episode: 423 training return: tensor(336.9835, device='cuda:0')
epoch: 106 test_true_pfm: 3583.7397731779783 sim_pfm: 273.9277046971256
episode: 424 training return: tensor(325.0353, device='cuda:0')
episode: 425 training return: tensor(455.8091, device='cuda:0')
episode: 426 training return: tensor(353.5130, device='cuda:0')
episode: 427 training return: tensor(416.0992, device='cuda:0')
epoch: 107 test_true_pfm: 3637.508836280956 sim_pfm: 278.2862708540342
episode: 428 training return: tensor(334.5325, device='cuda:0')
episode: 429 training return: tensor(368.0800, device='cuda:0')
episode: 430 training return: tensor(402.2404, device='cuda:0')
episode: 431 training return: tensor(-439.1375, device='cuda:0')
epoch: 108 test_true_pfm: 3484.200686313112 sim_pfm: 403.6527099437856
episode: 432 training return: tensor(322.7270, device='cuda:0')
episode: 433 training return: tensor(415.0221, device='cuda:0')
episode: 434 training return: tensor(356.0323, device='cuda:0')
episode: 435 training return: tensor(416.5201, device='cuda:0')
epoch: 109 test_true_pfm: 3512.7813192215235 sim_pfm: 381.5470168168734
episode: 436 training return: tensor(285.1301, device='cuda:0')
episode: 437 training return: tensor(413.7354, device='cuda:0')
episode: 438 training return: tensor(232.6242, device='cuda:0')
episode: 439 training return: tensor(-14.5022, device='cuda:0')
epoch: 110 test_true_pfm: 3521.583606961138 sim_pfm: 372.3778016305296
episode: 440 training return: tensor(281.7638, device='cuda:0')
episode: 441 training return: tensor(277.3295, device='cuda:0')
episode: 442 training return: tensor(395.7481, device='cuda:0')
episode: 443 training return: tensor(281.0894, device='cuda:0')
epoch: 111 test_true_pfm: 3555.349878117373 sim_pfm: 393.0399156832136
episode: 444 training return: tensor(201.6800, device='cuda:0')
episode: 445 training return: tensor(410.2305, device='cuda:0')
episode: 446 training return: tensor(229.5277, device='cuda:0')
episode: 447 training return: tensor(437.0550, device='cuda:0')
epoch: 112 test_true_pfm: 3597.5541811000235 sim_pfm: 374.9024696110088
episode: 448 training return: tensor(399.2445, device='cuda:0')
episode: 449 training return: tensor(377.5186, device='cuda:0')
episode: 450 training return: tensor(388.7021, device='cuda:0')
episode: 451 training return: tensor(382.7754, device='cuda:0')
epoch: 113 test_true_pfm: 3545.96405190815 sim_pfm: 438.38305312675465
episode: 452 training return: tensor(430.1192, device='cuda:0')
episode: 453 training return: tensor(352.2150, device='cuda:0')
episode: 454 training return: tensor(341.7938, device='cuda:0')
episode: 455 training return: tensor(346.2183, device='cuda:0')
epoch: 114 test_true_pfm: 3565.127715981669 sim_pfm: 399.60143296953174
episode: 456 training return: tensor(372.7264, device='cuda:0')
episode: 457 training return: tensor(432.4895, device='cuda:0')
episode: 458 training return: tensor(387.2539, device='cuda:0')
episode: 459 training return: tensor(337.8297, device='cuda:0')
epoch: 115 test_true_pfm: 3541.7008099217055 sim_pfm: 429.79211320437025
episode: 460 training return: tensor(249.8603, device='cuda:0')
episode: 461 training return: tensor(507.3482, device='cuda:0')
episode: 462 training return: tensor(401.7560, device='cuda:0')
episode: 463 training return: tensor(345.8531, device='cuda:0')
epoch: 116 test_true_pfm: 3603.7723980878054 sim_pfm: 388.4383487342857
episode: 464 training return: tensor(456.4868, device='cuda:0')
episode: 465 training return: tensor(326.0050, device='cuda:0')
episode: 466 training return: tensor(310.6449, device='cuda:0')
episode: 467 training return: tensor(301.2967, device='cuda:0')
epoch: 117 test_true_pfm: 3392.3562202162616 sim_pfm: 386.22031890414655
episode: 468 training return: tensor(311.0949, device='cuda:0')
episode: 469 training return: tensor(357.2632, device='cuda:0')
episode: 470 training return: tensor(287.9749, device='cuda:0')
episode: 471 training return: tensor(5.9827, device='cuda:0')
epoch: 118 test_true_pfm: 2990.296427175257 sim_pfm: 388.3422679495998
episode: 472 training return: tensor(439.3219, device='cuda:0')
episode: 473 training return: tensor(368.7214, device='cuda:0')
episode: 474 training return: tensor(367.7231, device='cuda:0')
episode: 475 training return: tensor(356.9913, device='cuda:0')
epoch: 119 test_true_pfm: 3635.790416600201 sim_pfm: 442.84178129055846
episode: 476 training return: tensor(331.2801, device='cuda:0')
episode: 477 training return: tensor(370.7990, device='cuda:0')
episode: 478 training return: tensor(-177.8833, device='cuda:0')
episode: 479 training return: tensor(382.0995, device='cuda:0')
epoch: 120 test_true_pfm: 3561.2755666090993 sim_pfm: 424.2007454886334
episode: 480 training return: tensor(51.9628, device='cuda:0')
episode: 481 training return: tensor(367.7107, device='cuda:0')
episode: 482 training return: tensor(-285.7701, device='cuda:0')
episode: 483 training return: tensor(188.5105, device='cuda:0')
epoch: 121 test_true_pfm: 3518.4534877281185 sim_pfm: 378.4341036906699
episode: 484 training return: tensor(377.9879, device='cuda:0')
episode: 485 training return: tensor(298.7865, device='cuda:0')
episode: 486 training return: tensor(-114.3965, device='cuda:0')
episode: 487 training return: tensor(449.6782, device='cuda:0')
epoch: 122 test_true_pfm: 3634.756538819105 sim_pfm: 462.3281429720034
episode: 488 training return: tensor(421.2751, device='cuda:0')
episode: 489 training return: tensor(-35.2019, device='cuda:0')
episode: 490 training return: tensor(394.4308, device='cuda:0')
episode: 491 training return: tensor(202.2482, device='cuda:0')
epoch: 123 test_true_pfm: 3528.889190088667 sim_pfm: 394.9721013860544
episode: 492 training return: tensor(101.0162, device='cuda:0')
episode: 493 training return: tensor(420.7282, device='cuda:0')
episode: 494 training return: tensor(436.6875, device='cuda:0')
episode: 495 training return: tensor(354.4280, device='cuda:0')
epoch: 124 test_true_pfm: 3531.805140834094 sim_pfm: 390.7632012068061
episode: 496 training return: tensor(490.2750, device='cuda:0')
episode: 497 training return: tensor(265.2901, device='cuda:0')
episode: 498 training return: tensor(419.0543, device='cuda:0')
episode: 499 training return: tensor(307.5263, device='cuda:0')
epoch: 125 test_true_pfm: 3546.5209657552678 sim_pfm: 440.87265811486094
episode: 500 training return: tensor(244.0321, device='cuda:0')
episode: 501 training return: tensor(407.8594, device='cuda:0')
episode: 502 training return: tensor(399.6227, device='cuda:0')
episode: 503 training return: tensor(368.1202, device='cuda:0')
epoch: 126 test_true_pfm: 3663.9942078547297 sim_pfm: 363.2373986307842
episode: 504 training return: tensor(462.4136, device='cuda:0')
episode: 505 training return: tensor(399.3890, device='cuda:0')
episode: 506 training return: tensor(276.9148, device='cuda:0')
episode: 507 training return: tensor(365.9048, device='cuda:0')
epoch: 127 test_true_pfm: 3545.652794093848 sim_pfm: 493.7352097167556
episode: 508 training return: tensor(433.9725, device='cuda:0')
episode: 509 training return: tensor(377.5670, device='cuda:0')
episode: 510 training return: tensor(409.0072, device='cuda:0')
episode: 511 training return: tensor(298.2993, device='cuda:0')
epoch: 128 test_true_pfm: 3578.1085975869446 sim_pfm: 407.1931467399506
episode: 512 training return: tensor(337.2210, device='cuda:0')
episode: 513 training return: tensor(123.8412, device='cuda:0')
episode: 514 training return: tensor(207.7342, device='cuda:0')
episode: 515 training return: tensor(323.6451, device='cuda:0')
epoch: 129 test_true_pfm: 3561.8694599502683 sim_pfm: 411.37537558046944
episode: 516 training return: tensor(425.3652, device='cuda:0')
episode: 517 training return: tensor(393.9841, device='cuda:0')
episode: 518 training return: tensor(447.8534, device='cuda:0')
episode: 519 training return: tensor(444.6133, device='cuda:0')
epoch: 130 test_true_pfm: 3562.506169106822 sim_pfm: 426.33531174855307
episode: 520 training return: tensor(327.9067, device='cuda:0')
episode: 521 training return: tensor(401.4010, device='cuda:0')
episode: 522 training return: tensor(399.2397, device='cuda:0')
episode: 523 training return: tensor(473.5942, device='cuda:0')
epoch: 131 test_true_pfm: 3566.9075000885296 sim_pfm: 191.09986335620246
episode: 524 training return: tensor(438.4230, device='cuda:0')
episode: 525 training return: tensor(419.7485, device='cuda:0')
episode: 526 training return: tensor(452.3738, device='cuda:0')
episode: 527 training return: tensor(462.5756, device='cuda:0')
epoch: 132 test_true_pfm: 3589.72533297578 sim_pfm: 422.43580234842375
episode: 528 training return: tensor(388.4328, device='cuda:0')
episode: 529 training return: tensor(292.8326, device='cuda:0')
episode: 530 training return: tensor(283.1267, device='cuda:0')
episode: 531 training return: tensor(287.8853, device='cuda:0')
epoch: 133 test_true_pfm: 3558.1215695339874 sim_pfm: 441.0873308493562
episode: 532 training return: tensor(342.7615, device='cuda:0')
episode: 533 training return: tensor(-433.0930, device='cuda:0')
episode: 534 training return: tensor(392.7793, device='cuda:0')
episode: 535 training return: tensor(307.4826, device='cuda:0')
epoch: 134 test_true_pfm: 3493.937337600231 sim_pfm: 452.3892915707159
episode: 536 training return: tensor(-127.9646, device='cuda:0')
episode: 537 training return: tensor(327.8225, device='cuda:0')
episode: 538 training return: tensor(-328.6125, device='cuda:0')
episode: 539 training return: tensor(402.4627, device='cuda:0')
epoch: 135 test_true_pfm: 3542.468899976569 sim_pfm: 421.1604095588943
episode: 540 training return: tensor(519.9374, device='cuda:0')
episode: 541 training return: tensor(408.5220, device='cuda:0')
episode: 542 training return: tensor(380.1507, device='cuda:0')
episode: 543 training return: tensor(452.3374, device='cuda:0')
epoch: 136 test_true_pfm: 3563.975001204673 sim_pfm: 139.66298797965283
episode: 544 training return: tensor(345.6381, device='cuda:0')
episode: 545 training return: tensor(462.9678, device='cuda:0')
episode: 546 training return: tensor(511.1721, device='cuda:0')
episode: 547 training return: tensor(296.3883, device='cuda:0')
epoch: 137 test_true_pfm: 3627.0932193551503 sim_pfm: 386.9040789458668
episode: 548 training return: tensor(407.7642, device='cuda:0')
episode: 549 training return: tensor(402.1424, device='cuda:0')
episode: 550 training return: tensor(408.6023, device='cuda:0')
episode: 551 training return: tensor(407.1658, device='cuda:0')
epoch: 138 test_true_pfm: 3539.3176853071996 sim_pfm: 101.77978915063431
episode: 552 training return: tensor(-192.2454, device='cuda:0')
episode: 553 training return: tensor(351.4591, device='cuda:0')
episode: 554 training return: tensor(382.9461, device='cuda:0')
episode: 555 training return: tensor(453.1320, device='cuda:0')
epoch: 139 test_true_pfm: 3564.969499198922 sim_pfm: 414.81341235914925
episode: 556 training return: tensor(339.3365, device='cuda:0')
episode: 557 training return: tensor(380.0471, device='cuda:0')
episode: 558 training return: tensor(383.1670, device='cuda:0')
episode: 559 training return: tensor(354.4597, device='cuda:0')
epoch: 140 test_true_pfm: 3518.9532027042715 sim_pfm: 423.64012815460836
episode: 560 training return: tensor(365.6353, device='cuda:0')
episode: 561 training return: tensor(-309.1383, device='cuda:0')
episode: 562 training return: tensor(290.3256, device='cuda:0')
episode: 563 training return: tensor(278.1257, device='cuda:0')
epoch: 141 test_true_pfm: 3470.383915193137 sim_pfm: 368.93806263036095
episode: 564 training return: tensor(402.0081, device='cuda:0')
episode: 565 training return: tensor(333.2271, device='cuda:0')
episode: 566 training return: tensor(347.7672, device='cuda:0')
episode: 567 training return: tensor(365.0842, device='cuda:0')
epoch: 142 test_true_pfm: 3571.648943479089 sim_pfm: 411.3864152065168
episode: 568 training return: tensor(365.0031, device='cuda:0')
episode: 569 training return: tensor(393.0980, device='cuda:0')
episode: 570 training return: tensor(367.2657, device='cuda:0')
episode: 571 training return: tensor(393.8938, device='cuda:0')
epoch: 143 test_true_pfm: 3546.029060018361 sim_pfm: 389.9656644903977
episode: 572 training return: tensor(440.8646, device='cuda:0')
episode: 573 training return: tensor(372.9522, device='cuda:0')
episode: 574 training return: tensor(328.4131, device='cuda:0')
episode: 575 training return: tensor(375.9278, device='cuda:0')
epoch: 144 test_true_pfm: 3643.1219279848533 sim_pfm: 483.46924331227393
episode: 576 training return: tensor(351.0810, device='cuda:0')
episode: 577 training return: tensor(395.5461, device='cuda:0')
episode: 578 training return: tensor(462.0829, device='cuda:0')
episode: 579 training return: tensor(361.6109, device='cuda:0')
epoch: 145 test_true_pfm: 3539.8186488075175 sim_pfm: 316.9153127699974
episode: 580 training return: tensor(350.1683, device='cuda:0')
episode: 581 training return: tensor(414.8932, device='cuda:0')
episode: 582 training return: tensor(352.2080, device='cuda:0')
episode: 583 training return: tensor(390.1105, device='cuda:0')
epoch: 146 test_true_pfm: 3567.139534320777 sim_pfm: 410.60486116372823
episode: 584 training return: tensor(410.8018, device='cuda:0')
episode: 585 training return: tensor(432.6584, device='cuda:0')
episode: 586 training return: tensor(395.0124, device='cuda:0')
episode: 587 training return: tensor(353.1404, device='cuda:0')
epoch: 147 test_true_pfm: 3507.253166643042 sim_pfm: 439.5365337453938
episode: 588 training return: tensor(396.0912, device='cuda:0')
episode: 589 training return: tensor(318.8821, device='cuda:0')
episode: 590 training return: tensor(445.6744, device='cuda:0')
episode: 591 training return: tensor(420.2954, device='cuda:0')
epoch: 148 test_true_pfm: 3621.6589775954562 sim_pfm: 256.3539627644156
episode: 592 training return: tensor(317.4254, device='cuda:0')
episode: 593 training return: tensor(305.3627, device='cuda:0')
episode: 594 training return: tensor(445.8768, device='cuda:0')
episode: 595 training return: tensor(413.6172, device='cuda:0')
epoch: 149 test_true_pfm: 3559.3198924576295 sim_pfm: 448.4636832193937
episode: 596 training return: tensor(348.4335, device='cuda:0')
episode: 597 training return: tensor(419.9997, device='cuda:0')
episode: 598 training return: tensor(403.7330, device='cuda:0')
episode: 599 training return: tensor(389.1201, device='cuda:0')
epoch: 150 test_true_pfm: 2809.3688207115047 sim_pfm: 425.95149591953185
