['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '3', '--data', '3000']
epoch: 0 training_loss 0.24344196803867818 test_loss: 0.10578469038009644
epoch: 1 training_loss 0.2039126054942608 test_loss: 0.1055065631866455
epoch: 2 training_loss 0.20201695948839188 test_loss: 0.09210886359214783
epoch: 3 training_loss 0.18304831348359585 test_loss: 0.09252462983131408
epoch: 4 training_loss 0.1894545914232731 test_loss: 0.09485096335411072
epoch: 5 training_loss 0.1814048283547163 test_loss: 0.0939379096031189
epoch: 6 training_loss 0.19163875393569468 test_loss: 0.089265376329422
epoch: 7 training_loss 0.18306865878403186 test_loss: 0.09516193270683289
epoch: 8 training_loss 0.18311653435230255 test_loss: 0.09323341250419617
epoch: 9 training_loss 0.1789642332494259 test_loss: 0.08984283804893493
epoch: 10 training_loss 0.18414216943085193 test_loss: 0.09360519647598267
epoch: 11 training_loss 0.1691381299495697 test_loss: 0.0940065085887909
epoch: 12 training_loss 0.17080168381333352 test_loss: 0.09346645474433898
epoch: 13 training_loss 0.18259634777903558 test_loss: 0.08906281590461732
epoch: 14 training_loss 0.18150508746504784 test_loss: 0.0967205286026001
epoch: 15 training_loss 0.17850687377154828 test_loss: 0.09106810688972473
epoch: 16 training_loss 0.1780832339078188 test_loss: 0.09063809514045715
epoch: 17 training_loss 0.17506322741508484 test_loss: 0.09648098945617675
epoch: 18 training_loss 0.1759216743707657 test_loss: 0.08751729130744934
epoch: 19 training_loss 0.1760169118642807 test_loss: 0.09509944915771484
epoch: 20 training_loss 0.17645418137311936 test_loss: 0.09172013998031617
epoch: 21 training_loss 0.17393287431448698 test_loss: 0.09750139117240905
epoch: 22 training_loss 0.1737587858736515 test_loss: 0.09139163494110107
epoch: 23 training_loss 0.16664355576038362 test_loss: 0.09437154531478882
epoch: 24 training_loss 0.17691371850669385 test_loss: 0.09663232564926147
epoch: 25 training_loss 0.16756636533886193 test_loss: 0.09146289229393005
epoch: 26 training_loss 0.17637404158711434 test_loss: 0.09181453585624695
epoch: 27 training_loss 0.17175447076559067 test_loss: 0.09450013637542724
epoch: 28 training_loss 0.17068549193441868 test_loss: 0.09269236922264099
epoch: 29 training_loss 0.16586760781705379 test_loss: 0.0915682554244995
epoch: 30 training_loss 0.16645320154726506 test_loss: 0.09578856229782104
epoch: 31 training_loss 0.16293331488966942 test_loss: 0.09907365441322327
epoch: 32 training_loss 0.1657319984957576 test_loss: 0.09457806944847107
epoch: 33 training_loss 0.172660653591156 test_loss: 0.0974555790424347
epoch: 34 training_loss 0.16380131673067808 test_loss: 0.09543905258178711
epoch: 35 training_loss 0.16871409557759762 test_loss: 0.09822391271591187
epoch: 36 training_loss 0.16484798572957515 test_loss: 0.09584929943084716
epoch: 37 training_loss 0.16566287606954574 test_loss: 0.09524788856506347
epoch: 38 training_loss 0.15622123919427394 test_loss: 0.09470415711402894
epoch: 39 training_loss 0.16788630187511444 test_loss: 0.0950146734714508
epoch: 40 training_loss 0.16748700268566608 test_loss: 0.09374855160713196
epoch: 41 training_loss 0.15469586573541164 test_loss: 0.09786019325256348
epoch: 42 training_loss 0.15619325216859578 test_loss: 0.09481607675552368
epoch: 43 training_loss 0.1648850304633379 test_loss: 0.09297480583190917
epoch: 44 training_loss 0.1643011599034071 test_loss: 0.09737483859062195
epoch: 45 training_loss 0.1658314334601164 test_loss: 0.09779248237609864
epoch: 46 training_loss 0.1623704357817769 test_loss: 0.09663317203521729
epoch: 47 training_loss 0.16248090386390687 test_loss: 0.09729305505752564
epoch: 48 training_loss 0.15248430557549 test_loss: 0.09712905883789062
epoch: 49 training_loss 0.1569714479893446 test_loss: 0.09466850161552429
epoch: 50 training_loss 0.1594262135028839 test_loss: 0.09743738770484925
epoch: 51 training_loss 0.16447912260890007 test_loss: 0.10279505252838135
epoch: 52 training_loss 0.15656277649104594 test_loss: 0.0982535183429718
epoch: 53 training_loss 0.158600679859519 test_loss: 0.09806994795799255
epoch: 54 training_loss 0.15672474674880504 test_loss: 0.09723331332206726
epoch: 55 training_loss 0.1561513775587082 test_loss: 0.10581134557723999
epoch: 56 training_loss 0.1545411840826273 test_loss: 0.09992459416389465
epoch: 57 training_loss 0.1501448581367731 test_loss: 0.09476589560508727
epoch: 58 training_loss 0.1565101220086217 test_loss: 0.09763108491897583
epoch: 59 training_loss 0.14974020697176457 test_loss: 0.09668397307395935
epoch: 60 training_loss 0.14843259923160076 test_loss: 0.1032286286354065
epoch: 61 training_loss 0.15302189163863658 test_loss: 0.1023586630821228
epoch: 62 training_loss 0.15198188841342927 test_loss: 0.10209968090057372
epoch: 63 training_loss 0.15557741835713387 test_loss: 0.10275195837020874
epoch: 64 training_loss 0.1487746063619852 test_loss: 0.10367431640625
epoch: 65 training_loss 0.15062117889523507 test_loss: 0.10496309995651246
epoch: 66 training_loss 0.15271653443574906 test_loss: 0.10423249006271362
epoch: 67 training_loss 0.1520724096149206 test_loss: 0.09800652265548707
epoch: 68 training_loss 0.14786764815449716 test_loss: 0.10560123920440674
epoch: 69 training_loss 0.15056602761149407 test_loss: 0.09847102761268615
epoch: 70 training_loss 0.14403805036097764 test_loss: 0.10933436155319214
epoch: 71 training_loss 0.14659001398831606 test_loss: 0.10537647008895874
epoch: 72 training_loss 0.14395069785416126 test_loss: 0.1066712737083435
epoch: 73 training_loss 0.14692241720855237 test_loss: 0.10486496686935425
epoch: 74 training_loss 0.1442502399533987 test_loss: 0.1057316541671753
epoch: 75 training_loss 0.14496296666562558 test_loss: 0.1071961522102356
epoch: 76 training_loss 0.14132082477211952 test_loss: 0.10686274766921997
epoch: 77 training_loss 0.13821364689618348 test_loss: 0.10855382680892944
epoch: 78 training_loss 0.14372956287115812 test_loss: 0.11453115940093994
epoch: 79 training_loss 0.13515013229101897 test_loss: 0.1130631685256958
epoch: 80 training_loss 0.14144853476434946 test_loss: 0.11632415056228637
epoch: 81 training_loss 0.13862903032451868 test_loss: 0.10916064977645874
epoch: 82 training_loss 0.13818632237613201 test_loss: 0.1118012547492981
epoch: 83 training_loss 0.13740328256040812 test_loss: 0.11318854093551636
epoch: 84 training_loss 0.13952357366681098 test_loss: 0.11342771053314209
epoch: 85 training_loss 0.13070085000246764 test_loss: 0.11906534433364868
epoch: 86 training_loss 0.1303082775324583 test_loss: 0.12126387357711792
epoch: 87 training_loss 0.1263431014865637 test_loss: 0.13176536560058594
epoch: 88 training_loss 0.1299308170005679 test_loss: 0.12289237976074219
epoch: 89 training_loss 0.13164857491850854 test_loss: 0.1152880311012268
epoch: 90 training_loss 0.12728077679872513 test_loss: 0.11547863483428955
epoch: 91 training_loss 0.13290523659437894 test_loss: 0.12080801725387573
epoch: 92 training_loss 0.13122680809348822 test_loss: 0.13001405000686644
epoch: 93 training_loss 0.12852186448872088 test_loss: 0.12611433267593383
epoch: 94 training_loss 0.12610073368996383 test_loss: 0.12194379568099975
epoch: 95 training_loss 0.11851192317903042 test_loss: 0.1269922137260437
epoch: 96 training_loss 0.12620164189487695 test_loss: 0.13080228567123414
epoch: 97 training_loss 0.1235453786328435 test_loss: 0.1252828598022461
epoch: 98 training_loss 0.12717955481261015 test_loss: 0.12851425409317016
epoch: 99 training_loss 0.11591185305267572 test_loss: 0.12931445837020875
epoch: 100 training_loss 0.1196131020411849 test_loss: 0.1310913920402527
epoch: 101 training_loss 0.11125054828822613 test_loss: 0.12920029163360597
epoch: 102 training_loss 0.11882432751357555 test_loss: 0.1357110023498535
epoch: 103 training_loss 0.11362179156392813 test_loss: 0.1375502586364746
epoch: 104 training_loss 0.11664477918297052 test_loss: 0.13999749422073365
epoch: 105 training_loss 0.12528063036501408 test_loss: 0.13713221549987792
epoch: 106 training_loss 0.11325214609503746 test_loss: 0.1306299924850464
epoch: 107 training_loss 0.11217130348086357 test_loss: 0.1450977921485901
epoch: 108 training_loss 0.11154475651681423 test_loss: 0.1402935266494751
epoch: 109 training_loss 0.11426132313907146 test_loss: 0.1313210368156433
epoch: 110 training_loss 0.11137246251106263 test_loss: 0.13780215978622437
epoch: 111 training_loss 0.1118559719249606 test_loss: 0.1485422134399414
epoch: 112 training_loss 0.10830812381580472 test_loss: 0.13806895017623902
epoch: 113 training_loss 0.1054308758676052 test_loss: 0.15019201040267943
epoch: 114 training_loss 0.09871219731867313 test_loss: 0.14578933715820314
epoch: 115 training_loss 0.10389752252027393 test_loss: 0.15789114236831664
epoch: 116 training_loss 0.10571110632270575 test_loss: 0.15077494382858275
epoch: 117 training_loss 0.09438562648370862 test_loss: 0.15783857107162474
epoch: 118 training_loss 0.09957228492945433 test_loss: 0.1516807794570923
epoch: 119 training_loss 0.10223417751491069 test_loss: 0.14824788570404052
epoch: 120 training_loss 0.10256563991308212 test_loss: 0.14962561130523683
epoch: 121 training_loss 0.09010756865143776 test_loss: 0.1490636944770813
epoch: 122 training_loss 0.09996511563658714 test_loss: 0.15635194778442382
epoch: 123 training_loss 0.09436081394553185 test_loss: 0.15392061471939086
epoch: 124 training_loss 0.09480782162398099 test_loss: 0.15825223922729492
epoch: 125 training_loss 0.09418296273797751 test_loss: 0.14598618745803832
epoch: 126 training_loss 0.09183856431394816 test_loss: 0.1543172240257263
epoch: 127 training_loss 0.09202853064984083 test_loss: 0.1575762987136841
epoch: 128 training_loss 0.09054741464555263 test_loss: 0.16563496589660645
epoch: 129 training_loss 0.08757066946476698 test_loss: 0.1669475555419922
epoch: 130 training_loss 0.08406139079481363 test_loss: 0.17099953889846803
epoch: 131 training_loss 0.0815726151689887 test_loss: 0.15869609117507935
epoch: 132 training_loss 0.08325184037908911 test_loss: 0.17011513710021972
epoch: 133 training_loss 0.08111873297020793 test_loss: 0.1661301851272583
epoch: 134 training_loss 0.08793095126748085 test_loss: 0.16067607402801515
epoch: 135 training_loss 0.08700628660619258 test_loss: 0.15553560256958007
epoch: 136 training_loss 0.0831493178382516 test_loss: 0.1752269148826599
epoch: 137 training_loss 0.07987910425290465 test_loss: 0.17490906715393068
epoch: 138 training_loss 0.07768003530800342 test_loss: 0.1835453152656555
epoch: 139 training_loss 0.08160298749804497 test_loss: 0.16705715656280518
epoch: 140 training_loss 0.08141081118956209 test_loss: 0.1664265513420105
epoch: 141 training_loss 0.07388220213353634 test_loss: 0.16114332675933837
epoch: 142 training_loss 0.07823413258418441 test_loss: 0.17905009984970094
epoch: 143 training_loss 0.07454313162714243 test_loss: 0.1781427502632141
epoch: 144 training_loss 0.06807464390993118 test_loss: 0.18835190534591675
epoch: 145 training_loss 0.06809342857450247 test_loss: 0.1784963369369507
epoch: 146 training_loss 0.06942111909389496 test_loss: 0.18309803009033204
epoch: 147 training_loss 0.07639460137113929 test_loss: 0.18399577140808104
epoch: 148 training_loss 0.06334518074989319 test_loss: 0.1790834665298462
epoch: 149 training_loss 0.07449094854295253 test_loss: 0.1710149645805359
epoch: 0 training_loss 7.940774960517883 test_loss: 2.2203338623046873
epoch: 1 training_loss 3.859612212181091 test_loss: 1.5561902046203613
epoch: 2 training_loss 2.7956136751174925 test_loss: 1.1940110206604004
epoch: 3 training_loss 2.237709165811539 test_loss: 0.9936247825622558
epoch: 4 training_loss 1.8814105665683747 test_loss: 0.855168628692627
epoch: 5 training_loss 1.675070163011551 test_loss: 0.7626163005828858
epoch: 6 training_loss 1.5065897357463838 test_loss: 0.6971652507781982
epoch: 7 training_loss 1.4043304872512818 test_loss: 0.6555577278137207
epoch: 8 training_loss 1.2815569531917572 test_loss: 0.6157375335693359
epoch: 9 training_loss 1.202415412068367 test_loss: 0.5826908111572265
epoch: 10 training_loss 1.1488154351711273 test_loss: 0.5570856094360351
epoch: 11 training_loss 1.0986457455158234 test_loss: 0.5511041164398194
epoch: 12 training_loss 1.051469794511795 test_loss: 0.5141212463378906
epoch: 13 training_loss 1.0127844786643982 test_loss: 0.49942774772644044
epoch: 14 training_loss 0.9857702606916428 test_loss: 0.47945523262023926
epoch: 15 training_loss 0.9600924384593964 test_loss: 0.46946215629577637
epoch: 16 training_loss 0.9314437800645828 test_loss: 0.46083688735961914
epoch: 17 training_loss 0.9103223145008087 test_loss: 0.4418914318084717
epoch: 18 training_loss 0.8783797717094421 test_loss: 0.4524266242980957
epoch: 19 training_loss 0.8595862329006195 test_loss: 0.43110170364379885
epoch: 20 training_loss 0.8415006107091904 test_loss: 0.41590080261230467
epoch: 21 training_loss 0.826662027835846 test_loss: 0.4078995227813721
epoch: 22 training_loss 0.8156194865703583 test_loss: 0.4073173522949219
epoch: 23 training_loss 0.80347287774086 test_loss: 0.3941598176956177
epoch: 24 training_loss 0.7891960680484772 test_loss: 0.40074639320373534
epoch: 25 training_loss 0.7733209758996964 test_loss: 0.38459751605987547
epoch: 26 training_loss 0.7777935129404068 test_loss: 0.37983317375183107
epoch: 27 training_loss 0.7458919847011566 test_loss: 0.38403878211975095
epoch: 28 training_loss 0.7387612456083298 test_loss: 0.37789263725280764
epoch: 29 training_loss 0.7345472711324692 test_loss: 0.3747807264328003
epoch: 30 training_loss 0.7226873421669007 test_loss: 0.3610461473464966
epoch: 31 training_loss 0.7331544387340546 test_loss: 0.3835154056549072
epoch: 32 training_loss 0.7182166290283203 test_loss: 0.35176801681518555
epoch: 33 training_loss 0.6992276901006699 test_loss: 0.3455821514129639
epoch: 34 training_loss 0.6922359317541122 test_loss: 0.3503737449645996
epoch: 35 training_loss 0.6923907333612442 test_loss: 0.3483572959899902
epoch: 36 training_loss 0.6854128313064575 test_loss: 0.33469491004943847
epoch: 37 training_loss 0.6748681342601777 test_loss: 0.342427659034729
epoch: 38 training_loss 0.6699674850702286 test_loss: 0.33517410755157473
epoch: 39 training_loss 0.6539457833766937 test_loss: 0.3408481597900391
epoch: 40 training_loss 0.6547498279809951 test_loss: 0.33145239353179934
epoch: 41 training_loss 0.672579293847084 test_loss: 0.34673919677734377
epoch: 42 training_loss 0.6680363190174102 test_loss: 0.3329396963119507
epoch: 43 training_loss 0.6397174263000488 test_loss: 0.31929640769958495
epoch: 44 training_loss 0.6493437623977661 test_loss: 0.3336874723434448
epoch: 45 training_loss 0.6392778640985489 test_loss: 0.319217848777771
epoch: 46 training_loss 0.6255846631526947 test_loss: 0.3106558799743652
epoch: 47 training_loss 0.6364984744787217 test_loss: 0.32345621585845946
epoch: 48 training_loss 0.642333533167839 test_loss: 0.3282289505004883
epoch: 49 training_loss 0.6212806731462479 test_loss: 0.3173999547958374
epoch: 50 training_loss 0.6106451416015625 test_loss: 0.31680750846862793
epoch: 51 training_loss 0.6104796314239502 test_loss: 0.3047779560089111
epoch: 52 training_loss 0.613038163781166 test_loss: 0.3005366325378418
epoch: 53 training_loss 0.6020587080717087 test_loss: 0.2982661724090576
epoch: 54 training_loss 0.6007677501440049 test_loss: 0.3027598142623901
epoch: 55 training_loss 0.5995125377178192 test_loss: 0.30859353542327883
epoch: 56 training_loss 0.5978021013736725 test_loss: 0.29637808799743653
epoch: 57 training_loss 0.5865652054548264 test_loss: 0.2909997940063477
epoch: 58 training_loss 0.5814846587181092 test_loss: 0.2935234546661377
epoch: 59 training_loss 0.5915585774183273 test_loss: 0.30703272819519045
epoch: 60 training_loss 0.5870421254634857 test_loss: 0.29945428371429444
epoch: 61 training_loss 0.5858600723743439 test_loss: 0.297464919090271
epoch: 62 training_loss 0.5810880875587463 test_loss: 0.30637547969818113
epoch: 63 training_loss 0.5726933521032334 test_loss: 0.29265766143798827
epoch: 64 training_loss 0.5845538514852524 test_loss: 0.2924652576446533
epoch: 65 training_loss 0.5671643298864365 test_loss: 0.28854587078094485
epoch: 66 training_loss 0.56561283826828 test_loss: 0.28940417766571047
epoch: 67 training_loss 0.5603049254417419 test_loss: 0.28273463249206543
epoch: 68 training_loss 0.5753641241788864 test_loss: 0.2870598554611206
epoch: 69 training_loss 0.5537062102556228 test_loss: 0.2793370962142944
epoch: 70 training_loss 0.5531914272904396 test_loss: 0.27544076442718507
epoch: 71 training_loss 0.5648514106869698 test_loss: 0.2781637907028198
epoch: 72 training_loss 0.5638396501541137 test_loss: 0.2869623422622681
epoch: 73 training_loss 0.5572539675235748 test_loss: 0.27770395278930665
epoch: 74 training_loss 0.551660678088665 test_loss: 0.2784044504165649
epoch: 75 training_loss 0.5468906107544899 test_loss: 0.27999076843261717
epoch: 76 training_loss 0.5549083995819092 test_loss: 0.27171390056610106
epoch: 77 training_loss 0.550198540687561 test_loss: 0.2730921983718872
epoch: 78 training_loss 0.5513667666912079 test_loss: 0.28661179542541504
epoch: 79 training_loss 0.5561060118675232 test_loss: 0.2755517244338989
epoch: 80 training_loss 0.541788456439972 test_loss: 0.2739424228668213
epoch: 81 training_loss 0.5394327744841576 test_loss: 0.2801291227340698
epoch: 82 training_loss 0.5383589175343514 test_loss: 0.27288453578948973
epoch: 83 training_loss 0.5392312422394753 test_loss: 0.27119812965393064
epoch: 84 training_loss 0.537523478269577 test_loss: 0.26271560192108157
epoch: 85 training_loss 0.5334823581576348 test_loss: 0.2717137336730957
epoch: 86 training_loss 0.5361411532759667 test_loss: 0.2804455280303955
epoch: 87 training_loss 0.5452417030930519 test_loss: 0.27342007160186765
epoch: 88 training_loss 0.533062390089035 test_loss: 0.26099941730499265
epoch: 89 training_loss 0.53132434040308 test_loss: 0.2592161655426025
epoch: 90 training_loss 0.5263708719611168 test_loss: 0.26696066856384276
epoch: 91 training_loss 0.5249015867710114 test_loss: 0.258266806602478
epoch: 92 training_loss 0.5197935158014297 test_loss: 0.26119468212127683
epoch: 93 training_loss 0.5361846885085106 test_loss: 0.2626351833343506
epoch: 94 training_loss 0.5329184892773629 test_loss: 0.25964720249176027
epoch: 95 training_loss 0.5188903933763505 test_loss: 0.25888140201568605
epoch: 96 training_loss 0.516901221871376 test_loss: 0.262613844871521
epoch: 97 training_loss 0.5250286448001862 test_loss: 0.2588942050933838
epoch: 98 training_loss 0.5166001918911934 test_loss: 0.2616742610931396
epoch: 99 training_loss 0.5190213719010353 test_loss: 0.2596397399902344
epoch: 100 training_loss 0.5295392653346062 test_loss: 0.2760142803192139
epoch: 101 training_loss 0.5124743846058846 test_loss: 0.2551154136657715
epoch: 102 training_loss 0.5143449690937996 test_loss: 0.26205623149871826
epoch: 103 training_loss 0.5147045201063156 test_loss: 0.27501983642578126
epoch: 104 training_loss 0.5210425990819931 test_loss: 0.2540605545043945
epoch: 105 training_loss 0.5214797815680504 test_loss: 0.2729379177093506
epoch: 106 training_loss 0.5106903827190399 test_loss: 0.2505242347717285
epoch: 107 training_loss 0.49889015227556227 test_loss: 0.25079917907714844
epoch: 108 training_loss 0.5048792704939842 test_loss: 0.2531684398651123
epoch: 109 training_loss 0.5051174107193946 test_loss: 0.2513082265853882
epoch: 110 training_loss 0.5105627170205116 test_loss: 0.25122776031494143
epoch: 111 training_loss 0.5074440297484398 test_loss: 0.2642372608184814
epoch: 112 training_loss 0.5090581598877907 test_loss: 0.26440632343292236
epoch: 113 training_loss 0.5088338422775268 test_loss: 0.2538498640060425
epoch: 114 training_loss 0.5012780880928039 test_loss: 0.2456139087677002
epoch: 115 training_loss 0.49822681248188017 test_loss: 0.2669950485229492
epoch: 116 training_loss 0.5159606605768203 test_loss: 0.26225054264068604
epoch: 117 training_loss 0.49979723155498507 test_loss: 0.24757683277130127
epoch: 118 training_loss 0.5028945431113243 test_loss: 0.25488972663879395
epoch: 119 training_loss 0.5062772166728974 test_loss: 0.24752981662750245
epoch: 120 training_loss 0.49340571343898776 test_loss: 0.2505666971206665
epoch: 121 training_loss 0.4990896618366241 test_loss: 0.254681396484375
epoch: 122 training_loss 0.500650733411312 test_loss: 0.25676860809326174
epoch: 123 training_loss 0.496830096244812 test_loss: 0.26879937648773194
epoch: 124 training_loss 0.5001103267073631 test_loss: 0.26112661361694334
epoch: 125 training_loss 0.4982297325134277 test_loss: 0.24461901187896729
epoch: 126 training_loss 0.4948150470852852 test_loss: 0.2576702833175659
epoch: 127 training_loss 0.4883613306283951 test_loss: 0.24609105587005614
epoch: 128 training_loss 0.4872597414255142 test_loss: 0.2495496988296509
epoch: 129 training_loss 0.48473508805036547 test_loss: 0.24217920303344725
epoch: 130 training_loss 0.49892442882061006 test_loss: 0.2522254467010498
epoch: 131 training_loss 0.4966915526986122 test_loss: 0.2433032751083374
epoch: 132 training_loss 0.4874513602256775 test_loss: 0.23940248489379884
epoch: 133 training_loss 0.48943625539541247 test_loss: 0.24738447666168212
epoch: 134 training_loss 0.48873389691114427 test_loss: 0.24219837188720703
epoch: 135 training_loss 0.4850106346607208 test_loss: 0.2591679811477661
epoch: 136 training_loss 0.48247642159461973 test_loss: 0.24002668857574463
epoch: 137 training_loss 0.49353305250406265 test_loss: 0.24079735279083253
epoch: 138 training_loss 0.4871181547641754 test_loss: 0.26245453357696535
epoch: 139 training_loss 0.4955267581343651 test_loss: 0.258192253112793
epoch: 140 training_loss 0.4944036999344826 test_loss: 0.24510352611541747
epoch: 141 training_loss 0.4792838329076767 test_loss: 0.24449543952941893
epoch: 142 training_loss 0.47743722528219223 test_loss: 0.24348170757293702
epoch: 143 training_loss 0.48192801207304004 test_loss: 0.23968617916107177
epoch: 144 training_loss 0.48081737875938413 test_loss: 0.24050016403198243
epoch: 145 training_loss 0.479693176150322 test_loss: 0.2499946117401123
epoch: 146 training_loss 0.48152114272117613 test_loss: 0.23962576389312745
epoch: 147 training_loss 0.48544852912425995 test_loss: 0.2363609790802002
epoch: 148 training_loss 0.47608100980520246 test_loss: 0.23772258758544923
epoch: 149 training_loss 0.4877123764157295 test_loss: 0.26172513961791993
1389.707373368545
episode: 0 training return: tensor(31.1209, device='cuda:0')
episode: 1 training return: tensor(-91.3717, device='cuda:0')
episode: 2 training return: tensor(20.8462, device='cuda:0')
episode: 3 training return: tensor(-5.9010, device='cuda:0')
epoch: 1 test_true_pfm: 2188.3260220314223 sim_pfm: 331.3229324785061
episode: 4 training return: tensor(-65.8731, device='cuda:0')
episode: 5 training return: tensor(-101.2556, device='cuda:0')
episode: 6 training return: tensor(311.5948, device='cuda:0')
episode: 7 training return: tensor(-94.3487, device='cuda:0')
epoch: 2 test_true_pfm: 1308.8091635230242 sim_pfm: -58.37233487935737
episode: 8 training return: tensor(43.0499, device='cuda:0')
episode: 9 training return: tensor(136.8967, device='cuda:0')
episode: 10 training return: tensor(116.4913, device='cuda:0')
episode: 11 training return: tensor(-109.1357, device='cuda:0')
epoch: 3 test_true_pfm: 1410.966587897228 sim_pfm: -90.63245168980211
episode: 12 training return: tensor(67.3368, device='cuda:0')
episode: 13 training return: tensor(318.8252, device='cuda:0')
episode: 14 training return: tensor(-72.0006, device='cuda:0')
episode: 15 training return: tensor(-11.6132, device='cuda:0')
epoch: 4 test_true_pfm: 1291.0727382335554 sim_pfm: -41.479502936825156
episode: 16 training return: tensor(-39.1313, device='cuda:0')
episode: 17 training return: tensor(176.5503, device='cuda:0')
episode: 18 training return: tensor(-22.6823, device='cuda:0')
episode: 19 training return: tensor(-100.9835, device='cuda:0')
epoch: 5 test_true_pfm: 1397.5779415612303 sim_pfm: 11.540783940601008
episode: 20 training return: tensor(-136.1047, device='cuda:0')
episode: 21 training return: tensor(-0.6144, device='cuda:0')
episode: 22 training return: tensor(-95.4621, device='cuda:0')
episode: 23 training return: tensor(-28.1581, device='cuda:0')
epoch: 6 test_true_pfm: 1276.2926538254858 sim_pfm: 3.076766503819575
episode: 24 training return: tensor(-25.5204, device='cuda:0')
episode: 25 training return: tensor(-84.1219, device='cuda:0')
episode: 26 training return: tensor(278.0754, device='cuda:0')
episode: 27 training return: tensor(61.8520, device='cuda:0')
epoch: 7 test_true_pfm: 1284.9087420850256 sim_pfm: -62.40826114453375
episode: 28 training return: tensor(24.3843, device='cuda:0')
episode: 29 training return: tensor(198.4576, device='cuda:0')
episode: 30 training return: tensor(-35.0404, device='cuda:0')
episode: 31 training return: tensor(4.5406, device='cuda:0')
epoch: 8 test_true_pfm: 1299.1945271163672 sim_pfm: -49.58335464959964
episode: 32 training return: tensor(273.4584, device='cuda:0')
episode: 33 training return: tensor(115.4694, device='cuda:0')
episode: 34 training return: tensor(59.2882, device='cuda:0')
episode: 35 training return: tensor(-25.8228, device='cuda:0')
epoch: 9 test_true_pfm: 1649.7426292931061 sim_pfm: 168.46174140813915
episode: 36 training return: tensor(23.6019, device='cuda:0')
episode: 37 training return: tensor(27.0139, device='cuda:0')
episode: 38 training return: tensor(-48.5949, device='cuda:0')
episode: 39 training return: tensor(-30.2613, device='cuda:0')
epoch: 10 test_true_pfm: 1480.7288470711344 sim_pfm: -7.519868474802934
episode: 40 training return: tensor(42.2172, device='cuda:0')
episode: 41 training return: tensor(-81.1902, device='cuda:0')
episode: 42 training return: tensor(-59.7240, device='cuda:0')
episode: 43 training return: tensor(-35.8783, device='cuda:0')
epoch: 11 test_true_pfm: 1586.812349439737 sim_pfm: -7.293680564267561
episode: 44 training return: tensor(70.7889, device='cuda:0')
episode: 45 training return: tensor(-102.7046, device='cuda:0')
episode: 46 training return: tensor(127.5615, device='cuda:0')
episode: 47 training return: tensor(-15.6287, device='cuda:0')
epoch: 12 test_true_pfm: 2523.94275885018 sim_pfm: 258.0312677224477
episode: 48 training return: tensor(-78.7628, device='cuda:0')
episode: 49 training return: tensor(15.7211, device='cuda:0')
episode: 50 training return: tensor(-38.4195, device='cuda:0')
episode: 51 training return: tensor(-36.9294, device='cuda:0')
epoch: 13 test_true_pfm: 2494.346065424754 sim_pfm: 266.7804445978254
episode: 52 training return: tensor(-9.8664, device='cuda:0')
episode: 53 training return: tensor(-9.5141, device='cuda:0')
episode: 54 training return: tensor(-13.8457, device='cuda:0')
episode: 55 training return: tensor(-75.9496, device='cuda:0')
epoch: 14 test_true_pfm: 1757.8422738094796 sim_pfm: -9.45607351854172
episode: 56 training return: tensor(88.6700, device='cuda:0')
episode: 57 training return: tensor(-91.2116, device='cuda:0')
episode: 58 training return: tensor(211.4380, device='cuda:0')
episode: 59 training return: tensor(-65.9733, device='cuda:0')
epoch: 15 test_true_pfm: 1549.2548828035558 sim_pfm: 1.2300740052208614
episode: 60 training return: tensor(241.2624, device='cuda:0')
episode: 61 training return: tensor(-109.3389, device='cuda:0')
episode: 62 training return: tensor(143.0823, device='cuda:0')
episode: 63 training return: tensor(337.2023, device='cuda:0')
epoch: 16 test_true_pfm: 2243.0223810775333 sim_pfm: 135.4321664525196
episode: 64 training return: tensor(232.5082, device='cuda:0')
episode: 65 training return: tensor(483.4784, device='cuda:0')
episode: 66 training return: tensor(-49.3677, device='cuda:0')
episode: 67 training return: tensor(-6.6646, device='cuda:0')
epoch: 17 test_true_pfm: 2250.2129153093124 sim_pfm: 443.05881419501384
episode: 68 training return: tensor(314.5692, device='cuda:0')
episode: 69 training return: tensor(167.9037, device='cuda:0')
episode: 70 training return: tensor(-31.3670, device='cuda:0')
episode: 71 training return: tensor(-20.2732, device='cuda:0')
epoch: 18 test_true_pfm: 2959.4239509251943 sim_pfm: 397.50897773680236
episode: 72 training return: tensor(-52.1763, device='cuda:0')
episode: 73 training return: tensor(464.3851, device='cuda:0')
episode: 74 training return: tensor(5.0412, device='cuda:0')
episode: 75 training return: tensor(296.3551, device='cuda:0')
epoch: 19 test_true_pfm: 2171.029859522117 sim_pfm: 293.2498334606644
episode: 76 training return: tensor(308.7261, device='cuda:0')
episode: 77 training return: tensor(-63.8073, device='cuda:0')
episode: 78 training return: tensor(462.4174, device='cuda:0')
episode: 79 training return: tensor(94.8992, device='cuda:0')
epoch: 20 test_true_pfm: 1486.5457460440814 sim_pfm: 96.52930977543777
episode: 80 training return: tensor(-7.8339, device='cuda:0')
episode: 81 training return: tensor(288.9565, device='cuda:0')
episode: 82 training return: tensor(22.8983, device='cuda:0')
episode: 83 training return: tensor(35.2885, device='cuda:0')
epoch: 21 test_true_pfm: 2803.8216735035126 sim_pfm: 379.71072265750263
episode: 84 training return: tensor(-4.8719, device='cuda:0')
episode: 85 training return: tensor(-32.6014, device='cuda:0')
episode: 86 training return: tensor(37.4394, device='cuda:0')
episode: 87 training return: tensor(9.8025, device='cuda:0')
epoch: 22 test_true_pfm: 1535.17240586147 sim_pfm: 50.62283457904899
episode: 88 training return: tensor(156.1412, device='cuda:0')
episode: 89 training return: tensor(121.9628, device='cuda:0')
episode: 90 training return: tensor(-1.1185, device='cuda:0')
episode: 91 training return: tensor(135.5449, device='cuda:0')
epoch: 23 test_true_pfm: 1720.0913061338179 sim_pfm: 131.36064925374617
episode: 92 training return: tensor(516.9996, device='cuda:0')
episode: 93 training return: tensor(66.6274, device='cuda:0')
episode: 94 training return: tensor(-71.1178, device='cuda:0')
episode: 95 training return: tensor(280.1646, device='cuda:0')
epoch: 24 test_true_pfm: 1568.475307121588 sim_pfm: 19.81995991275956
episode: 96 training return: tensor(55.1200, device='cuda:0')
episode: 97 training return: tensor(-33.0515, device='cuda:0')
episode: 98 training return: tensor(85.9657, device='cuda:0')
episode: 99 training return: tensor(41.9890, device='cuda:0')
epoch: 25 test_true_pfm: 1665.7708286610562 sim_pfm: 160.81987209571525
episode: 100 training return: tensor(201.0150, device='cuda:0')
episode: 101 training return: tensor(-102.2903, device='cuda:0')
episode: 102 training return: tensor(182.7359, device='cuda:0')
episode: 103 training return: tensor(71.0734, device='cuda:0')
epoch: 26 test_true_pfm: 2504.918316974008 sim_pfm: 495.05594228891033
episode: 104 training return: tensor(-22.5346, device='cuda:0')
episode: 105 training return: tensor(60.5069, device='cuda:0')
episode: 106 training return: tensor(-76.6806, device='cuda:0')
episode: 107 training return: tensor(-17.5622, device='cuda:0')
epoch: 27 test_true_pfm: 2065.7262376751037 sim_pfm: 127.61238951825847
episode: 108 training return: tensor(-15.4723, device='cuda:0')
episode: 109 training return: tensor(-19.2451, device='cuda:0')
episode: 110 training return: tensor(-7.5182, device='cuda:0')
episode: 111 training return: tensor(347.4636, device='cuda:0')
epoch: 28 test_true_pfm: 3214.707126166579 sim_pfm: 327.1673216716251
episode: 112 training return: tensor(76.9757, device='cuda:0')
episode: 113 training return: tensor(188.4649, device='cuda:0')
episode: 114 training return: tensor(-42.3048, device='cuda:0')
episode: 115 training return: tensor(-80.8553, device='cuda:0')
epoch: 29 test_true_pfm: 1593.5028388975613 sim_pfm: 381.0946801675794
episode: 116 training return: tensor(201.5847, device='cuda:0')
episode: 117 training return: tensor(285.9727, device='cuda:0')
episode: 118 training return: tensor(6.8250, device='cuda:0')
episode: 119 training return: tensor(-69.3950, device='cuda:0')
epoch: 30 test_true_pfm: 2695.2656886574405 sim_pfm: 313.8368132240721
episode: 120 training return: tensor(-63.9860, device='cuda:0')
episode: 121 training return: tensor(149.3900, device='cuda:0')
episode: 122 training return: tensor(-18.5805, device='cuda:0')
episode: 123 training return: tensor(-58.5227, device='cuda:0')
epoch: 31 test_true_pfm: 1528.5889109015345 sim_pfm: 6.6704857643441455
episode: 124 training return: tensor(-25.8568, device='cuda:0')
episode: 125 training return: tensor(-77.0727, device='cuda:0')
episode: 126 training return: tensor(259.4801, device='cuda:0')
episode: 127 training return: tensor(-24.6089, device='cuda:0')
epoch: 32 test_true_pfm: 1592.1519400374575 sim_pfm: 49.0236091138795
episode: 128 training return: tensor(-57.9043, device='cuda:0')
episode: 129 training return: tensor(104.1743, device='cuda:0')
episode: 130 training return: tensor(-81.1703, device='cuda:0')
episode: 131 training return: tensor(-35.7318, device='cuda:0')
epoch: 33 test_true_pfm: 2352.060051292215 sim_pfm: 97.55999040983927
episode: 132 training return: tensor(192.5954, device='cuda:0')
episode: 133 training return: tensor(70.1559, device='cuda:0')
episode: 134 training return: tensor(45.1857, device='cuda:0')
episode: 135 training return: tensor(-67.7763, device='cuda:0')
epoch: 34 test_true_pfm: 2502.1680026320987 sim_pfm: 306.7899073942099
episode: 136 training return: tensor(-62.8934, device='cuda:0')
episode: 137 training return: tensor(444.3460, device='cuda:0')
episode: 138 training return: tensor(-4.9464, device='cuda:0')
episode: 139 training return: tensor(274.2539, device='cuda:0')
epoch: 35 test_true_pfm: 1416.629373875752 sim_pfm: -0.32102071574384655
episode: 140 training return: tensor(-52.3375, device='cuda:0')
episode: 141 training return: tensor(-59.0201, device='cuda:0')
episode: 142 training return: tensor(-10.1448, device='cuda:0')
episode: 143 training return: tensor(90.8585, device='cuda:0')
epoch: 36 test_true_pfm: 2022.9929937134532 sim_pfm: 56.13334090674956
episode: 144 training return: tensor(-12.4221, device='cuda:0')
episode: 145 training return: tensor(-72.2862, device='cuda:0')
episode: 146 training return: tensor(151.3060, device='cuda:0')
episode: 147 training return: tensor(-49.0394, device='cuda:0')
epoch: 37 test_true_pfm: 2776.274434039537 sim_pfm: 368.99745592005394
episode: 148 training return: tensor(-11.4610, device='cuda:0')
episode: 149 training return: tensor(-28.1574, device='cuda:0')
episode: 150 training return: tensor(-81.5367, device='cuda:0')
episode: 151 training return: tensor(-75.4593, device='cuda:0')
epoch: 38 test_true_pfm: 2638.0033059279162 sim_pfm: 449.3541458901018
episode: 152 training return: tensor(-28.7423, device='cuda:0')
episode: 153 training return: tensor(-49.0247, device='cuda:0')
episode: 154 training return: tensor(-0.5042, device='cuda:0')
episode: 155 training return: tensor(-89.1146, device='cuda:0')
epoch: 39 test_true_pfm: 1570.6012960047408 sim_pfm: 113.77272544820637
episode: 156 training return: tensor(244.3624, device='cuda:0')
episode: 157 training return: tensor(-6.6513, device='cuda:0')
episode: 158 training return: tensor(-74.0273, device='cuda:0')
episode: 159 training return: tensor(57.0905, device='cuda:0')
epoch: 40 test_true_pfm: 1583.0221226955553 sim_pfm: 32.95036949699473
episode: 160 training return: tensor(-18.3312, device='cuda:0')
episode: 161 training return: tensor(354.7589, device='cuda:0')
episode: 162 training return: tensor(-26.3019, device='cuda:0')
episode: 163 training return: tensor(311.4134, device='cuda:0')
epoch: 41 test_true_pfm: 1873.4479801922844 sim_pfm: 174.88318423399082
episode: 164 training return: tensor(-56.9794, device='cuda:0')
episode: 165 training return: tensor(-12.2257, device='cuda:0')
episode: 166 training return: tensor(148.9634, device='cuda:0')
episode: 167 training return: tensor(121.8603, device='cuda:0')
epoch: 42 test_true_pfm: 2189.8763035219376 sim_pfm: 284.03209123469424
episode: 168 training return: tensor(23.6356, device='cuda:0')
episode: 169 training return: tensor(266.5957, device='cuda:0')
episode: 170 training return: tensor(-88.5514, device='cuda:0')
episode: 171 training return: tensor(-11.3091, device='cuda:0')
epoch: 43 test_true_pfm: 2651.688688548096 sim_pfm: 302.78226910120185
episode: 172 training return: tensor(-53.3808, device='cuda:0')
episode: 173 training return: tensor(31.4471, device='cuda:0')
episode: 174 training return: tensor(-25.5471, device='cuda:0')
episode: 175 training return: tensor(485.1058, device='cuda:0')
epoch: 44 test_true_pfm: 1739.0514502964998 sim_pfm: 71.77235083219905
episode: 176 training return: tensor(199.4209, device='cuda:0')
episode: 177 training return: tensor(38.4535, device='cuda:0')
episode: 178 training return: tensor(273.2321, device='cuda:0')
episode: 179 training return: tensor(-66.4148, device='cuda:0')
epoch: 45 test_true_pfm: 2300.2501640980163 sim_pfm: 200.20535702984975
episode: 180 training return: tensor(17.3637, device='cuda:0')
episode: 181 training return: tensor(-30.6828, device='cuda:0')
episode: 182 training return: tensor(23.4803, device='cuda:0')
episode: 183 training return: tensor(2.7460, device='cuda:0')
epoch: 46 test_true_pfm: 2382.8936006227245 sim_pfm: 241.63168391841464
episode: 184 training return: tensor(127.2550, device='cuda:0')
episode: 185 training return: tensor(-3.0058, device='cuda:0')
episode: 186 training return: tensor(-78.3364, device='cuda:0')
episode: 187 training return: tensor(209.2106, device='cuda:0')
epoch: 47 test_true_pfm: 1487.8196249861394 sim_pfm: 45.584568285228066
episode: 188 training return: tensor(-14.8674, device='cuda:0')
episode: 189 training return: tensor(-70.9647, device='cuda:0')
episode: 190 training return: tensor(277.5078, device='cuda:0')
episode: 191 training return: tensor(-66.9959, device='cuda:0')
epoch: 48 test_true_pfm: 2284.360584490834 sim_pfm: 352.311940265897
episode: 192 training return: tensor(274.1780, device='cuda:0')
episode: 193 training return: tensor(218.4626, device='cuda:0')
episode: 194 training return: tensor(13.3712, device='cuda:0')
episode: 195 training return: tensor(-18.1625, device='cuda:0')
epoch: 49 test_true_pfm: 2077.3223445472127 sim_pfm: 174.23276220696667
episode: 196 training return: tensor(38.7954, device='cuda:0')
episode: 197 training return: tensor(-62.6918, device='cuda:0')
episode: 198 training return: tensor(-44.0597, device='cuda:0')
episode: 199 training return: tensor(24.7066, device='cuda:0')
epoch: 50 test_true_pfm: 2042.081366956502 sim_pfm: 89.00695364084095
episode: 200 training return: tensor(-4.2732, device='cuda:0')
episode: 201 training return: tensor(70.2226, device='cuda:0')
episode: 202 training return: tensor(21.5959, device='cuda:0')
episode: 203 training return: tensor(289.2497, device='cuda:0')
epoch: 51 test_true_pfm: 3165.9119726823023 sim_pfm: 353.9905099900595
episode: 204 training return: tensor(5.6734, device='cuda:0')
episode: 205 training return: tensor(-1.1305, device='cuda:0')
episode: 206 training return: tensor(42.8261, device='cuda:0')
episode: 207 training return: tensor(248.8118, device='cuda:0')
epoch: 52 test_true_pfm: 2900.3565168284044 sim_pfm: 245.98802887604688
episode: 208 training return: tensor(-82.5839, device='cuda:0')
episode: 209 training return: tensor(-22.8986, device='cuda:0')
episode: 210 training return: tensor(12.1634, device='cuda:0')
episode: 211 training return: tensor(-17.1110, device='cuda:0')
epoch: 53 test_true_pfm: 2197.2198357135744 sim_pfm: 284.86943441677914
episode: 212 training return: tensor(206.7650, device='cuda:0')
episode: 213 training return: tensor(-17.3832, device='cuda:0')
episode: 214 training return: tensor(-58.4113, device='cuda:0')
episode: 215 training return: tensor(-18.3966, device='cuda:0')
epoch: 54 test_true_pfm: 2583.5137536755487 sim_pfm: 79.3644306899708
episode: 216 training return: tensor(-15.1537, device='cuda:0')
episode: 217 training return: tensor(363.2666, device='cuda:0')
episode: 218 training return: tensor(-61.6309, device='cuda:0')
episode: 219 training return: tensor(-42.9672, device='cuda:0')
epoch: 55 test_true_pfm: 2627.5688292404707 sim_pfm: 392.1500884523363
episode: 220 training return: tensor(-10.1173, device='cuda:0')
episode: 221 training return: tensor(-56.7540, device='cuda:0')
episode: 222 training return: tensor(-9.3815, device='cuda:0')
episode: 223 training return: tensor(-79.7803, device='cuda:0')
epoch: 56 test_true_pfm: 2317.1851442330276 sim_pfm: 423.36436379433144
episode: 224 training return: tensor(20.0606, device='cuda:0')
episode: 225 training return: tensor(50.4639, device='cuda:0')
episode: 226 training return: tensor(-45.1128, device='cuda:0')
episode: 227 training return: tensor(192.9756, device='cuda:0')
epoch: 57 test_true_pfm: 2737.091831927349 sim_pfm: 487.9085153504663
episode: 228 training return: tensor(457.4402, device='cuda:0')
episode: 229 training return: tensor(207.2682, device='cuda:0')
episode: 230 training return: tensor(211.8821, device='cuda:0')
episode: 231 training return: tensor(-13.8165, device='cuda:0')
epoch: 58 test_true_pfm: 2198.503375764651 sim_pfm: 232.81141820707126
episode: 232 training return: tensor(164.6103, device='cuda:0')
episode: 233 training return: tensor(-75.9965, device='cuda:0')
episode: 234 training return: tensor(-30.3622, device='cuda:0')
episode: 235 training return: tensor(352.2423, device='cuda:0')
epoch: 59 test_true_pfm: 2006.3372201765187 sim_pfm: 274.1584476234081
episode: 236 training return: tensor(67.9888, device='cuda:0')
episode: 237 training return: tensor(67.3173, device='cuda:0')
episode: 238 training return: tensor(68.2698, device='cuda:0')
episode: 239 training return: tensor(366.7358, device='cuda:0')
epoch: 60 test_true_pfm: 2579.25273585532 sim_pfm: 383.7390193908553
episode: 240 training return: tensor(-28.5165, device='cuda:0')
episode: 241 training return: tensor(417.3356, device='cuda:0')
episode: 242 training return: tensor(84.5867, device='cuda:0')
episode: 243 training return: tensor(-18.5167, device='cuda:0')
epoch: 61 test_true_pfm: 2930.4506138985794 sim_pfm: 243.63573559653014
episode: 244 training return: tensor(76.4471, device='cuda:0')
episode: 245 training return: tensor(-23.3783, device='cuda:0')
episode: 246 training return: tensor(-34.8660, device='cuda:0')
episode: 247 training return: tensor(15.7350, device='cuda:0')
epoch: 62 test_true_pfm: 2287.9846287632063 sim_pfm: 441.52943982711685
episode: 248 training return: tensor(267.9517, device='cuda:0')
episode: 249 training return: tensor(-56.9498, device='cuda:0')
episode: 250 training return: tensor(-27.5159, device='cuda:0')
episode: 251 training return: tensor(-77.7627, device='cuda:0')
epoch: 63 test_true_pfm: 2430.5151927114207 sim_pfm: 134.63082724538012
episode: 252 training return: tensor(-33.4575, device='cuda:0')
episode: 253 training return: tensor(167.3275, device='cuda:0')
episode: 254 training return: tensor(-59.5127, device='cuda:0')
episode: 255 training return: tensor(-4.2280, device='cuda:0')
epoch: 64 test_true_pfm: 2430.0097395381636 sim_pfm: 132.17331465519965
episode: 256 training return: tensor(7.7922, device='cuda:0')
episode: 257 training return: tensor(-50.6812, device='cuda:0')
episode: 258 training return: tensor(-4.7726, device='cuda:0')
episode: 259 training return: tensor(501.2471, device='cuda:0')
epoch: 65 test_true_pfm: 1792.427856756459 sim_pfm: 230.86289453570498
episode: 260 training return: tensor(211.5525, device='cuda:0')
episode: 261 training return: tensor(0.2824, device='cuda:0')
episode: 262 training return: tensor(-52.1192, device='cuda:0')
episode: 263 training return: tensor(46.4469, device='cuda:0')
epoch: 66 test_true_pfm: 2169.7016296956967 sim_pfm: 392.53374602575786
episode: 264 training return: tensor(192.9462, device='cuda:0')
episode: 265 training return: tensor(13.5554, device='cuda:0')
episode: 266 training return: tensor(-21.5869, device='cuda:0')
episode: 267 training return: tensor(-53.0017, device='cuda:0')
epoch: 67 test_true_pfm: 2781.4984299727253 sim_pfm: 126.45822394662537
episode: 268 training return: tensor(537.8907, device='cuda:0')
episode: 269 training return: tensor(-44.1750, device='cuda:0')
episode: 270 training return: tensor(27.1265, device='cuda:0')
episode: 271 training return: tensor(-79.7841, device='cuda:0')
epoch: 68 test_true_pfm: 2676.871218740638 sim_pfm: 473.3720430167353
episode: 272 training return: tensor(13.0760, device='cuda:0')
episode: 273 training return: tensor(-15.8470, device='cuda:0')
episode: 274 training return: tensor(-55.8416, device='cuda:0')
episode: 275 training return: tensor(-46.2341, device='cuda:0')
epoch: 69 test_true_pfm: 2368.606478260055 sim_pfm: 392.50034786715213
episode: 276 training return: tensor(-16.1658, device='cuda:0')
episode: 277 training return: tensor(497.4716, device='cuda:0')
episode: 278 training return: tensor(-67.6514, device='cuda:0')
episode: 279 training return: tensor(-78.3246, device='cuda:0')
epoch: 70 test_true_pfm: 2615.7377992454894 sim_pfm: 351.67666882965324
episode: 280 training return: tensor(506.3350, device='cuda:0')
episode: 281 training return: tensor(-18.8654, device='cuda:0')
episode: 282 training return: tensor(84.7770, device='cuda:0')
episode: 283 training return: tensor(322.5689, device='cuda:0')
epoch: 71 test_true_pfm: 3214.1460474854116 sim_pfm: 231.8396830733012
episode: 284 training return: tensor(-27.0227, device='cuda:0')
episode: 285 training return: tensor(-29.3551, device='cuda:0')
episode: 286 training return: tensor(-27.8383, device='cuda:0')
episode: 287 training return: tensor(81.1062, device='cuda:0')
epoch: 72 test_true_pfm: 2551.695901590812 sim_pfm: 280.92900191200897
episode: 288 training return: tensor(7.8449, device='cuda:0')
episode: 289 training return: tensor(-48.8268, device='cuda:0')
episode: 290 training return: tensor(-77.6430, device='cuda:0')
episode: 291 training return: tensor(-27.4554, device='cuda:0')
epoch: 73 test_true_pfm: 2550.5282865697395 sim_pfm: 279.53828902352444
episode: 292 training return: tensor(248.0045, device='cuda:0')
episode: 293 training return: tensor(209.2233, device='cuda:0')
episode: 294 training return: tensor(-36.4596, device='cuda:0')
episode: 295 training return: tensor(432.6060, device='cuda:0')
epoch: 74 test_true_pfm: 3240.388435787688 sim_pfm: 375.6889057925825
episode: 296 training return: tensor(83.3885, device='cuda:0')
episode: 297 training return: tensor(-49.1250, device='cuda:0')
episode: 298 training return: tensor(500.2526, device='cuda:0')
episode: 299 training return: tensor(10.3272, device='cuda:0')
epoch: 75 test_true_pfm: 2908.5839533620606 sim_pfm: 365.18014218729996
episode: 300 training return: tensor(-20.4641, device='cuda:0')
episode: 301 training return: tensor(-80.9071, device='cuda:0')
episode: 302 training return: tensor(-101.3222, device='cuda:0')
episode: 303 training return: tensor(-62.1785, device='cuda:0')
epoch: 76 test_true_pfm: 2561.8339022407513 sim_pfm: 390.66194753496285
episode: 304 training return: tensor(-40.9601, device='cuda:0')
episode: 305 training return: tensor(222.7104, device='cuda:0')
episode: 306 training return: tensor(-15.0984, device='cuda:0')
episode: 307 training return: tensor(-13.3998, device='cuda:0')
epoch: 77 test_true_pfm: 3289.4190422668685 sim_pfm: 444.25573772063944
episode: 308 training return: tensor(-84.0990, device='cuda:0')
episode: 309 training return: tensor(-75.8082, device='cuda:0')
episode: 310 training return: tensor(-36.3507, device='cuda:0')
episode: 311 training return: tensor(-16.5030, device='cuda:0')
epoch: 78 test_true_pfm: 2635.1791795765603 sim_pfm: 273.1137386023377
episode: 312 training return: tensor(5.3578, device='cuda:0')
episode: 313 training return: tensor(333.3524, device='cuda:0')
episode: 314 training return: tensor(-11.0913, device='cuda:0')
episode: 315 training return: tensor(351.5168, device='cuda:0')
epoch: 79 test_true_pfm: 2760.965480678475 sim_pfm: 292.1204761508852
episode: 316 training return: tensor(342.1358, device='cuda:0')
episode: 317 training return: tensor(20.6295, device='cuda:0')
episode: 318 training return: tensor(515.4803, device='cuda:0')
episode: 319 training return: tensor(-25.7878, device='cuda:0')
epoch: 80 test_true_pfm: 2599.972917973312 sim_pfm: 410.7237687042992
episode: 320 training return: tensor(274.3071, device='cuda:0')
episode: 321 training return: tensor(-19.6227, device='cuda:0')
episode: 322 training return: tensor(411.2252, device='cuda:0')
episode: 323 training return: tensor(-35.6842, device='cuda:0')
epoch: 81 test_true_pfm: 2214.6841473699774 sim_pfm: 148.04761922704833
episode: 324 training return: tensor(3.9923, device='cuda:0')
episode: 325 training return: tensor(-4.3247, device='cuda:0')
episode: 326 training return: tensor(153.7649, device='cuda:0')
episode: 327 training return: tensor(445.8668, device='cuda:0')
epoch: 82 test_true_pfm: 2310.704393842461 sim_pfm: 209.66639920215434
episode: 328 training return: tensor(-29.6062, device='cuda:0')
episode: 329 training return: tensor(137.4891, device='cuda:0')
episode: 330 training return: tensor(363.8503, device='cuda:0')
episode: 331 training return: tensor(121.4841, device='cuda:0')
epoch: 83 test_true_pfm: 2777.6130867183288 sim_pfm: 484.8692283933051
episode: 332 training return: tensor(14.2274, device='cuda:0')
episode: 333 training return: tensor(47.8461, device='cuda:0')
episode: 334 training return: tensor(136.0685, device='cuda:0')
episode: 335 training return: tensor(527.2294, device='cuda:0')
epoch: 84 test_true_pfm: 2703.868166607175 sim_pfm: 465.4023954565637
episode: 336 training return: tensor(187.3631, device='cuda:0')
episode: 337 training return: tensor(-22.7875, device='cuda:0')
episode: 338 training return: tensor(-6.7777, device='cuda:0')
episode: 339 training return: tensor(-25.4568, device='cuda:0')
epoch: 85 test_true_pfm: 2271.2662865844236 sim_pfm: 225.63753154544005
episode: 340 training return: tensor(-32.8477, device='cuda:0')
episode: 341 training return: tensor(184.8117, device='cuda:0')
episode: 342 training return: tensor(60.3082, device='cuda:0')
episode: 343 training return: tensor(7.2645, device='cuda:0')
epoch: 86 test_true_pfm: 1740.0125324792043 sim_pfm: 285.02646521742764
episode: 344 training return: tensor(-56.0203, device='cuda:0')
episode: 345 training return: tensor(-69.9327, device='cuda:0')
episode: 346 training return: tensor(124.1589, device='cuda:0')
episode: 347 training return: tensor(-64.9679, device='cuda:0')
epoch: 87 test_true_pfm: 2875.377557638935 sim_pfm: 470.51604430534644
episode: 348 training return: tensor(216.0681, device='cuda:0')
episode: 349 training return: tensor(-74.4304, device='cuda:0')
episode: 350 training return: tensor(50.4653, device='cuda:0')
episode: 351 training return: tensor(-17.4090, device='cuda:0')
epoch: 88 test_true_pfm: 2690.321417132623 sim_pfm: 298.95253068371676
episode: 352 training return: tensor(-40.7913, device='cuda:0')
episode: 353 training return: tensor(-17.9730, device='cuda:0')
episode: 354 training return: tensor(93.8590, device='cuda:0')
episode: 355 training return: tensor(223.0472, device='cuda:0')
epoch: 89 test_true_pfm: 2182.5903957945343 sim_pfm: 239.83584098147307
episode: 356 training return: tensor(2.7116, device='cuda:0')
episode: 357 training return: tensor(-21.9342, device='cuda:0')
episode: 358 training return: tensor(154.5001, device='cuda:0')
episode: 359 training return: tensor(-21.9962, device='cuda:0')
epoch: 90 test_true_pfm: 2563.503040879518 sim_pfm: 312.03984266866854
episode: 360 training return: tensor(110.5397, device='cuda:0')
episode: 361 training return: tensor(-44.9413, device='cuda:0')
episode: 362 training return: tensor(66.3393, device='cuda:0')
episode: 363 training return: tensor(14.4020, device='cuda:0')
epoch: 91 test_true_pfm: 2976.40357659895 sim_pfm: 474.4061779041076
episode: 364 training return: tensor(-58.2202, device='cuda:0')
episode: 365 training return: tensor(28.7856, device='cuda:0')
episode: 366 training return: tensor(-16.7194, device='cuda:0')
episode: 367 training return: tensor(-13.1075, device='cuda:0')
epoch: 92 test_true_pfm: 2180.55545438726 sim_pfm: 215.13119950403538
episode: 368 training return: tensor(-10.0390, device='cuda:0')
episode: 369 training return: tensor(-87.9286, device='cuda:0')
episode: 370 training return: tensor(116.8236, device='cuda:0')
episode: 371 training return: tensor(-12.3153, device='cuda:0')
epoch: 93 test_true_pfm: 2212.845472656396 sim_pfm: 446.06388974077225
episode: 372 training return: tensor(-12.1249, device='cuda:0')
episode: 373 training return: tensor(25.9375, device='cuda:0')
episode: 374 training return: tensor(-11.1626, device='cuda:0')
episode: 375 training return: tensor(-27.0401, device='cuda:0')
epoch: 94 test_true_pfm: 2666.652389655365 sim_pfm: 299.2312067285723
episode: 376 training return: tensor(-61.7327, device='cuda:0')
episode: 377 training return: tensor(418.3152, device='cuda:0')
episode: 378 training return: tensor(-5.3710, device='cuda:0')
episode: 379 training return: tensor(133.4645, device='cuda:0')
epoch: 95 test_true_pfm: 2553.0306813490124 sim_pfm: 379.67979713205324
episode: 380 training return: tensor(85.9834, device='cuda:0')
episode: 381 training return: tensor(-29.9905, device='cuda:0')
episode: 382 training return: tensor(55.4495, device='cuda:0')
episode: 383 training return: tensor(86.3711, device='cuda:0')
epoch: 96 test_true_pfm: 2484.474803107889 sim_pfm: 302.68731339163304
episode: 384 training return: tensor(-59.4260, device='cuda:0')
episode: 385 training return: tensor(-56.3553, device='cuda:0')
episode: 386 training return: tensor(0.0123, device='cuda:0')
episode: 387 training return: tensor(172.7829, device='cuda:0')
epoch: 97 test_true_pfm: 2793.655199447801 sim_pfm: 464.172302685678
episode: 388 training return: tensor(-25.7863, device='cuda:0')
episode: 389 training return: tensor(6.7756, device='cuda:0')
episode: 390 training return: tensor(252.0065, device='cuda:0')
episode: 391 training return: tensor(42.6833, device='cuda:0')
epoch: 98 test_true_pfm: 2643.0030652076443 sim_pfm: 284.2892999208998
episode: 392 training return: tensor(510.6870, device='cuda:0')
episode: 393 training return: tensor(77.6533, device='cuda:0')
episode: 394 training return: tensor(460.5161, device='cuda:0')
episode: 395 training return: tensor(467.4723, device='cuda:0')
epoch: 99 test_true_pfm: 2594.9642392794117 sim_pfm: 467.4250275954255
episode: 396 training return: tensor(404.5916, device='cuda:0')
episode: 397 training return: tensor(124.1795, device='cuda:0')
episode: 398 training return: tensor(-9.8309, device='cuda:0')
episode: 399 training return: tensor(-38.7621, device='cuda:0')
epoch: 100 test_true_pfm: 3155.910247455728 sim_pfm: 401.89627166083665
episode: 400 training return: tensor(44.1177, device='cuda:0')
episode: 401 training return: tensor(-26.9662, device='cuda:0')
episode: 402 training return: tensor(33.8150, device='cuda:0')
episode: 403 training return: tensor(231.7677, device='cuda:0')
epoch: 101 test_true_pfm: 2334.5568937266776 sim_pfm: 402.58035637267557
episode: 404 training return: tensor(518.6891, device='cuda:0')
episode: 405 training return: tensor(76.5993, device='cuda:0')
episode: 406 training return: tensor(-34.1941, device='cuda:0')
episode: 407 training return: tensor(511.3309, device='cuda:0')
epoch: 102 test_true_pfm: 2425.959056064118 sim_pfm: 223.23935198702384
episode: 408 training return: tensor(-54.1007, device='cuda:0')
episode: 409 training return: tensor(73.8147, device='cuda:0')
episode: 410 training return: tensor(182.9565, device='cuda:0')
episode: 411 training return: tensor(141.5279, device='cuda:0')
epoch: 103 test_true_pfm: 2890.9999083876614 sim_pfm: 356.0010895259523
episode: 412 training return: tensor(5.7792, device='cuda:0')
episode: 413 training return: tensor(164.2575, device='cuda:0')
episode: 414 training return: tensor(-8.5943, device='cuda:0')
episode: 415 training return: tensor(159.0770, device='cuda:0')
epoch: 104 test_true_pfm: 2258.1784305053366 sim_pfm: 167.6956322050149
episode: 416 training return: tensor(5.2757, device='cuda:0')
episode: 417 training return: tensor(486.3144, device='cuda:0')
episode: 418 training return: tensor(-71.4553, device='cuda:0')
episode: 419 training return: tensor(12.6430, device='cuda:0')
epoch: 105 test_true_pfm: 2215.622019763863 sim_pfm: 214.278541285273
episode: 420 training return: tensor(523.5365, device='cuda:0')
episode: 421 training return: tensor(390.0340, device='cuda:0')
episode: 422 training return: tensor(-31.3729, device='cuda:0')
episode: 423 training return: tensor(-44.9668, device='cuda:0')
epoch: 106 test_true_pfm: 2433.3284946247973 sim_pfm: 392.04863401638187
episode: 424 training return: tensor(-29.6399, device='cuda:0')
episode: 425 training return: tensor(6.2202, device='cuda:0')
episode: 426 training return: tensor(-61.7469, device='cuda:0')
episode: 427 training return: tensor(143.2644, device='cuda:0')
epoch: 107 test_true_pfm: 2146.652578178284 sim_pfm: 415.68448015803006
episode: 428 training return: tensor(-62.0805, device='cuda:0')
episode: 429 training return: tensor(486.5049, device='cuda:0')
episode: 430 training return: tensor(126.4263, device='cuda:0')
episode: 431 training return: tensor(72.4240, device='cuda:0')
epoch: 108 test_true_pfm: 2714.8177713426658 sim_pfm: 333.72534363014466
episode: 432 training return: tensor(-40.3575, device='cuda:0')
episode: 433 training return: tensor(203.1359, device='cuda:0')
episode: 434 training return: tensor(13.1826, device='cuda:0')
episode: 435 training return: tensor(331.4250, device='cuda:0')
epoch: 109 test_true_pfm: 2456.095869467023 sim_pfm: 86.5071745107028
episode: 436 training return: tensor(-50.5846, device='cuda:0')
episode: 437 training return: tensor(-51.4329, device='cuda:0')
episode: 438 training return: tensor(74.1018, device='cuda:0')
episode: 439 training return: tensor(-70.0549, device='cuda:0')
epoch: 110 test_true_pfm: 2640.8468416597784 sim_pfm: 324.96802022816456
episode: 440 training return: tensor(25.0171, device='cuda:0')
episode: 441 training return: tensor(427.3213, device='cuda:0')
episode: 442 training return: tensor(-10.7232, device='cuda:0')
episode: 443 training return: tensor(-31.6151, device='cuda:0')
epoch: 111 test_true_pfm: 2617.432175977466 sim_pfm: 429.90751863909344
episode: 444 training return: tensor(-77.9848, device='cuda:0')
episode: 445 training return: tensor(-30.0686, device='cuda:0')
episode: 446 training return: tensor(-51.9917, device='cuda:0')
episode: 447 training return: tensor(-33.3164, device='cuda:0')
epoch: 112 test_true_pfm: 2270.114514465999 sim_pfm: 182.56887017797757
episode: 448 training return: tensor(19.2002, device='cuda:0')
episode: 449 training return: tensor(160.9359, device='cuda:0')
episode: 450 training return: tensor(84.3713, device='cuda:0')
episode: 451 training return: tensor(-73.5374, device='cuda:0')
epoch: 113 test_true_pfm: 3345.7232294740647 sim_pfm: 439.2786704540097
episode: 452 training return: tensor(-61.7712, device='cuda:0')
episode: 453 training return: tensor(217.4941, device='cuda:0')
episode: 454 training return: tensor(-33.2386, device='cuda:0')
episode: 455 training return: tensor(44.5082, device='cuda:0')
epoch: 114 test_true_pfm: 3282.3642779876277 sim_pfm: 348.7516423060636
episode: 456 training return: tensor(-57.9955, device='cuda:0')
episode: 457 training return: tensor(271.6116, device='cuda:0')
episode: 458 training return: tensor(-62.5609, device='cuda:0')
episode: 459 training return: tensor(-65.6081, device='cuda:0')
epoch: 115 test_true_pfm: 2594.224162588698 sim_pfm: 247.1751591352901
episode: 460 training return: tensor(2.0053, device='cuda:0')
episode: 461 training return: tensor(212.7076, device='cuda:0')
episode: 462 training return: tensor(-50.3436, device='cuda:0')
episode: 463 training return: tensor(337.1119, device='cuda:0')
epoch: 116 test_true_pfm: 2385.752311028849 sim_pfm: 451.5170456740889
episode: 464 training return: tensor(110.9627, device='cuda:0')
episode: 465 training return: tensor(-82.4814, device='cuda:0')
episode: 466 training return: tensor(-60.4243, device='cuda:0')
episode: 467 training return: tensor(-13.9547, device='cuda:0')
epoch: 117 test_true_pfm: 2508.491282547225 sim_pfm: 300.7879018812091
episode: 468 training return: tensor(-72.0490, device='cuda:0')
episode: 469 training return: tensor(2.3110, device='cuda:0')
episode: 470 training return: tensor(-13.2737, device='cuda:0')
episode: 471 training return: tensor(136.6782, device='cuda:0')
epoch: 118 test_true_pfm: 2762.0005193150882 sim_pfm: 388.8838702715778
episode: 472 training return: tensor(-20.4873, device='cuda:0')
episode: 473 training return: tensor(3.0305, device='cuda:0')
episode: 474 training return: tensor(-45.6212, device='cuda:0')
episode: 475 training return: tensor(-25.6378, device='cuda:0')
epoch: 119 test_true_pfm: 3042.5719268669964 sim_pfm: 479.6168297226541
episode: 476 training return: tensor(195.6827, device='cuda:0')
episode: 477 training return: tensor(-27.3428, device='cuda:0')
episode: 478 training return: tensor(12.4168, device='cuda:0')
episode: 479 training return: tensor(-69.1238, device='cuda:0')
epoch: 120 test_true_pfm: 2109.493497608904 sim_pfm: 476.9108213686268
episode: 480 training return: tensor(-8.8113, device='cuda:0')
episode: 481 training return: tensor(-62.1131, device='cuda:0')
episode: 482 training return: tensor(275.9715, device='cuda:0')
episode: 483 training return: tensor(-40.6647, device='cuda:0')
epoch: 121 test_true_pfm: 1950.1048267783942 sim_pfm: 405.82919987269753
episode: 484 training return: tensor(165.1594, device='cuda:0')
episode: 485 training return: tensor(323.9480, device='cuda:0')
episode: 486 training return: tensor(210.0046, device='cuda:0')
episode: 487 training return: tensor(109.8028, device='cuda:0')
epoch: 122 test_true_pfm: 3059.664619786529 sim_pfm: 287.879267108355
episode: 488 training return: tensor(-70.2978, device='cuda:0')
episode: 489 training return: tensor(128.0578, device='cuda:0')
episode: 490 training return: tensor(58.9782, device='cuda:0')
episode: 491 training return: tensor(434.6836, device='cuda:0')
epoch: 123 test_true_pfm: 2916.503407997076 sim_pfm: 452.17446604386595
episode: 492 training return: tensor(24.1330, device='cuda:0')
episode: 493 training return: tensor(68.6302, device='cuda:0')
episode: 494 training return: tensor(-21.3405, device='cuda:0')
episode: 495 training return: tensor(236.4855, device='cuda:0')
epoch: 124 test_true_pfm: 2946.566980802339 sim_pfm: 447.6598828147592
episode: 496 training return: tensor(94.1308, device='cuda:0')
episode: 497 training return: tensor(163.2110, device='cuda:0')
episode: 498 training return: tensor(-39.6565, device='cuda:0')
episode: 499 training return: tensor(23.5087, device='cuda:0')
epoch: 125 test_true_pfm: 2912.9113074456377 sim_pfm: 358.01926881254377
episode: 500 training return: tensor(-50.1890, device='cuda:0')
episode: 501 training return: tensor(180.1782, device='cuda:0')
episode: 502 training return: tensor(127.9611, device='cuda:0')
episode: 503 training return: tensor(512.4304, device='cuda:0')
epoch: 126 test_true_pfm: 2806.3289902405654 sim_pfm: 420.6593475268164
episode: 504 training return: tensor(81.4253, device='cuda:0')
episode: 505 training return: tensor(12.9330, device='cuda:0')
episode: 506 training return: tensor(-17.4646, device='cuda:0')
episode: 507 training return: tensor(213.3476, device='cuda:0')
epoch: 127 test_true_pfm: 2720.7526733624595 sim_pfm: 246.68258577014785
episode: 508 training return: tensor(-10.9250, device='cuda:0')
episode: 509 training return: tensor(142.6851, device='cuda:0')
episode: 510 training return: tensor(32.5292, device='cuda:0')
episode: 511 training return: tensor(249.5562, device='cuda:0')
epoch: 128 test_true_pfm: 1568.750397656442 sim_pfm: 200.89330670641115
episode: 512 training return: tensor(-67.7037, device='cuda:0')
episode: 513 training return: tensor(6.6802, device='cuda:0')
episode: 514 training return: tensor(41.7347, device='cuda:0')
episode: 515 training return: tensor(1.9274, device='cuda:0')
epoch: 129 test_true_pfm: 2464.536829414791 sim_pfm: 259.67369964146445
episode: 516 training return: tensor(-32.9350, device='cuda:0')
episode: 517 training return: tensor(-71.3051, device='cuda:0')
episode: 518 training return: tensor(-72.3835, device='cuda:0')
episode: 519 training return: tensor(247.2444, device='cuda:0')
epoch: 130 test_true_pfm: 2712.5667835199833 sim_pfm: 488.6299735882785
episode: 520 training return: tensor(-16.6167, device='cuda:0')
episode: 521 training return: tensor(-9.3085, device='cuda:0')
episode: 522 training return: tensor(37.8127, device='cuda:0')
episode: 523 training return: tensor(-20.0483, device='cuda:0')
epoch: 131 test_true_pfm: 2402.87482419979 sim_pfm: 495.212958099825
episode: 524 training return: tensor(188.4811, device='cuda:0')
episode: 525 training return: tensor(87.1437, device='cuda:0')
episode: 526 training return: tensor(482.5825, device='cuda:0')
episode: 527 training return: tensor(335.3606, device='cuda:0')
epoch: 132 test_true_pfm: 1929.2043639544463 sim_pfm: 140.2409751539429
episode: 528 training return: tensor(99.9094, device='cuda:0')
episode: 529 training return: tensor(127.5434, device='cuda:0')
episode: 530 training return: tensor(-56.1898, device='cuda:0')
episode: 531 training return: tensor(-36.1641, device='cuda:0')
epoch: 133 test_true_pfm: 2514.336949712351 sim_pfm: 327.22251561509137
episode: 532 training return: tensor(3.7928, device='cuda:0')
episode: 533 training return: tensor(264.3349, device='cuda:0')
episode: 534 training return: tensor(57.0115, device='cuda:0')
episode: 535 training return: tensor(474.5930, device='cuda:0')
epoch: 134 test_true_pfm: 2911.390372287489 sim_pfm: 294.7537386478022
episode: 536 training return: tensor(17.4036, device='cuda:0')
episode: 537 training return: tensor(-85.0555, device='cuda:0')
episode: 538 training return: tensor(-36.2244, device='cuda:0')
episode: 539 training return: tensor(287.5638, device='cuda:0')
epoch: 135 test_true_pfm: 2394.154819823179 sim_pfm: 321.085638911929
episode: 540 training return: tensor(39.4165, device='cuda:0')
episode: 541 training return: tensor(-24.8906, device='cuda:0')
episode: 542 training return: tensor(509.1606, device='cuda:0')
episode: 543 training return: tensor(-31.2952, device='cuda:0')
epoch: 136 test_true_pfm: 2274.5915169503364 sim_pfm: 365.2241297646445
episode: 544 training return: tensor(15.3797, device='cuda:0')
episode: 545 training return: tensor(442.9067, device='cuda:0')
episode: 546 training return: tensor(-2.7068, device='cuda:0')
episode: 547 training return: tensor(336.3094, device='cuda:0')
epoch: 137 test_true_pfm: 2759.4114917095503 sim_pfm: 306.4359487692903
episode: 548 training return: tensor(316.6658, device='cuda:0')
episode: 549 training return: tensor(84.7148, device='cuda:0')
episode: 550 training return: tensor(220.7496, device='cuda:0')
episode: 551 training return: tensor(-34.7240, device='cuda:0')
epoch: 138 test_true_pfm: 2697.309871041925 sim_pfm: 308.5639789728157
episode: 552 training return: tensor(365.3868, device='cuda:0')
episode: 553 training return: tensor(85.9136, device='cuda:0')
episode: 554 training return: tensor(-42.4800, device='cuda:0')
episode: 555 training return: tensor(-15.8484, device='cuda:0')
epoch: 139 test_true_pfm: 1717.4001171345583 sim_pfm: 366.5951521544678
episode: 556 training return: tensor(491.1839, device='cuda:0')
episode: 557 training return: tensor(44.4251, device='cuda:0')
episode: 558 training return: tensor(34.8607, device='cuda:0')
episode: 559 training return: tensor(-25.5420, device='cuda:0')
epoch: 140 test_true_pfm: 2453.9274497502392 sim_pfm: 184.6750235700359
episode: 560 training return: tensor(247.6850, device='cuda:0')
episode: 561 training return: tensor(-52.8081, device='cuda:0')
episode: 562 training return: tensor(-61.1640, device='cuda:0')
episode: 563 training return: tensor(210.1705, device='cuda:0')
epoch: 141 test_true_pfm: 3054.8307526049516 sim_pfm: 332.0028883060634
episode: 564 training return: tensor(50.8788, device='cuda:0')
episode: 565 training return: tensor(157.4337, device='cuda:0')
episode: 566 training return: tensor(219.5058, device='cuda:0')
episode: 567 training return: tensor(-4.1744, device='cuda:0')
epoch: 142 test_true_pfm: 1960.8443674981372 sim_pfm: 254.66478513886491
episode: 568 training return: tensor(-64.0122, device='cuda:0')
episode: 569 training return: tensor(-58.7780, device='cuda:0')
episode: 570 training return: tensor(164.3791, device='cuda:0')
episode: 571 training return: tensor(17.3462, device='cuda:0')
epoch: 143 test_true_pfm: 1985.2655066467585 sim_pfm: 113.82492586613323
episode: 572 training return: tensor(-65.1830, device='cuda:0')
episode: 573 training return: tensor(140.0777, device='cuda:0')
episode: 574 training return: tensor(-43.6002, device='cuda:0')
episode: 575 training return: tensor(150.6882, device='cuda:0')
epoch: 144 test_true_pfm: 2529.6809291664426 sim_pfm: 437.2329100340139
episode: 576 training return: tensor(390.0478, device='cuda:0')
episode: 577 training return: tensor(-72.6348, device='cuda:0')
episode: 578 training return: tensor(-19.0352, device='cuda:0')
episode: 579 training return: tensor(11.8215, device='cuda:0')
epoch: 145 test_true_pfm: 3001.0578192845255 sim_pfm: 356.9806821748983
episode: 580 training return: tensor(212.5370, device='cuda:0')
episode: 581 training return: tensor(216.6309, device='cuda:0')
episode: 582 training return: tensor(-39.8802, device='cuda:0')
episode: 583 training return: tensor(-77.6191, device='cuda:0')
epoch: 146 test_true_pfm: 2697.648689400292 sim_pfm: 486.87334680436953
episode: 584 training return: tensor(-13.6341, device='cuda:0')
episode: 585 training return: tensor(-17.3244, device='cuda:0')
episode: 586 training return: tensor(-6.4467, device='cuda:0')
episode: 587 training return: tensor(-21.1400, device='cuda:0')
epoch: 147 test_true_pfm: 2685.2818981088153 sim_pfm: 449.0780093696085
episode: 588 training return: tensor(211.3344, device='cuda:0')
episode: 589 training return: tensor(39.7237, device='cuda:0')
episode: 590 training return: tensor(62.7887, device='cuda:0')
episode: 591 training return: tensor(305.1097, device='cuda:0')
epoch: 148 test_true_pfm: 2760.62683680649 sim_pfm: 189.1144110809934
episode: 592 training return: tensor(263.1741, device='cuda:0')
episode: 593 training return: tensor(56.2121, device='cuda:0')
episode: 594 training return: tensor(-50.0349, device='cuda:0')
episode: 595 training return: tensor(126.9849, device='cuda:0')
epoch: 149 test_true_pfm: 1884.9194122559413 sim_pfm: 226.56798560746634
episode: 596 training return: tensor(45.2405, device='cuda:0')
episode: 597 training return: tensor(138.0762, device='cuda:0')
episode: 598 training return: tensor(205.8630, device='cuda:0')
episode: 599 training return: tensor(-6.5070, device='cuda:0')
epoch: 150 test_true_pfm: 1612.528109532506 sim_pfm: 164.5266742607831
