['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '5', '--data', '100000', '--regu', '0.2']
epoch: 0 training_loss 0.3335870538651943 test_loss: 0.20414450168609619
epoch: 1 training_loss 0.18161770179867745 test_loss: 0.16621978282928468
epoch: 2 training_loss 0.14951808884739876 test_loss: 0.14377480745315552
epoch: 3 training_loss 0.14121826086193323 test_loss: 0.12146868705749511
epoch: 4 training_loss 0.12827954195439817 test_loss: 0.13363198041915894
epoch: 5 training_loss 0.1322618748433888 test_loss: 0.13324819803237914
epoch: 6 training_loss 0.12396933861076832 test_loss: 0.12283593416213989
epoch: 7 training_loss 0.12163325760513544 test_loss: 0.13243813514709474
epoch: 8 training_loss 0.1157274473272264 test_loss: 0.129008150100708
epoch: 9 training_loss 0.11877043832093477 test_loss: 0.1233180284500122
epoch: 10 training_loss 0.1256856150738895 test_loss: 0.10249935388565064
epoch: 11 training_loss 0.11141978165134787 test_loss: 0.1339240550994873
epoch: 12 training_loss 0.10859631828963756 test_loss: 0.1259848952293396
epoch: 13 training_loss 0.12193560555577278 test_loss: 0.11390808820724488
epoch: 14 training_loss 0.11183890223503112 test_loss: 0.12330503463745117
epoch: 15 training_loss 0.11818992309272289 test_loss: 0.12649064064025878
epoch: 16 training_loss 0.11874409355223178 test_loss: 0.1223032832145691
epoch: 17 training_loss 0.1208705572783947 test_loss: 0.11846047639846802
epoch: 18 training_loss 0.10745839752256871 test_loss: 0.11803425550460815
epoch: 19 training_loss 0.11346028733998537 test_loss: 0.11274641752243042
epoch: 20 training_loss 0.10378016252070665 test_loss: 0.12713022232055665
epoch: 21 training_loss 0.11353396913036704 test_loss: 0.11341211795806885
epoch: 22 training_loss 0.11574603356420994 test_loss: 0.11965237855911255
epoch: 23 training_loss 0.11303013261407614 test_loss: 0.10649759769439697
epoch: 24 training_loss 0.11124856572598218 test_loss: 0.10065139532089233
epoch: 25 training_loss 0.11249320309609175 test_loss: 0.11522587537765502
epoch: 26 training_loss 0.10439534984529018 test_loss: 0.11642293930053711
epoch: 27 training_loss 0.11117594903334975 test_loss: 0.11103199720382691
epoch: 28 training_loss 0.10586949743330479 test_loss: 0.11495063304901124
epoch: 29 training_loss 0.10902823496609926 test_loss: 0.11935405731201172
epoch: 30 training_loss 0.113003960698843 test_loss: 0.12758784294128417
epoch: 31 training_loss 0.11341922022402287 test_loss: 0.10334125757217408
epoch: 32 training_loss 0.11020038003101945 test_loss: 0.10941822528839111
epoch: 33 training_loss 0.11401107784360648 test_loss: 0.09588504433631898
epoch: 34 training_loss 0.10138987012207508 test_loss: 0.13238832950592042
epoch: 35 training_loss 0.10337186427786946 test_loss: 0.13402556180953978
epoch: 36 training_loss 0.10331336721777916 test_loss: 0.11923893690109252
epoch: 37 training_loss 0.10685399580746889 test_loss: 0.10220623016357422
epoch: 38 training_loss 0.10950599674135447 test_loss: 0.109371018409729
epoch: 39 training_loss 0.11106792690232396 test_loss: 0.10756518840789794
epoch: 40 training_loss 0.10913058381527663 test_loss: 0.11958264112472534
epoch: 41 training_loss 0.11207608230412007 test_loss: 0.11075109243392944
epoch: 42 training_loss 0.10673080760985613 test_loss: 0.1184959053993225
epoch: 43 training_loss 0.10651154624298215 test_loss: 0.10294116735458374
epoch: 44 training_loss 0.10502268638461829 test_loss: 0.11451283693313599
epoch: 45 training_loss 0.10808355642482638 test_loss: 0.102265465259552
epoch: 46 training_loss 0.10831174459308386 test_loss: 0.11475871801376343
epoch: 47 training_loss 0.09993811193853616 test_loss: 0.11580283641815185
epoch: 48 training_loss 0.10109178548678756 test_loss: 0.08477514982223511
epoch: 49 training_loss 0.10577266361564398 test_loss: 0.10971939563751221
epoch: 50 training_loss 0.11125352693721652 test_loss: 0.1105272650718689
epoch: 51 training_loss 0.10618359824642538 test_loss: 0.1092374563217163
epoch: 52 training_loss 0.10933013154193759 test_loss: 0.110833740234375
epoch: 53 training_loss 0.10554993066936731 test_loss: 0.11832593679428101
epoch: 54 training_loss 0.10160360638052225 test_loss: 0.13273895978927613
epoch: 55 training_loss 0.10162017531692982 test_loss: 0.1289237380027771
epoch: 56 training_loss 0.10711848240345717 test_loss: 0.09365375638008118
epoch: 57 training_loss 0.10706623939797283 test_loss: 0.11169226169586181
epoch: 58 training_loss 0.1016018308326602 test_loss: 0.10060445070266724
epoch: 59 training_loss 0.11390280917286873 test_loss: 0.12315020561218262
epoch: 60 training_loss 0.1057360365614295 test_loss: 0.13772423267364503
epoch: 61 training_loss 0.10795836936682462 test_loss: 0.10652801990509034
epoch: 62 training_loss 0.10247692439705133 test_loss: 0.11709768772125244
epoch: 63 training_loss 0.103006328670308 test_loss: 0.11899604797363281
epoch: 64 training_loss 0.1062755529396236 test_loss: 0.10438685417175293
epoch: 65 training_loss 0.10024142947047948 test_loss: 0.11199721097946166
epoch: 66 training_loss 0.10504666958004236 test_loss: 0.11370398998260497
epoch: 67 training_loss 0.10586520798504352 test_loss: 0.11989477872848511
epoch: 68 training_loss 0.10710038710385561 test_loss: 0.10591870546340942
epoch: 69 training_loss 0.10888658579438924 test_loss: 0.12215813398361205
epoch: 70 training_loss 0.0960045910999179 test_loss: 0.12342332601547241
epoch: 71 training_loss 0.11259617798030376 test_loss: 0.12526954412460328
epoch: 72 training_loss 0.10286861784756184 test_loss: 0.09473946690559387
epoch: 73 training_loss 0.10134954813867808 test_loss: 0.1086250901222229
epoch: 74 training_loss 0.10797079623676836 test_loss: 0.1338992953300476
epoch: 75 training_loss 0.09456807279959321 test_loss: 0.11444956064224243
epoch: 76 training_loss 0.1059406171925366 test_loss: 0.11969232559204102
epoch: 77 training_loss 0.11026939876377582 test_loss: 0.10904920101165771
epoch: 78 training_loss 0.1003339896723628 test_loss: 0.10905382633209229
epoch: 79 training_loss 0.09317542240023613 test_loss: 0.10836489200592041
epoch: 80 training_loss 0.1027958394959569 test_loss: 0.1027957558631897
epoch: 81 training_loss 0.10460165815427899 test_loss: 0.10617001056671142
epoch: 82 training_loss 0.10400062941014766 test_loss: 0.10239665508270264
epoch: 83 training_loss 0.11288302056491376 test_loss: 0.11463801860809326
epoch: 84 training_loss 0.10429977480322122 test_loss: 0.1256626844406128
epoch: 85 training_loss 0.09894745199009776 test_loss: 0.11348052024841308
epoch: 86 training_loss 0.10682169567793608 test_loss: 0.10671786069869996
epoch: 87 training_loss 0.10654221240431071 test_loss: 0.11455283164978028
epoch: 88 training_loss 0.10399893114343285 test_loss: 0.1059998869895935
epoch: 89 training_loss 0.10199372751638293 test_loss: 0.10479551553726196
epoch: 90 training_loss 0.10552676487714052 test_loss: 0.107486093044281
epoch: 91 training_loss 0.09905975958332419 test_loss: 0.12533737421035768
epoch: 92 training_loss 0.10091468138620258 test_loss: 0.09657388925552368
epoch: 93 training_loss 0.10522146664559841 test_loss: 0.10779682397842408
epoch: 94 training_loss 0.10279339771717787 test_loss: 0.12156461477279663
epoch: 95 training_loss 0.10136611308902502 test_loss: 0.10184814929962158
epoch: 96 training_loss 0.10672061741352082 test_loss: 0.10706027746200561
epoch: 97 training_loss 0.10510959383100271 test_loss: 0.1079299807548523
epoch: 98 training_loss 0.10765778593719005 test_loss: 0.12245267629623413
epoch: 99 training_loss 0.10093983499333263 test_loss: 0.11548857688903809
epoch: 100 training_loss 0.0999849193636328 test_loss: 0.10311174392700195
epoch: 101 training_loss 0.10054164754226803 test_loss: 0.11298171281814576
epoch: 102 training_loss 0.10571550190448761 test_loss: 0.10878276824951172
epoch: 103 training_loss 0.10955637786537409 test_loss: 0.11436035633087158
epoch: 104 training_loss 0.10780762862414121 test_loss: 0.11017675399780273
epoch: 105 training_loss 0.1020666523464024 test_loss: 0.10749844312667847
epoch: 106 training_loss 0.11010472144931555 test_loss: 0.11136459112167359
epoch: 107 training_loss 0.09908310975879431 test_loss: 0.0997515082359314
epoch: 108 training_loss 0.10011049034073949 test_loss: 0.1140783429145813
epoch: 109 training_loss 0.10258352629840374 test_loss: 0.10052469968795777
epoch: 110 training_loss 0.10444809477776289 test_loss: 0.11398999691009522
epoch: 111 training_loss 0.10490268103778362 test_loss: 0.10974801778793335
epoch: 112 training_loss 0.10142812421545386 test_loss: 0.11388310194015502
epoch: 113 training_loss 0.10602523325011134 test_loss: 0.10332413911819457
epoch: 114 training_loss 0.10825699446722865 test_loss: 0.11850520372390747
epoch: 115 training_loss 0.10163725897669793 test_loss: 0.12043496370315551
epoch: 116 training_loss 0.10322740908712148 test_loss: 0.09809420704841613
epoch: 117 training_loss 0.10836932873353362 test_loss: 0.11395906209945679
epoch: 118 training_loss 0.11395134080201387 test_loss: 0.10430432558059692
epoch: 119 training_loss 0.10612348020076752 test_loss: 0.1055368423461914
epoch: 120 training_loss 0.10233261892572046 test_loss: 0.11007547378540039
epoch: 121 training_loss 0.1027883917838335 test_loss: 0.11230368614196777
epoch: 122 training_loss 0.09604736346751451 test_loss: 0.10419694185256959
epoch: 123 training_loss 0.10030715273693204 test_loss: 0.11274091005325318
epoch: 124 training_loss 0.1065050045400858 test_loss: 0.1088595986366272
epoch: 125 training_loss 0.10765562377870083 test_loss: 0.08956300616264343
epoch: 126 training_loss 0.10403426812961697 test_loss: 0.10864131450653076
epoch: 127 training_loss 0.09716659102588893 test_loss: 0.12505277395248413
epoch: 128 training_loss 0.10506115334108472 test_loss: 0.10508700609207153
epoch: 129 training_loss 0.09779958255589008 test_loss: 0.12752964496612548
epoch: 130 training_loss 0.09802250429987908 test_loss: 0.14040682315826417
epoch: 131 training_loss 0.10590226542204619 test_loss: 0.11131192445755005
epoch: 132 training_loss 0.1004893864877522 test_loss: 0.11344963312149048
epoch: 133 training_loss 0.10631600964814425 test_loss: 0.09994110465049744
epoch: 134 training_loss 0.10695267263799905 test_loss: 0.101882803440094
epoch: 135 training_loss 0.09590349277481437 test_loss: 0.10646075010299683
epoch: 136 training_loss 0.10732223067432642 test_loss: 0.12197215557098388
epoch: 137 training_loss 0.10158107548952103 test_loss: 0.11777349710464477
epoch: 138 training_loss 0.102172397878021 test_loss: 0.11246542930603028
epoch: 139 training_loss 0.10467154122889041 test_loss: 0.10366321802139282
epoch: 140 training_loss 0.10456489529460669 test_loss: 0.11609169244766235
epoch: 141 training_loss 0.09755030205473304 test_loss: 0.0972716212272644
epoch: 142 training_loss 0.09812912659719586 test_loss: 0.13394324779510497
epoch: 143 training_loss 0.10239312667399644 test_loss: 0.10070996284484864
epoch: 144 training_loss 0.10019912015646697 test_loss: 0.08936669230461121
epoch: 145 training_loss 0.10075583972036839 test_loss: 0.09363129734992981
epoch: 146 training_loss 0.10067896716296673 test_loss: 0.10875568389892579
epoch: 147 training_loss 0.09836156025528908 test_loss: 0.12827068567276
epoch: 148 training_loss 0.11116408849135041 test_loss: 0.11996887922286988
epoch: 149 training_loss 0.09929387860000133 test_loss: 0.12287720441818237
epoch: 0 training_loss 49.831933650970456 test_loss: 24.939588928222655
epoch: 1 training_loss 18.971153831481935 test_loss: 15.514895629882812
epoch: 2 training_loss 13.891801929473877 test_loss: 12.026060485839844
epoch: 3 training_loss 11.035163955688477 test_loss: 10.186174011230468
epoch: 4 training_loss 9.0871333026886 test_loss: 8.357589721679688
epoch: 5 training_loss 7.7526232576370235 test_loss: 7.215497589111328
epoch: 6 training_loss 6.8347206830978395 test_loss: 6.230419540405274
epoch: 7 training_loss 6.160367302894592 test_loss: 5.763106918334961
epoch: 8 training_loss 5.635293879508972 test_loss: 5.423279190063477
epoch: 9 training_loss 5.169471673965454 test_loss: 4.8984111785888675
epoch: 10 training_loss 4.822630157470703 test_loss: 4.679848861694336
epoch: 11 training_loss 4.569949092864991 test_loss: 4.463507461547851
epoch: 12 training_loss 4.364051225185395 test_loss: 4.301105880737305
epoch: 13 training_loss 4.052264635562897 test_loss: 3.9752445220947266
epoch: 14 training_loss 3.89491738319397 test_loss: 3.8387264251708983
epoch: 15 training_loss 3.7048767137527467 test_loss: 3.6904762268066404
epoch: 16 training_loss 3.525377161502838 test_loss: 3.4522773742675783
epoch: 17 training_loss 3.420230655670166 test_loss: 3.4981010437011717
epoch: 18 training_loss 3.299540138244629 test_loss: 3.2644184112548826
epoch: 19 training_loss 3.1870206665992735 test_loss: 3.1839773178100588
epoch: 20 training_loss 3.1519778752326966 test_loss: 3.1772945404052733
epoch: 21 training_loss 3.0918127846717836 test_loss: 3.215190887451172
epoch: 22 training_loss 2.9861734247207643 test_loss: 2.81024169921875
epoch: 23 training_loss 2.9648992466926574 test_loss: 2.915106773376465
epoch: 24 training_loss 2.857311067581177 test_loss: 2.724056625366211
epoch: 25 training_loss 2.806770899295807 test_loss: 2.7312896728515623
epoch: 26 training_loss 2.710109236240387 test_loss: 2.6518169403076173
epoch: 27 training_loss 2.687384080886841 test_loss: 2.83626651763916
epoch: 28 training_loss 2.631861174106598 test_loss: 2.5828853607177735
epoch: 29 training_loss 2.6002762961387633 test_loss: 2.679888153076172
epoch: 30 training_loss 2.5595751118659975 test_loss: 2.5930849075317384
epoch: 31 training_loss 2.4821946930885317 test_loss: 2.5268672943115233
epoch: 32 training_loss 2.473047342300415 test_loss: 2.4798818588256837
epoch: 33 training_loss 2.5155554270744322 test_loss: 2.4263584136962892
epoch: 34 training_loss 2.414174156188965 test_loss: 2.4460803985595705
epoch: 35 training_loss 2.434116747379303 test_loss: 2.3208045959472656
epoch: 36 training_loss 2.355799491405487 test_loss: 2.26809024810791
epoch: 37 training_loss 2.2615017926692964 test_loss: 2.4264755249023438
epoch: 38 training_loss 2.320449820756912 test_loss: 2.242431640625
epoch: 39 training_loss 2.221259982585907 test_loss: 2.260310935974121
epoch: 40 training_loss 2.2187577426433562 test_loss: 2.1884487152099608
epoch: 41 training_loss 2.1850302267074584 test_loss: 2.3653221130371094
epoch: 42 training_loss 2.126135264635086 test_loss: 2.131239891052246
epoch: 43 training_loss 2.185735867023468 test_loss: 2.120301055908203
epoch: 44 training_loss 2.1257812857627867 test_loss: 2.1572309494018556
epoch: 45 training_loss 2.133454406261444 test_loss: 2.1701311111450194
epoch: 46 training_loss 2.0831248748302458 test_loss: 2.213161087036133
epoch: 47 training_loss 2.0522156059741974 test_loss: 2.1131467819213867
epoch: 48 training_loss 2.0514097821712496 test_loss: 2.0621597290039064
epoch: 49 training_loss 2.059716249704361 test_loss: 1.9520408630371093
epoch: 50 training_loss 2.013259398937225 test_loss: 2.138876724243164
epoch: 51 training_loss 1.9815961933135986 test_loss: 2.032960319519043
epoch: 52 training_loss 2.0112707698345185 test_loss: 2.0086997985839843
epoch: 53 training_loss 2.0066240417957304 test_loss: 1.9994590759277344
epoch: 54 training_loss 1.9268598997592925 test_loss: 1.880579948425293
epoch: 55 training_loss 1.9477271878719329 test_loss: 2.059719276428223
epoch: 56 training_loss 1.9008232390880584 test_loss: 1.9834165573120117
epoch: 57 training_loss 1.9312587666511536 test_loss: 1.8890714645385742
epoch: 58 training_loss 1.9094116961956025 test_loss: 1.9125839233398438
epoch: 59 training_loss 1.919139049053192 test_loss: 1.7886383056640625
epoch: 60 training_loss 1.8823959040641784 test_loss: 1.897069549560547
epoch: 61 training_loss 1.8494745302200317 test_loss: 1.8692174911499024
epoch: 62 training_loss 1.8685902619361878 test_loss: 1.8347972869873046
epoch: 63 training_loss 1.8331443786621093 test_loss: 1.8121620178222657
epoch: 64 training_loss 1.856564952135086 test_loss: 1.8809122085571288
epoch: 65 training_loss 1.8466408002376555 test_loss: 1.8221612930297852
epoch: 66 training_loss 1.7931006956100464 test_loss: 1.8582046508789063
epoch: 67 training_loss 1.8172666573524474 test_loss: 1.8378665924072266
epoch: 68 training_loss 1.793146826028824 test_loss: 1.8205743789672852
epoch: 69 training_loss 1.7997997725009918 test_loss: 1.7403423309326171
epoch: 70 training_loss 1.7757495176792144 test_loss: 1.8148544311523438
epoch: 71 training_loss 1.7694515788555145 test_loss: 1.8051389694213866
epoch: 72 training_loss 1.7782643747329712 test_loss: 1.7434181213378905
epoch: 73 training_loss 1.76480428814888 test_loss: 1.7185541152954102
epoch: 74 training_loss 1.725101500749588 test_loss: 1.7898204803466797
epoch: 75 training_loss 1.75344043135643 test_loss: 1.7587203979492188
epoch: 76 training_loss 1.7379710829257966 test_loss: 1.7954526901245118
epoch: 77 training_loss 1.7470969331264496 test_loss: 1.8353654861450195
epoch: 78 training_loss 1.7016854763031006 test_loss: 1.6867511749267579
epoch: 79 training_loss 1.7179979419708251 test_loss: 1.732691192626953
epoch: 80 training_loss 1.7102394211292267 test_loss: 1.7210954666137694
epoch: 81 training_loss 1.7185941910743714 test_loss: 1.7074163436889649
epoch: 82 training_loss 1.7062489020824432 test_loss: 1.7159004211425781
epoch: 83 training_loss 1.7012110888957976 test_loss: 1.6864234924316406
epoch: 84 training_loss 1.6996633076667786 test_loss: 1.649947738647461
epoch: 85 training_loss 1.6829630768299102 test_loss: 1.7640432357788085
epoch: 86 training_loss 1.6998017311096192 test_loss: 1.6817424774169922
epoch: 87 training_loss 1.6722166645526886 test_loss: 1.6592599868774414
epoch: 88 training_loss 1.6622358787059783 test_loss: 1.6971567153930665
epoch: 89 training_loss 1.6519655680656433 test_loss: 1.6941062927246093
epoch: 90 training_loss 1.6358493077754974 test_loss: 1.684300422668457
epoch: 91 training_loss 1.6339406383037567 test_loss: 1.6029544830322267
epoch: 92 training_loss 1.6406877827644348 test_loss: 1.6029649734497071
epoch: 93 training_loss 1.6519443809986114 test_loss: 1.6726402282714843
epoch: 94 training_loss 1.6467874228954316 test_loss: 1.6456302642822265
epoch: 95 training_loss 1.6159210872650147 test_loss: 1.6799892425537108
epoch: 96 training_loss 1.6451924991607667 test_loss: 1.653474998474121
epoch: 97 training_loss 1.6368737983703614 test_loss: 1.6745946884155274
epoch: 98 training_loss 1.6302861309051513 test_loss: 1.6298797607421875
epoch: 99 training_loss 1.6248492205142975 test_loss: 1.6064693450927734
epoch: 100 training_loss 1.6109131503105163 test_loss: 1.6213760375976562
epoch: 101 training_loss 1.6461954760551452 test_loss: 1.639214324951172
epoch: 102 training_loss 1.5728841412067414 test_loss: 1.5837682723999023
epoch: 103 training_loss 1.5996391153335572 test_loss: 1.5732930183410645
epoch: 104 training_loss 1.600295592546463 test_loss: 1.5478599548339844
epoch: 105 training_loss 1.5735570752620698 test_loss: 1.6012426376342774
epoch: 106 training_loss 1.588727011680603 test_loss: 1.6040592193603516
epoch: 107 training_loss 1.5922324562072754 test_loss: 1.5461577415466308
epoch: 108 training_loss 1.5840927052497864 test_loss: 1.5550990104675293
epoch: 109 training_loss 1.5655773556232453 test_loss: 1.5509480476379394
epoch: 110 training_loss 1.5488789308071136 test_loss: 1.5328479766845704
epoch: 111 training_loss 1.553435652256012 test_loss: 1.5521811485290526
epoch: 112 training_loss 1.5739118945598602 test_loss: 1.5784969329833984
epoch: 113 training_loss 1.5487049961090087 test_loss: 1.5708396911621094
epoch: 114 training_loss 1.5668025362491607 test_loss: 1.5409660339355469
epoch: 115 training_loss 1.5267150604724884 test_loss: 1.5430900573730468
epoch: 116 training_loss 1.5503346967697142 test_loss: 1.5842120170593261
epoch: 117 training_loss 1.5111928272247315 test_loss: 1.5312776565551758
epoch: 118 training_loss 1.5497144162654877 test_loss: 1.5523859977722168
epoch: 119 training_loss 1.5326596260070802 test_loss: 1.5196401596069335
epoch: 120 training_loss 1.5157433831691742 test_loss: 1.514519500732422
epoch: 121 training_loss 1.558358396291733 test_loss: 1.5200149536132812
epoch: 122 training_loss 1.5643910253047943 test_loss: 1.5626776695251465
epoch: 123 training_loss 1.5523694574832916 test_loss: 1.5172532081604004
epoch: 124 training_loss 1.5140669465065002 test_loss: 1.604496955871582
epoch: 125 training_loss 1.5258087182044984 test_loss: 1.501819133758545
epoch: 126 training_loss 1.5108884394168853 test_loss: 1.5179118156433105
epoch: 127 training_loss 1.5029333961009979 test_loss: 1.5260796546936035
epoch: 128 training_loss 1.5023032212257386 test_loss: 1.5402100563049317
epoch: 129 training_loss 1.498331642150879 test_loss: 1.5571464538574218
epoch: 130 training_loss 1.5131889009475707 test_loss: 1.4840205192565918
epoch: 131 training_loss 1.53371839761734 test_loss: 1.4617606163024903
epoch: 132 training_loss 1.500981639623642 test_loss: 1.5796584129333495
epoch: 133 training_loss 1.505518844127655 test_loss: 1.543771743774414
epoch: 134 training_loss 1.4883943021297454 test_loss: 1.4783693313598634
epoch: 135 training_loss 1.477930715084076 test_loss: 1.4432724952697753
epoch: 136 training_loss 1.5030096662044525 test_loss: 1.4936570167541503
epoch: 137 training_loss 1.4877216243743896 test_loss: 1.5175271034240723
epoch: 138 training_loss 1.5091239976882935 test_loss: 1.4765524864196777
epoch: 139 training_loss 1.4927544593811035 test_loss: 1.4748163223266602
epoch: 140 training_loss 1.475749226808548 test_loss: 1.4493081092834472
epoch: 141 training_loss 1.488617616891861 test_loss: 1.484952163696289
epoch: 142 training_loss 1.4712670981884002 test_loss: 1.46853666305542
epoch: 143 training_loss 1.4669472575187683 test_loss: 1.4794992446899413
epoch: 144 training_loss 1.4509234237670898 test_loss: 1.4467233657836913
epoch: 145 training_loss 1.4712133288383484 test_loss: 1.5411620140075684
epoch: 146 training_loss 1.4673163628578185 test_loss: 1.5177522659301759
epoch: 147 training_loss 1.4576203429698944 test_loss: 1.4894482612609863
epoch: 148 training_loss 1.4700144052505493 test_loss: 1.4866743087768555
epoch: 149 training_loss 1.500046352148056 test_loss: 1.5227810859680175
5079.350355067565
episode: 0 training return: tensor(-131.9222, device='cuda:0')
episode: 1 training return: tensor(-95.3701, device='cuda:0')
episode: 2 training return: tensor(-83.5640, device='cuda:0')
episode: 3 training return: tensor(-141.6635, device='cuda:0')
epoch: 1 test_true_pfm: 5220.232294141079 sim_pfm: 37.33213237048282
episode: 4 training return: tensor(-181.1830, device='cuda:0')
episode: 5 training return: tensor(-154.4628, device='cuda:0')
episode: 6 training return: tensor(-234.2014, device='cuda:0')
episode: 7 training return: tensor(-86.2687, device='cuda:0')
epoch: 2 test_true_pfm: 5008.14164494969 sim_pfm: 39.3839696556873
episode: 8 training return: tensor(-247.8502, device='cuda:0')
episode: 9 training return: tensor(-90.6120, device='cuda:0')
episode: 10 training return: tensor(-62.4706, device='cuda:0')
episode: 11 training return: tensor(-36.2689, device='cuda:0')
epoch: 3 test_true_pfm: 4995.450178977066 sim_pfm: 2.768729620031081
episode: 12 training return: tensor(4.2021, device='cuda:0')
episode: 13 training return: tensor(29.9932, device='cuda:0')
episode: 14 training return: tensor(52.5721, device='cuda:0')
episode: 15 training return: tensor(-76.5902, device='cuda:0')
epoch: 4 test_true_pfm: 5165.313497023321 sim_pfm: 106.58761436482503
episode: 16 training return: tensor(11.8444, device='cuda:0')
episode: 17 training return: tensor(105.7654, device='cuda:0')
episode: 18 training return: tensor(-94.3726, device='cuda:0')
episode: 19 training return: tensor(-159.0358, device='cuda:0')
epoch: 5 test_true_pfm: 5026.384373749801 sim_pfm: 145.4058600544037
episode: 20 training return: tensor(-56.6063, device='cuda:0')
episode: 21 training return: tensor(-32.1344, device='cuda:0')
episode: 22 training return: tensor(-92.5176, device='cuda:0')
episode: 23 training return: tensor(-122.3107, device='cuda:0')
epoch: 6 test_true_pfm: 5171.672456401891 sim_pfm: 111.07674835267244
episode: 24 training return: tensor(4.9930, device='cuda:0')
episode: 25 training return: tensor(-30.0565, device='cuda:0')
episode: 26 training return: tensor(-91.0962, device='cuda:0')
episode: 27 training return: tensor(-130.0323, device='cuda:0')
epoch: 7 test_true_pfm: 5081.921876906999 sim_pfm: 151.62016808086386
episode: 28 training return: tensor(-33.5549, device='cuda:0')
episode: 29 training return: tensor(90.9908, device='cuda:0')
episode: 30 training return: tensor(-51.7702, device='cuda:0')
episode: 31 training return: tensor(-94.2068, device='cuda:0')
epoch: 8 test_true_pfm: 5012.438407854519 sim_pfm: 61.62786771201839
episode: 32 training return: tensor(84.5254, device='cuda:0')
episode: 33 training return: tensor(22.5627, device='cuda:0')
episode: 34 training return: tensor(41.8960, device='cuda:0')
episode: 35 training return: tensor(-139.1292, device='cuda:0')
epoch: 9 test_true_pfm: 5308.642605104679 sim_pfm: 162.8799313218915
episode: 36 training return: tensor(13.3297, device='cuda:0')
episode: 37 training return: tensor(-62.7008, device='cuda:0')
episode: 38 training return: tensor(-63.6965, device='cuda:0')
episode: 39 training return: tensor(-0.9629, device='cuda:0')
epoch: 10 test_true_pfm: 5189.6291114839705 sim_pfm: 159.79479568630146
episode: 40 training return: tensor(7.1937, device='cuda:0')
episode: 41 training return: tensor(-88.3146, device='cuda:0')
episode: 42 training return: tensor(73.2192, device='cuda:0')
episode: 43 training return: tensor(55.3362, device='cuda:0')
epoch: 11 test_true_pfm: 5288.363714078868 sim_pfm: 292.01315386088874
episode: 44 training return: tensor(55.1647, device='cuda:0')
episode: 45 training return: tensor(-4.2438, device='cuda:0')
episode: 46 training return: tensor(-64.8158, device='cuda:0')
episode: 47 training return: tensor(0.8747, device='cuda:0')
epoch: 12 test_true_pfm: 5368.349504837393 sim_pfm: 165.57626417669235
episode: 48 training return: tensor(38.3085, device='cuda:0')
episode: 49 training return: tensor(-13.2096, device='cuda:0')
episode: 50 training return: tensor(-14.8258, device='cuda:0')
episode: 51 training return: tensor(44.9530, device='cuda:0')
epoch: 13 test_true_pfm: 5074.07892471196 sim_pfm: 262.1876954190666
episode: 52 training return: tensor(185.1852, device='cuda:0')
episode: 53 training return: tensor(188.5062, device='cuda:0')
episode: 54 training return: tensor(135.7097, device='cuda:0')
episode: 55 training return: tensor(74.9866, device='cuda:0')
epoch: 14 test_true_pfm: 5463.44771163813 sim_pfm: 350.3192683485201
episode: 56 training return: tensor(58.6708, device='cuda:0')
episode: 57 training return: tensor(33.5621, device='cuda:0')
episode: 58 training return: tensor(98.4853, device='cuda:0')
episode: 59 training return: tensor(133.9615, device='cuda:0')
epoch: 15 test_true_pfm: 5509.8900034426615 sim_pfm: 258.2756158568275
episode: 60 training return: tensor(81.4070, device='cuda:0')
episode: 61 training return: tensor(142.0224, device='cuda:0')
episode: 62 training return: tensor(223.2963, device='cuda:0')
episode: 63 training return: tensor(109.3090, device='cuda:0')
epoch: 16 test_true_pfm: 5461.398706835021 sim_pfm: 322.22683402561233
episode: 64 training return: tensor(63.4527, device='cuda:0')
episode: 65 training return: tensor(117.2597, device='cuda:0')
episode: 66 training return: tensor(36.0444, device='cuda:0')
episode: 67 training return: tensor(68.5223, device='cuda:0')
epoch: 17 test_true_pfm: 5449.397246508883 sim_pfm: 40.93378814145884
episode: 68 training return: tensor(9.2639, device='cuda:0')
episode: 69 training return: tensor(122.3839, device='cuda:0')
episode: 70 training return: tensor(116.8870, device='cuda:0')
episode: 71 training return: tensor(156.5824, device='cuda:0')
epoch: 18 test_true_pfm: 5545.53990478085 sim_pfm: 355.37356999264256
episode: 72 training return: tensor(79.8569, device='cuda:0')
episode: 73 training return: tensor(96.6417, device='cuda:0')
episode: 74 training return: tensor(124.8720, device='cuda:0')
episode: 75 training return: tensor(229.6864, device='cuda:0')
epoch: 19 test_true_pfm: 5552.459482827795 sim_pfm: 369.1128845321946
episode: 76 training return: tensor(238.6876, device='cuda:0')
episode: 77 training return: tensor(132.7334, device='cuda:0')
episode: 78 training return: tensor(263.9147, device='cuda:0')
episode: 79 training return: tensor(154.7694, device='cuda:0')
epoch: 20 test_true_pfm: 5509.276922039128 sim_pfm: 347.3327772707756
episode: 80 training return: tensor(154.2627, device='cuda:0')
episode: 81 training return: tensor(94.5485, device='cuda:0')
episode: 82 training return: tensor(241.7147, device='cuda:0')
episode: 83 training return: tensor(217.4127, device='cuda:0')
epoch: 21 test_true_pfm: 5521.638307101348 sim_pfm: 313.36716678587254
episode: 84 training return: tensor(229.9640, device='cuda:0')
episode: 85 training return: tensor(130.0715, device='cuda:0')
episode: 86 training return: tensor(189.7816, device='cuda:0')
episode: 87 training return: tensor(238.1731, device='cuda:0')
epoch: 22 test_true_pfm: 5604.0006130081265 sim_pfm: 360.3818564352114
episode: 88 training return: tensor(-658.9169, device='cuda:0')
episode: 89 training return: tensor(-915.0175, device='cuda:0')
episode: 90 training return: tensor(163.6459, device='cuda:0')
episode: 91 training return: tensor(285.1687, device='cuda:0')
epoch: 23 test_true_pfm: 5489.854784645409 sim_pfm: 303.68575129002176
episode: 92 training return: tensor(147.9947, device='cuda:0')
episode: 93 training return: tensor(209.7110, device='cuda:0')
episode: 94 training return: tensor(232.9468, device='cuda:0')
episode: 95 training return: tensor(152.3777, device='cuda:0')
epoch: 24 test_true_pfm: 5614.711104217109 sim_pfm: 386.8683984244902
episode: 96 training return: tensor(258.7202, device='cuda:0')
episode: 97 training return: tensor(192.8894, device='cuda:0')
episode: 98 training return: tensor(166.6660, device='cuda:0')
episode: 99 training return: tensor(323.3201, device='cuda:0')
epoch: 25 test_true_pfm: 5554.268327621699 sim_pfm: 369.5994353411273
episode: 100 training return: tensor(294.5867, device='cuda:0')
episode: 101 training return: tensor(208.9421, device='cuda:0')
episode: 102 training return: tensor(184.9019, device='cuda:0')
episode: 103 training return: tensor(288.8827, device='cuda:0')
epoch: 26 test_true_pfm: 5589.2441920720275 sim_pfm: 325.05657450427924
episode: 104 training return: tensor(152.5140, device='cuda:0')
episode: 105 training return: tensor(154.8279, device='cuda:0')
episode: 106 training return: tensor(283.1533, device='cuda:0')
episode: 107 training return: tensor(132.7954, device='cuda:0')
epoch: 27 test_true_pfm: 5623.648818977702 sim_pfm: 346.23483870754717
episode: 108 training return: tensor(216.0990, device='cuda:0')
episode: 109 training return: tensor(196.2154, device='cuda:0')
episode: 110 training return: tensor(296.0286, device='cuda:0')
episode: 111 training return: tensor(208.5616, device='cuda:0')
epoch: 28 test_true_pfm: 5610.99779152893 sim_pfm: 402.4947470713717
episode: 112 training return: tensor(319.4838, device='cuda:0')
episode: 113 training return: tensor(282.4958, device='cuda:0')
episode: 114 training return: tensor(300.6214, device='cuda:0')
episode: 115 training return: tensor(150.5595, device='cuda:0')
epoch: 29 test_true_pfm: 5685.495906430769 sim_pfm: 483.00090190250194
episode: 116 training return: tensor(268.7273, device='cuda:0')
episode: 117 training return: tensor(328.6490, device='cuda:0')
episode: 118 training return: tensor(355.9257, device='cuda:0')
episode: 119 training return: tensor(289.5943, device='cuda:0')
epoch: 30 test_true_pfm: 5668.41014214859 sim_pfm: 465.50148586324457
episode: 120 training return: tensor(242.7852, device='cuda:0')
episode: 121 training return: tensor(296.4460, device='cuda:0')
episode: 122 training return: tensor(302.8632, device='cuda:0')
episode: 123 training return: tensor(309.3818, device='cuda:0')
epoch: 31 test_true_pfm: 5790.078736853661 sim_pfm: 451.8013075585089
episode: 124 training return: tensor(243.9041, device='cuda:0')
episode: 125 training return: tensor(366.9478, device='cuda:0')
episode: 126 training return: tensor(253.4493, device='cuda:0')
episode: 127 training return: tensor(196.6656, device='cuda:0')
epoch: 32 test_true_pfm: 5595.508457560659 sim_pfm: 462.20738061659114
episode: 128 training return: tensor(279.8289, device='cuda:0')
episode: 129 training return: tensor(257.0826, device='cuda:0')
episode: 130 training return: tensor(203.1373, device='cuda:0')
episode: 131 training return: tensor(350.7919, device='cuda:0')
epoch: 33 test_true_pfm: 5653.54852887082 sim_pfm: 437.1736328496093
episode: 132 training return: tensor(266.8052, device='cuda:0')
episode: 133 training return: tensor(333.6034, device='cuda:0')
episode: 134 training return: tensor(388.5449, device='cuda:0')
episode: 135 training return: tensor(387.5475, device='cuda:0')
epoch: 34 test_true_pfm: 5748.1336186968665 sim_pfm: 389.27908438675996
episode: 136 training return: tensor(302.4680, device='cuda:0')
episode: 137 training return: tensor(254.4191, device='cuda:0')
episode: 138 training return: tensor(330.6678, device='cuda:0')
episode: 139 training return: tensor(322.1917, device='cuda:0')
epoch: 35 test_true_pfm: 5714.031425402335 sim_pfm: 461.10558625886915
episode: 140 training return: tensor(328.6598, device='cuda:0')
episode: 141 training return: tensor(413.5642, device='cuda:0')
episode: 142 training return: tensor(356.2698, device='cuda:0')
episode: 143 training return: tensor(229.4720, device='cuda:0')
epoch: 36 test_true_pfm: 5751.581746016355 sim_pfm: 478.16481184590765
episode: 144 training return: tensor(272.9370, device='cuda:0')
episode: 145 training return: tensor(253.6736, device='cuda:0')
episode: 146 training return: tensor(352.6931, device='cuda:0')
episode: 147 training return: tensor(291.4653, device='cuda:0')
epoch: 37 test_true_pfm: 5785.80941039488 sim_pfm: 478.5376728513899
episode: 148 training return: tensor(220.5388, device='cuda:0')
episode: 149 training return: tensor(317.3456, device='cuda:0')
episode: 150 training return: tensor(259.4821, device='cuda:0')
episode: 151 training return: tensor(363.3455, device='cuda:0')
epoch: 38 test_true_pfm: 5770.700113849724 sim_pfm: 529.2138067647078
episode: 152 training return: tensor(303.7746, device='cuda:0')
episode: 153 training return: tensor(298.5833, device='cuda:0')
episode: 154 training return: tensor(320.3676, device='cuda:0')
episode: 155 training return: tensor(377.4900, device='cuda:0')
epoch: 39 test_true_pfm: 5828.1348969244755 sim_pfm: 471.9769457552854
episode: 156 training return: tensor(300.5785, device='cuda:0')
episode: 157 training return: tensor(257.9601, device='cuda:0')
episode: 158 training return: tensor(356.3531, device='cuda:0')
episode: 159 training return: tensor(384.5746, device='cuda:0')
epoch: 40 test_true_pfm: 5775.0543285260255 sim_pfm: 496.62348881115514
episode: 160 training return: tensor(303.0908, device='cuda:0')
episode: 161 training return: tensor(320.2914, device='cuda:0')
episode: 162 training return: tensor(334.4175, device='cuda:0')
episode: 163 training return: tensor(395.6800, device='cuda:0')
epoch: 41 test_true_pfm: 5810.34339553999 sim_pfm: 517.9181380672186
episode: 164 training return: tensor(379.2099, device='cuda:0')
episode: 165 training return: tensor(292.4941, device='cuda:0')
episode: 166 training return: tensor(302.7693, device='cuda:0')
episode: 167 training return: tensor(379.1221, device='cuda:0')
epoch: 42 test_true_pfm: 5823.860439848931 sim_pfm: 522.496924152947
episode: 168 training return: tensor(485.1475, device='cuda:0')
episode: 169 training return: tensor(280.5528, device='cuda:0')
episode: 170 training return: tensor(346.4532, device='cuda:0')
episode: 171 training return: tensor(465.2267, device='cuda:0')
epoch: 43 test_true_pfm: 5728.733513242335 sim_pfm: 456.63331582439906
episode: 172 training return: tensor(442.5223, device='cuda:0')
episode: 173 training return: tensor(382.3096, device='cuda:0')
episode: 174 training return: tensor(399.3404, device='cuda:0')
episode: 175 training return: tensor(367.9882, device='cuda:0')
epoch: 44 test_true_pfm: 5824.298790506648 sim_pfm: 520.3347623953887
episode: 176 training return: tensor(361.9879, device='cuda:0')
episode: 177 training return: tensor(306.2923, device='cuda:0')
episode: 178 training return: tensor(325.5917, device='cuda:0')
episode: 179 training return: tensor(392.9608, device='cuda:0')
epoch: 45 test_true_pfm: 5825.1893520999365 sim_pfm: 553.5453382863974
episode: 180 training return: tensor(329.7294, device='cuda:0')
episode: 181 training return: tensor(361.1575, device='cuda:0')
episode: 182 training return: tensor(436.0614, device='cuda:0')
episode: 183 training return: tensor(338.5759, device='cuda:0')
epoch: 46 test_true_pfm: 5769.8295420735085 sim_pfm: 485.29115900128573
episode: 184 training return: tensor(397.3121, device='cuda:0')
episode: 185 training return: tensor(397.2766, device='cuda:0')
episode: 186 training return: tensor(319.6416, device='cuda:0')
episode: 187 training return: tensor(318.7619, device='cuda:0')
epoch: 47 test_true_pfm: 5837.197814358676 sim_pfm: 498.54663214522105
episode: 188 training return: tensor(269.1425, device='cuda:0')
episode: 189 training return: tensor(323.2918, device='cuda:0')
episode: 190 training return: tensor(382.8115, device='cuda:0')
episode: 191 training return: tensor(303.0026, device='cuda:0')
epoch: 48 test_true_pfm: 5824.992262024636 sim_pfm: 519.9763493904611
episode: 192 training return: tensor(318.8454, device='cuda:0')
episode: 193 training return: tensor(363.5547, device='cuda:0')
episode: 194 training return: tensor(389.5770, device='cuda:0')
episode: 195 training return: tensor(330.5676, device='cuda:0')
epoch: 49 test_true_pfm: 5847.751961323163 sim_pfm: 514.592152097864
episode: 196 training return: tensor(379.2064, device='cuda:0')
episode: 197 training return: tensor(433.2380, device='cuda:0')
episode: 198 training return: tensor(382.7548, device='cuda:0')
episode: 199 training return: tensor(301.6773, device='cuda:0')
epoch: 50 test_true_pfm: 5802.26890127044 sim_pfm: 503.5272185029462
episode: 200 training return: tensor(465.8738, device='cuda:0')
episode: 201 training return: tensor(394.5905, device='cuda:0')
episode: 202 training return: tensor(337.0067, device='cuda:0')
episode: 203 training return: tensor(440.8607, device='cuda:0')
epoch: 51 test_true_pfm: 5843.265132990756 sim_pfm: 550.946766063765
episode: 204 training return: tensor(340.4609, device='cuda:0')
episode: 205 training return: tensor(487.6776, device='cuda:0')
episode: 206 training return: tensor(393.4832, device='cuda:0')
episode: 207 training return: tensor(427.5936, device='cuda:0')
epoch: 52 test_true_pfm: 5806.14234830507 sim_pfm: 574.420919626505
episode: 208 training return: tensor(228.6335, device='cuda:0')
episode: 209 training return: tensor(276.3630, device='cuda:0')
episode: 210 training return: tensor(389.4155, device='cuda:0')
episode: 211 training return: tensor(448.1358, device='cuda:0')
epoch: 53 test_true_pfm: 5862.02627775951 sim_pfm: 523.9762636172042
episode: 212 training return: tensor(410.0587, device='cuda:0')
episode: 213 training return: tensor(364.7676, device='cuda:0')
episode: 214 training return: tensor(346.1483, device='cuda:0')
episode: 215 training return: tensor(462.2555, device='cuda:0')
epoch: 54 test_true_pfm: 5851.017384568168 sim_pfm: 544.8562804558411
episode: 216 training return: tensor(284.8259, device='cuda:0')
episode: 217 training return: tensor(406.6493, device='cuda:0')
episode: 218 training return: tensor(473.4517, device='cuda:0')
episode: 219 training return: tensor(456.4720, device='cuda:0')
epoch: 55 test_true_pfm: 5874.128293079585 sim_pfm: 568.9368060033206
episode: 220 training return: tensor(369.2886, device='cuda:0')
episode: 221 training return: tensor(328.0422, device='cuda:0')
episode: 222 training return: tensor(452.0331, device='cuda:0')
episode: 223 training return: tensor(351.3068, device='cuda:0')
epoch: 56 test_true_pfm: 5828.689092627788 sim_pfm: 541.4455719248703
episode: 224 training return: tensor(464.3389, device='cuda:0')
episode: 225 training return: tensor(435.1660, device='cuda:0')
episode: 226 training return: tensor(330.5639, device='cuda:0')
episode: 227 training return: tensor(457.1081, device='cuda:0')
epoch: 57 test_true_pfm: 5924.375101295867 sim_pfm: 624.3359798420375
episode: 228 training return: tensor(417.4550, device='cuda:0')
episode: 229 training return: tensor(337.9832, device='cuda:0')
episode: 230 training return: tensor(461.9246, device='cuda:0')
episode: 231 training return: tensor(439.9850, device='cuda:0')
epoch: 58 test_true_pfm: 5860.787922984157 sim_pfm: 567.9316072207876
episode: 232 training return: tensor(400.0260, device='cuda:0')
episode: 233 training return: tensor(426.7303, device='cuda:0')
episode: 234 training return: tensor(410.7112, device='cuda:0')
episode: 235 training return: tensor(419.2656, device='cuda:0')
epoch: 59 test_true_pfm: 5971.180743214052 sim_pfm: 587.88118714554
episode: 236 training return: tensor(492.7943, device='cuda:0')
episode: 237 training return: tensor(385.0861, device='cuda:0')
episode: 238 training return: tensor(426.4034, device='cuda:0')
episode: 239 training return: tensor(450.6376, device='cuda:0')
epoch: 60 test_true_pfm: 5888.828799713233 sim_pfm: 603.9381112618527
episode: 240 training return: tensor(431.0836, device='cuda:0')
episode: 241 training return: tensor(274.0577, device='cuda:0')
episode: 242 training return: tensor(468.5453, device='cuda:0')
episode: 243 training return: tensor(468.6631, device='cuda:0')
epoch: 61 test_true_pfm: 5916.4863580960855 sim_pfm: 545.2780528852405
episode: 244 training return: tensor(384.7448, device='cuda:0')
episode: 245 training return: tensor(380.5632, device='cuda:0')
episode: 246 training return: tensor(472.6460, device='cuda:0')
episode: 247 training return: tensor(345.5980, device='cuda:0')
epoch: 62 test_true_pfm: 5821.362145131319 sim_pfm: 545.6654934224401
episode: 248 training return: tensor(413.2636, device='cuda:0')
episode: 249 training return: tensor(362.8534, device='cuda:0')
episode: 250 training return: tensor(381.7262, device='cuda:0')
episode: 251 training return: tensor(434.6835, device='cuda:0')
epoch: 63 test_true_pfm: 5890.144552949064 sim_pfm: 568.3792048215109
episode: 252 training return: tensor(433.0798, device='cuda:0')
episode: 253 training return: tensor(440.1938, device='cuda:0')
episode: 254 training return: tensor(480.6295, device='cuda:0')
episode: 255 training return: tensor(461.3165, device='cuda:0')
epoch: 64 test_true_pfm: 5985.819212174359 sim_pfm: 633.2366288298703
episode: 256 training return: tensor(447.8308, device='cuda:0')
episode: 257 training return: tensor(460.5420, device='cuda:0')
episode: 258 training return: tensor(486.3065, device='cuda:0')
episode: 259 training return: tensor(518.7651, device='cuda:0')
epoch: 65 test_true_pfm: 5891.136194487718 sim_pfm: 567.3046437713783
episode: 260 training return: tensor(400.1500, device='cuda:0')
episode: 261 training return: tensor(304.8047, device='cuda:0')
episode: 262 training return: tensor(436.8778, device='cuda:0')
episode: 263 training return: tensor(430.8101, device='cuda:0')
epoch: 66 test_true_pfm: 5885.845650487521 sim_pfm: 535.4566375341188
episode: 264 training return: tensor(510.1970, device='cuda:0')
episode: 265 training return: tensor(429.8845, device='cuda:0')
episode: 266 training return: tensor(471.0601, device='cuda:0')
episode: 267 training return: tensor(425.2398, device='cuda:0')
epoch: 67 test_true_pfm: 5867.035682352153 sim_pfm: 542.3270761415575
episode: 268 training return: tensor(345.8981, device='cuda:0')
episode: 269 training return: tensor(452.7756, device='cuda:0')
episode: 270 training return: tensor(456.6115, device='cuda:0')
episode: 271 training return: tensor(497.1647, device='cuda:0')
epoch: 68 test_true_pfm: 5917.960836408355 sim_pfm: 604.4323920645111
episode: 272 training return: tensor(445.6718, device='cuda:0')
episode: 273 training return: tensor(444.5620, device='cuda:0')
episode: 274 training return: tensor(460.3995, device='cuda:0')
episode: 275 training return: tensor(497.2047, device='cuda:0')
epoch: 69 test_true_pfm: 5900.7852151249 sim_pfm: 559.8819234269904
episode: 276 training return: tensor(472.4171, device='cuda:0')
episode: 277 training return: tensor(439.7950, device='cuda:0')
episode: 278 training return: tensor(469.6371, device='cuda:0')
episode: 279 training return: tensor(442.2630, device='cuda:0')
epoch: 70 test_true_pfm: 5990.581728061693 sim_pfm: 568.4959314668085
episode: 280 training return: tensor(385.5261, device='cuda:0')
episode: 281 training return: tensor(490.4399, device='cuda:0')
episode: 282 training return: tensor(290.8536, device='cuda:0')
episode: 283 training return: tensor(487.0778, device='cuda:0')
epoch: 71 test_true_pfm: 5911.584110148255 sim_pfm: 594.418781890689
episode: 284 training return: tensor(448.6288, device='cuda:0')
episode: 285 training return: tensor(427.5230, device='cuda:0')
episode: 286 training return: tensor(467.6094, device='cuda:0')
episode: 287 training return: tensor(443.6815, device='cuda:0')
epoch: 72 test_true_pfm: 5931.890410121775 sim_pfm: 593.7920802150815
episode: 288 training return: tensor(432.9957, device='cuda:0')
episode: 289 training return: tensor(445.4849, device='cuda:0')
episode: 290 training return: tensor(410.6789, device='cuda:0')
episode: 291 training return: tensor(364.8651, device='cuda:0')
epoch: 73 test_true_pfm: 5991.408387738943 sim_pfm: 586.7469195493419
episode: 292 training return: tensor(498.4232, device='cuda:0')
episode: 293 training return: tensor(428.4780, device='cuda:0')
episode: 294 training return: tensor(415.5548, device='cuda:0')
episode: 295 training return: tensor(402.2386, device='cuda:0')
epoch: 74 test_true_pfm: 5941.443225524369 sim_pfm: 602.8272500335783
episode: 296 training return: tensor(450.0165, device='cuda:0')
episode: 297 training return: tensor(516.2415, device='cuda:0')
episode: 298 training return: tensor(389.7642, device='cuda:0')
episode: 299 training return: tensor(469.8416, device='cuda:0')
epoch: 75 test_true_pfm: 6027.357320326785 sim_pfm: 612.6175393119823
episode: 300 training return: tensor(541.2625, device='cuda:0')
episode: 301 training return: tensor(348.2219, device='cuda:0')
episode: 302 training return: tensor(334.5451, device='cuda:0')
episode: 303 training return: tensor(403.6231, device='cuda:0')
epoch: 76 test_true_pfm: 5899.118756439191 sim_pfm: 578.6307394805675
episode: 304 training return: tensor(433.0484, device='cuda:0')
episode: 305 training return: tensor(392.4966, device='cuda:0')
episode: 306 training return: tensor(525.3693, device='cuda:0')
episode: 307 training return: tensor(402.0132, device='cuda:0')
epoch: 77 test_true_pfm: 6018.8443188987185 sim_pfm: 611.4288273575658
episode: 308 training return: tensor(277.1161, device='cuda:0')
episode: 309 training return: tensor(486.4629, device='cuda:0')
episode: 310 training return: tensor(531.2728, device='cuda:0')
episode: 311 training return: tensor(416.0724, device='cuda:0')
epoch: 78 test_true_pfm: 5970.057988323911 sim_pfm: 591.199404467654
episode: 312 training return: tensor(445.7742, device='cuda:0')
episode: 313 training return: tensor(423.8835, device='cuda:0')
episode: 314 training return: tensor(456.0952, device='cuda:0')
episode: 315 training return: tensor(481.9234, device='cuda:0')
epoch: 79 test_true_pfm: 5962.105435037115 sim_pfm: 596.9063617128801
episode: 316 training return: tensor(359.2391, device='cuda:0')
episode: 317 training return: tensor(537.2565, device='cuda:0')
episode: 318 training return: tensor(433.1106, device='cuda:0')
episode: 319 training return: tensor(490.5911, device='cuda:0')
epoch: 80 test_true_pfm: 5942.750489791746 sim_pfm: 608.5015990066653
episode: 320 training return: tensor(432.1019, device='cuda:0')
episode: 321 training return: tensor(431.8315, device='cuda:0')
episode: 322 training return: tensor(450.0152, device='cuda:0')
episode: 323 training return: tensor(489.8315, device='cuda:0')
epoch: 81 test_true_pfm: 5981.744833509329 sim_pfm: 570.9349066235009
episode: 324 training return: tensor(473.1722, device='cuda:0')
episode: 325 training return: tensor(451.2396, device='cuda:0')
episode: 326 training return: tensor(518.3089, device='cuda:0')
episode: 327 training return: tensor(514.5090, device='cuda:0')
epoch: 82 test_true_pfm: 5937.101759947286 sim_pfm: 631.0760586237496
episode: 328 training return: tensor(518.9841, device='cuda:0')
episode: 329 training return: tensor(367.0795, device='cuda:0')
episode: 330 training return: tensor(433.1081, device='cuda:0')
episode: 331 training return: tensor(527.8276, device='cuda:0')
epoch: 83 test_true_pfm: 5909.587648987873 sim_pfm: 605.1741486281875
episode: 332 training return: tensor(553.6094, device='cuda:0')
episode: 333 training return: tensor(501.2188, device='cuda:0')
episode: 334 training return: tensor(539.6584, device='cuda:0')
episode: 335 training return: tensor(546.6896, device='cuda:0')
epoch: 84 test_true_pfm: 5931.724060576143 sim_pfm: 579.4113771915048
episode: 336 training return: tensor(449.7964, device='cuda:0')
episode: 337 training return: tensor(405.9290, device='cuda:0')
episode: 338 training return: tensor(499.6151, device='cuda:0')
episode: 339 training return: tensor(520.9142, device='cuda:0')
epoch: 85 test_true_pfm: 5867.505237230785 sim_pfm: 597.2384552257136
episode: 340 training return: tensor(456.9426, device='cuda:0')
episode: 341 training return: tensor(506.3149, device='cuda:0')
episode: 342 training return: tensor(499.8285, device='cuda:0')
episode: 343 training return: tensor(446.4993, device='cuda:0')
epoch: 86 test_true_pfm: 5988.101390747339 sim_pfm: 597.1039473185956
episode: 344 training return: tensor(386.0936, device='cuda:0')
episode: 345 training return: tensor(392.0746, device='cuda:0')
episode: 346 training return: tensor(450.5697, device='cuda:0')
episode: 347 training return: tensor(466.5093, device='cuda:0')
epoch: 87 test_true_pfm: 5968.268987933331 sim_pfm: 612.8358762889887
episode: 348 training return: tensor(546.9060, device='cuda:0')
episode: 349 training return: tensor(563.2292, device='cuda:0')
episode: 350 training return: tensor(538.5551, device='cuda:0')
episode: 351 training return: tensor(539.2515, device='cuda:0')
epoch: 88 test_true_pfm: 5972.126954555271 sim_pfm: 649.4555967271832
episode: 352 training return: tensor(479.3493, device='cuda:0')
episode: 353 training return: tensor(397.6592, device='cuda:0')
episode: 354 training return: tensor(494.0887, device='cuda:0')
episode: 355 training return: tensor(416.7584, device='cuda:0')
epoch: 89 test_true_pfm: 6015.9786814651525 sim_pfm: 588.0914224096632
episode: 356 training return: tensor(498.9404, device='cuda:0')
episode: 357 training return: tensor(503.0984, device='cuda:0')
episode: 358 training return: tensor(520.4175, device='cuda:0')
episode: 359 training return: tensor(485.2216, device='cuda:0')
epoch: 90 test_true_pfm: 6017.708459922714 sim_pfm: 618.8248049224009
episode: 360 training return: tensor(471.8115, device='cuda:0')
episode: 361 training return: tensor(473.6586, device='cuda:0')
episode: 362 training return: tensor(454.6299, device='cuda:0')
episode: 363 training return: tensor(463.3678, device='cuda:0')
epoch: 91 test_true_pfm: 5953.251381231393 sim_pfm: 626.5704572456016
episode: 364 training return: tensor(496.6284, device='cuda:0')
episode: 365 training return: tensor(524.8729, device='cuda:0')
episode: 366 training return: tensor(480.4053, device='cuda:0')
episode: 367 training return: tensor(479.1794, device='cuda:0')
epoch: 92 test_true_pfm: 5983.8967042420445 sim_pfm: 641.7328614464495
episode: 368 training return: tensor(507.3520, device='cuda:0')
episode: 369 training return: tensor(499.6741, device='cuda:0')
episode: 370 training return: tensor(487.4472, device='cuda:0')
episode: 371 training return: tensor(480.8083, device='cuda:0')
epoch: 93 test_true_pfm: 6024.152664014524 sim_pfm: 612.4854825220149
episode: 372 training return: tensor(503.5302, device='cuda:0')
episode: 373 training return: tensor(462.4289, device='cuda:0')
episode: 374 training return: tensor(444.3736, device='cuda:0')
episode: 375 training return: tensor(449.1292, device='cuda:0')
epoch: 94 test_true_pfm: 5963.630694228086 sim_pfm: 605.3543121065208
episode: 376 training return: tensor(352.3990, device='cuda:0')
episode: 377 training return: tensor(499.4307, device='cuda:0')
episode: 378 training return: tensor(516.5285, device='cuda:0')
episode: 379 training return: tensor(503.1188, device='cuda:0')
epoch: 95 test_true_pfm: 6003.41013063893 sim_pfm: 581.0271584723765
episode: 380 training return: tensor(498.0278, device='cuda:0')
episode: 381 training return: tensor(535.2685, device='cuda:0')
episode: 382 training return: tensor(519.1862, device='cuda:0')
episode: 383 training return: tensor(465.7821, device='cuda:0')
epoch: 96 test_true_pfm: 5999.869816699361 sim_pfm: 626.9214484062201
episode: 384 training return: tensor(526.3145, device='cuda:0')
episode: 385 training return: tensor(458.2306, device='cuda:0')
episode: 386 training return: tensor(559.5048, device='cuda:0')
episode: 387 training return: tensor(473.7113, device='cuda:0')
epoch: 97 test_true_pfm: 6040.820279068449 sim_pfm: 605.9857286679326
episode: 388 training return: tensor(449.1666, device='cuda:0')
episode: 389 training return: tensor(417.6212, device='cuda:0')
episode: 390 training return: tensor(501.2596, device='cuda:0')
episode: 391 training return: tensor(540.4160, device='cuda:0')
epoch: 98 test_true_pfm: 6043.500878743373 sim_pfm: 630.0373288611881
episode: 392 training return: tensor(472.0537, device='cuda:0')
episode: 393 training return: tensor(481.1560, device='cuda:0')
episode: 394 training return: tensor(521.2643, device='cuda:0')
episode: 395 training return: tensor(463.4882, device='cuda:0')
epoch: 99 test_true_pfm: 6092.938544655729 sim_pfm: 615.7838925821707
episode: 396 training return: tensor(479.5099, device='cuda:0')
episode: 397 training return: tensor(507.1274, device='cuda:0')
episode: 398 training return: tensor(486.9141, device='cuda:0')
episode: 399 training return: tensor(521.8490, device='cuda:0')
epoch: 100 test_true_pfm: 5982.22627052178 sim_pfm: 642.785812006332
episode: 400 training return: tensor(536.9243, device='cuda:0')
episode: 401 training return: tensor(467.5908, device='cuda:0')
episode: 402 training return: tensor(456.4599, device='cuda:0')
episode: 403 training return: tensor(522.5924, device='cuda:0')
epoch: 101 test_true_pfm: 5979.735400411716 sim_pfm: 617.418042988788
episode: 404 training return: tensor(531.8878, device='cuda:0')
episode: 405 training return: tensor(424.0809, device='cuda:0')
episode: 406 training return: tensor(449.0678, device='cuda:0')
episode: 407 training return: tensor(527.0510, device='cuda:0')
epoch: 102 test_true_pfm: 6057.468388345075 sim_pfm: 644.2836343830664
episode: 408 training return: tensor(543.4598, device='cuda:0')
episode: 409 training return: tensor(455.1017, device='cuda:0')
episode: 410 training return: tensor(477.6624, device='cuda:0')
episode: 411 training return: tensor(502.1498, device='cuda:0')
epoch: 103 test_true_pfm: 6065.476771940933 sim_pfm: 641.1948212565234
episode: 412 training return: tensor(491.8431, device='cuda:0')
episode: 413 training return: tensor(525.3740, device='cuda:0')
episode: 414 training return: tensor(499.5011, device='cuda:0')
episode: 415 training return: tensor(465.8407, device='cuda:0')
epoch: 104 test_true_pfm: 6027.169364786504 sim_pfm: 640.0745623985616
episode: 416 training return: tensor(542.2986, device='cuda:0')
episode: 417 training return: tensor(448.8474, device='cuda:0')
episode: 418 training return: tensor(470.6418, device='cuda:0')
episode: 419 training return: tensor(450.7082, device='cuda:0')
epoch: 105 test_true_pfm: 6012.298509154517 sim_pfm: 643.762023099931
episode: 420 training return: tensor(469.3925, device='cuda:0')
episode: 421 training return: tensor(482.0609, device='cuda:0')
episode: 422 training return: tensor(516.3542, device='cuda:0')
episode: 423 training return: tensor(487.2298, device='cuda:0')
epoch: 106 test_true_pfm: 5942.172949737251 sim_pfm: 584.3663389363792
episode: 424 training return: tensor(528.4528, device='cuda:0')
episode: 425 training return: tensor(531.6381, device='cuda:0')
episode: 426 training return: tensor(567.1272, device='cuda:0')
episode: 427 training return: tensor(487.5573, device='cuda:0')
epoch: 107 test_true_pfm: 6002.510188352651 sim_pfm: 635.6939159152486
episode: 428 training return: tensor(551.0578, device='cuda:0')
episode: 429 training return: tensor(509.6621, device='cuda:0')
episode: 430 training return: tensor(499.2231, device='cuda:0')
episode: 431 training return: tensor(502.4467, device='cuda:0')
epoch: 108 test_true_pfm: 6109.651253927379 sim_pfm: 640.4194573114413
episode: 432 training return: tensor(541.8263, device='cuda:0')
episode: 433 training return: tensor(453.7007, device='cuda:0')
episode: 434 training return: tensor(496.3802, device='cuda:0')
episode: 435 training return: tensor(456.3913, device='cuda:0')
epoch: 109 test_true_pfm: 6009.87329911438 sim_pfm: 642.0516808945686
episode: 436 training return: tensor(448.0114, device='cuda:0')
episode: 437 training return: tensor(444.3689, device='cuda:0')
episode: 438 training return: tensor(543.3219, device='cuda:0')
episode: 439 training return: tensor(529.6782, device='cuda:0')
epoch: 110 test_true_pfm: 6083.22096736627 sim_pfm: 650.7715048980123
episode: 440 training return: tensor(432.7361, device='cuda:0')
episode: 441 training return: tensor(491.3808, device='cuda:0')
episode: 442 training return: tensor(556.6430, device='cuda:0')
episode: 443 training return: tensor(508.4366, device='cuda:0')
epoch: 111 test_true_pfm: 6068.877143796623 sim_pfm: 662.0240206405675
episode: 444 training return: tensor(507.4904, device='cuda:0')
episode: 445 training return: tensor(496.2554, device='cuda:0')
episode: 446 training return: tensor(372.8742, device='cuda:0')
episode: 447 training return: tensor(558.2629, device='cuda:0')
epoch: 112 test_true_pfm: 6072.273682022438 sim_pfm: 632.1044498235375
episode: 448 training return: tensor(455.1649, device='cuda:0')
episode: 449 training return: tensor(550.0123, device='cuda:0')
episode: 450 training return: tensor(441.8085, device='cuda:0')
episode: 451 training return: tensor(443.1539, device='cuda:0')
epoch: 113 test_true_pfm: 5963.630553775038 sim_pfm: 635.7134293102814
episode: 452 training return: tensor(557.3340, device='cuda:0')
episode: 453 training return: tensor(454.0432, device='cuda:0')
episode: 454 training return: tensor(482.4443, device='cuda:0')
episode: 455 training return: tensor(528.8712, device='cuda:0')
epoch: 114 test_true_pfm: 5988.0416100704615 sim_pfm: 612.5221987387243
episode: 456 training return: tensor(554.8380, device='cuda:0')
episode: 457 training return: tensor(425.6604, device='cuda:0')
episode: 458 training return: tensor(485.1951, device='cuda:0')
episode: 459 training return: tensor(514.5933, device='cuda:0')
epoch: 115 test_true_pfm: 6093.914197906882 sim_pfm: 637.3102581367517
episode: 460 training return: tensor(462.2529, device='cuda:0')
episode: 461 training return: tensor(505.0258, device='cuda:0')
episode: 462 training return: tensor(582.1148, device='cuda:0')
episode: 463 training return: tensor(551.5588, device='cuda:0')
epoch: 116 test_true_pfm: 6046.707878911293 sim_pfm: 654.9779074722125
episode: 464 training return: tensor(522.6077, device='cuda:0')
episode: 465 training return: tensor(570.7747, device='cuda:0')
episode: 466 training return: tensor(560.5809, device='cuda:0')
episode: 467 training return: tensor(526.7066, device='cuda:0')
epoch: 117 test_true_pfm: 5985.549066155289 sim_pfm: 625.5354369124398
episode: 468 training return: tensor(510.2312, device='cuda:0')
episode: 469 training return: tensor(587.5403, device='cuda:0')
episode: 470 training return: tensor(556.6089, device='cuda:0')
episode: 471 training return: tensor(476.1401, device='cuda:0')
epoch: 118 test_true_pfm: 6081.724146484889 sim_pfm: 660.818298543784
episode: 472 training return: tensor(438.5130, device='cuda:0')
episode: 473 training return: tensor(567.0990, device='cuda:0')
episode: 474 training return: tensor(509.3142, device='cuda:0')
episode: 475 training return: tensor(534.1667, device='cuda:0')
epoch: 119 test_true_pfm: 6069.492933380684 sim_pfm: 635.832552626826
episode: 476 training return: tensor(458.1140, device='cuda:0')
episode: 477 training return: tensor(564.9373, device='cuda:0')
episode: 478 training return: tensor(608.5033, device='cuda:0')
episode: 479 training return: tensor(499.3375, device='cuda:0')
epoch: 120 test_true_pfm: 5949.223620994526 sim_pfm: 660.0464078354029
episode: 480 training return: tensor(496.6523, device='cuda:0')
episode: 481 training return: tensor(458.8457, device='cuda:0')
episode: 482 training return: tensor(576.6463, device='cuda:0')
episode: 483 training return: tensor(524.0287, device='cuda:0')
epoch: 121 test_true_pfm: 6052.997464162975 sim_pfm: 651.8585779681647
episode: 484 training return: tensor(448.5570, device='cuda:0')
episode: 485 training return: tensor(512.9144, device='cuda:0')
episode: 486 training return: tensor(590.7667, device='cuda:0')
episode: 487 training return: tensor(543.7684, device='cuda:0')
epoch: 122 test_true_pfm: 6107.008515041342 sim_pfm: 660.59740468037
episode: 488 training return: tensor(555.6425, device='cuda:0')
episode: 489 training return: tensor(538.5605, device='cuda:0')
episode: 490 training return: tensor(482.8846, device='cuda:0')
episode: 491 training return: tensor(536.6672, device='cuda:0')
epoch: 123 test_true_pfm: 6109.38715783283 sim_pfm: 652.811515380706
episode: 492 training return: tensor(521.8044, device='cuda:0')
episode: 493 training return: tensor(527.1847, device='cuda:0')
episode: 494 training return: tensor(526.5848, device='cuda:0')
episode: 495 training return: tensor(470.7394, device='cuda:0')
epoch: 124 test_true_pfm: 6070.109687667256 sim_pfm: 654.1226100700636
episode: 496 training return: tensor(539.6628, device='cuda:0')
episode: 497 training return: tensor(415.0915, device='cuda:0')
episode: 498 training return: tensor(524.1036, device='cuda:0')
episode: 499 training return: tensor(514.0791, device='cuda:0')
epoch: 125 test_true_pfm: 6030.495702851833 sim_pfm: 636.7952500488512
episode: 500 training return: tensor(510.3427, device='cuda:0')
episode: 501 training return: tensor(521.2639, device='cuda:0')
episode: 502 training return: tensor(575.1707, device='cuda:0')
episode: 503 training return: tensor(552.3821, device='cuda:0')
epoch: 126 test_true_pfm: 6067.008598741703 sim_pfm: 640.8544570936938
episode: 504 training return: tensor(565.3706, device='cuda:0')
episode: 505 training return: tensor(530.1859, device='cuda:0')
episode: 506 training return: tensor(513.9531, device='cuda:0')
episode: 507 training return: tensor(400.0633, device='cuda:0')
epoch: 127 test_true_pfm: 6053.636138868114 sim_pfm: 670.2570817042142
episode: 508 training return: tensor(577.3366, device='cuda:0')
episode: 509 training return: tensor(487.6434, device='cuda:0')
episode: 510 training return: tensor(446.5570, device='cuda:0')
episode: 511 training return: tensor(439.0526, device='cuda:0')
epoch: 128 test_true_pfm: 5988.507617501059 sim_pfm: 638.4053546382735
episode: 512 training return: tensor(551.1547, device='cuda:0')
episode: 513 training return: tensor(522.8235, device='cuda:0')
episode: 514 training return: tensor(509.6286, device='cuda:0')
episode: 515 training return: tensor(482.5426, device='cuda:0')
epoch: 129 test_true_pfm: 6109.283741888746 sim_pfm: 662.8091454867196
episode: 516 training return: tensor(544.9509, device='cuda:0')
episode: 517 training return: tensor(578.2988, device='cuda:0')
episode: 518 training return: tensor(568.0620, device='cuda:0')
episode: 519 training return: tensor(519.2694, device='cuda:0')
epoch: 130 test_true_pfm: 6078.892953263679 sim_pfm: 650.2688527994324
episode: 520 training return: tensor(523.1121, device='cuda:0')
episode: 521 training return: tensor(497.7027, device='cuda:0')
episode: 522 training return: tensor(423.2613, device='cuda:0')
episode: 523 training return: tensor(569.4905, device='cuda:0')
epoch: 131 test_true_pfm: 6058.49169275787 sim_pfm: 659.0970972667759
episode: 524 training return: tensor(520.6448, device='cuda:0')
episode: 525 training return: tensor(497.7644, device='cuda:0')
episode: 526 training return: tensor(550.4192, device='cuda:0')
episode: 527 training return: tensor(510.1211, device='cuda:0')
epoch: 132 test_true_pfm: 6134.775753937348 sim_pfm: 627.5410356792078
episode: 528 training return: tensor(505.6039, device='cuda:0')
episode: 529 training return: tensor(593.8999, device='cuda:0')
episode: 530 training return: tensor(579.8591, device='cuda:0')
episode: 531 training return: tensor(482.1658, device='cuda:0')
epoch: 133 test_true_pfm: 6070.057307146533 sim_pfm: 663.3487881030111
episode: 532 training return: tensor(563.3751, device='cuda:0')
episode: 533 training return: tensor(514.5164, device='cuda:0')
episode: 534 training return: tensor(586.4203, device='cuda:0')
episode: 535 training return: tensor(566.8127, device='cuda:0')
epoch: 134 test_true_pfm: 6061.748048615699 sim_pfm: 651.5279805157334
episode: 536 training return: tensor(506.0148, device='cuda:0')
episode: 537 training return: tensor(570.4467, device='cuda:0')
episode: 538 training return: tensor(551.7388, device='cuda:0')
episode: 539 training return: tensor(515.1649, device='cuda:0')
epoch: 135 test_true_pfm: 6132.401680248629 sim_pfm: 645.5883196876384
episode: 540 training return: tensor(591.1785, device='cuda:0')
episode: 541 training return: tensor(595.9849, device='cuda:0')
episode: 542 training return: tensor(532.3749, device='cuda:0')
episode: 543 training return: tensor(519.5264, device='cuda:0')
epoch: 136 test_true_pfm: 5974.328280817211 sim_pfm: 635.3798156635991
episode: 544 training return: tensor(574.9238, device='cuda:0')
episode: 545 training return: tensor(516.9603, device='cuda:0')
episode: 546 training return: tensor(540.9277, device='cuda:0')
episode: 547 training return: tensor(577.1334, device='cuda:0')
epoch: 137 test_true_pfm: 6086.4074381616965 sim_pfm: 673.9987437910944
episode: 548 training return: tensor(538.0263, device='cuda:0')
episode: 549 training return: tensor(556.5280, device='cuda:0')
episode: 550 training return: tensor(515.1948, device='cuda:0')
episode: 551 training return: tensor(533.8278, device='cuda:0')
epoch: 138 test_true_pfm: 6049.523042473247 sim_pfm: 626.3926215062287
episode: 552 training return: tensor(534.9002, device='cuda:0')
episode: 553 training return: tensor(566.3318, device='cuda:0')
episode: 554 training return: tensor(518.8386, device='cuda:0')
episode: 555 training return: tensor(552.4825, device='cuda:0')
epoch: 139 test_true_pfm: 6019.5181707267075 sim_pfm: 636.9775301911868
episode: 556 training return: tensor(511.2848, device='cuda:0')
episode: 557 training return: tensor(508.6127, device='cuda:0')
episode: 558 training return: tensor(555.5223, device='cuda:0')
episode: 559 training return: tensor(501.8278, device='cuda:0')
epoch: 140 test_true_pfm: 6172.275162782702 sim_pfm: 675.1094059710546
episode: 560 training return: tensor(531.8412, device='cuda:0')
episode: 561 training return: tensor(551.9409, device='cuda:0')
episode: 562 training return: tensor(500.3291, device='cuda:0')
episode: 563 training return: tensor(536.3350, device='cuda:0')
epoch: 141 test_true_pfm: 6077.847040465343 sim_pfm: 686.482843535409
episode: 564 training return: tensor(604.1836, device='cuda:0')
episode: 565 training return: tensor(467.8907, device='cuda:0')
episode: 566 training return: tensor(546.8627, device='cuda:0')
episode: 567 training return: tensor(575.3337, device='cuda:0')
epoch: 142 test_true_pfm: 6078.390554709297 sim_pfm: 665.395597154779
episode: 568 training return: tensor(595.9741, device='cuda:0')
episode: 569 training return: tensor(520.7204, device='cuda:0')
episode: 570 training return: tensor(527.9977, device='cuda:0')
episode: 571 training return: tensor(580.4000, device='cuda:0')
epoch: 143 test_true_pfm: 6076.709749670775 sim_pfm: 648.9944335301407
episode: 572 training return: tensor(558.2170, device='cuda:0')
episode: 573 training return: tensor(416.2706, device='cuda:0')
episode: 574 training return: tensor(530.0058, device='cuda:0')
episode: 575 training return: tensor(513.9379, device='cuda:0')
epoch: 144 test_true_pfm: 6083.283801218918 sim_pfm: 671.8118813965315
episode: 576 training return: tensor(448.6269, device='cuda:0')
episode: 577 training return: tensor(576.9050, device='cuda:0')
episode: 578 training return: tensor(550.2596, device='cuda:0')
episode: 579 training return: tensor(562.5634, device='cuda:0')
epoch: 145 test_true_pfm: 6082.856701665417 sim_pfm: 666.6430426602407
episode: 580 training return: tensor(506.3566, device='cuda:0')
episode: 581 training return: tensor(546.5421, device='cuda:0')
episode: 582 training return: tensor(518.7971, device='cuda:0')
episode: 583 training return: tensor(555.4100, device='cuda:0')
epoch: 146 test_true_pfm: 6089.231204968527 sim_pfm: 676.2410269202277
episode: 584 training return: tensor(529.5315, device='cuda:0')
episode: 585 training return: tensor(580.3057, device='cuda:0')
episode: 586 training return: tensor(569.8407, device='cuda:0')
episode: 587 training return: tensor(504.9465, device='cuda:0')
epoch: 147 test_true_pfm: 6147.222971165051 sim_pfm: 681.1936315541388
episode: 588 training return: tensor(587.8269, device='cuda:0')
episode: 589 training return: tensor(457.5927, device='cuda:0')
episode: 590 training return: tensor(542.6776, device='cuda:0')
episode: 591 training return: tensor(438.9878, device='cuda:0')
epoch: 148 test_true_pfm: 6108.018925643496 sim_pfm: 693.1911981141117
episode: 592 training return: tensor(556.7711, device='cuda:0')
episode: 593 training return: tensor(539.4465, device='cuda:0')
episode: 594 training return: tensor(581.0995, device='cuda:0')
episode: 595 training return: tensor(540.5213, device='cuda:0')
epoch: 149 test_true_pfm: 6156.3933190674425 sim_pfm: 662.6074686343587
episode: 596 training return: tensor(614.7339, device='cuda:0')
episode: 597 training return: tensor(591.2607, device='cuda:0')
episode: 598 training return: tensor(509.9443, device='cuda:0')
episode: 599 training return: tensor(571.7381, device='cuda:0')
epoch: 150 test_true_pfm: 6136.29914168558 sim_pfm: 693.547842568291
