['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '4', '--data', '30000']
epoch: 0 training_loss 0.30008591055870054 test_loss: 0.201617431640625
epoch: 1 training_loss 0.1669410766661167 test_loss: 0.16114017963409424
epoch: 2 training_loss 0.14338559847325086 test_loss: 0.13719974756240844
epoch: 3 training_loss 0.1353350371494889 test_loss: 0.13078564405441284
epoch: 4 training_loss 0.12877369184046983 test_loss: 0.14392170906066895
epoch: 5 training_loss 0.12662247378379107 test_loss: 0.13135803937911988
epoch: 6 training_loss 0.1249234776943922 test_loss: 0.11558259725570678
epoch: 7 training_loss 0.11607725333422422 test_loss: 0.1347581386566162
epoch: 8 training_loss 0.12231650445610284 test_loss: 0.14116562604904176
epoch: 9 training_loss 0.11726913448423147 test_loss: 0.10399775505065918
epoch: 10 training_loss 0.11235090482980013 test_loss: 0.1090761661529541
epoch: 11 training_loss 0.10940558388829232 test_loss: 0.13228214979171754
epoch: 12 training_loss 0.1130278087221086 test_loss: 0.12428905963897705
epoch: 13 training_loss 0.11578395020216703 test_loss: 0.11622183322906494
epoch: 14 training_loss 0.11655640427023173 test_loss: 0.1315239667892456
epoch: 15 training_loss 0.10416363030672074 test_loss: 0.1207728624343872
epoch: 16 training_loss 0.11159000519663095 test_loss: 0.12571568489074708
epoch: 17 training_loss 0.11905822828412056 test_loss: 0.12375643253326415
epoch: 18 training_loss 0.11480164635926485 test_loss: 0.13240913152694703
epoch: 19 training_loss 0.11570315694436431 test_loss: 0.1047054648399353
epoch: 20 training_loss 0.1115645089931786 test_loss: 0.12140855789184571
epoch: 21 training_loss 0.09893847584724426 test_loss: 0.1172412633895874
epoch: 22 training_loss 0.10441898703575134 test_loss: 0.14127662181854247
epoch: 23 training_loss 0.10404715813696384 test_loss: 0.09912924766540528
epoch: 24 training_loss 0.10313865404576063 test_loss: 0.1060225248336792
epoch: 25 training_loss 0.11060899391770362 test_loss: 0.1291414976119995
epoch: 26 training_loss 0.11105838689953089 test_loss: 0.1153145432472229
epoch: 27 training_loss 0.1070603583008051 test_loss: 0.1066274881362915
epoch: 28 training_loss 0.10778856843709946 test_loss: 0.10990726947784424
epoch: 29 training_loss 0.10802805311977863 test_loss: 0.11567426919937134
epoch: 30 training_loss 0.10829829514026641 test_loss: 0.10796556472778321
epoch: 31 training_loss 0.10097860658541322 test_loss: 0.12145220041275025
epoch: 32 training_loss 0.11515289792791009 test_loss: 0.11173510551452637
epoch: 33 training_loss 0.10059248838573694 test_loss: 0.1032645583152771
epoch: 34 training_loss 0.10940980195999145 test_loss: 0.11618255376815796
epoch: 35 training_loss 0.10375084878876806 test_loss: 0.11078593730926514
epoch: 36 training_loss 0.1104688810557127 test_loss: 0.12359962463378907
epoch: 37 training_loss 0.09659536158666014 test_loss: 0.1377659559249878
epoch: 38 training_loss 0.1067454282566905 test_loss: 0.10180015563964843
epoch: 39 training_loss 0.10800633689388633 test_loss: 0.10417618751525878
epoch: 40 training_loss 0.10174493607133626 test_loss: 0.11185982227325439
epoch: 41 training_loss 0.10420150248333812 test_loss: 0.11536996364593506
epoch: 42 training_loss 0.10232812605798244 test_loss: 0.09189971685409545
epoch: 43 training_loss 0.10909827064722777 test_loss: 0.10663696527481079
epoch: 44 training_loss 0.10417135991156101 test_loss: 0.09372681379318237
epoch: 45 training_loss 0.10560926519334317 test_loss: 0.11301852464675903
epoch: 46 training_loss 0.10042883176356554 test_loss: 0.10850075483322144
epoch: 47 training_loss 0.10103385102935135 test_loss: 0.13666021823883057
epoch: 48 training_loss 0.10556597556918859 test_loss: 0.10121798515319824
epoch: 49 training_loss 0.10552054671570658 test_loss: 0.09865236878395081
epoch: 50 training_loss 0.09427564637735486 test_loss: 0.12033776044845582
epoch: 51 training_loss 0.10184209417551755 test_loss: 0.11134110689163208
epoch: 52 training_loss 0.09622066497802734 test_loss: 0.1271323800086975
epoch: 53 training_loss 0.10633261386305094 test_loss: 0.11578477621078491
epoch: 54 training_loss 0.09846270052716136 test_loss: 0.12437472343444825
epoch: 55 training_loss 0.09757035970687866 test_loss: 0.1208421230316162
epoch: 56 training_loss 0.10392002005130052 test_loss: 0.1058924674987793
epoch: 57 training_loss 0.09905429106205701 test_loss: 0.11887218952178955
epoch: 58 training_loss 0.09501800263300537 test_loss: 0.12283475399017334
epoch: 59 training_loss 0.104879396148026 test_loss: 0.10239932537078858
epoch: 60 training_loss 0.10045800387859344 test_loss: 0.10631513595581055
epoch: 61 training_loss 0.10166291074827313 test_loss: 0.11234339475631713
epoch: 62 training_loss 0.09591563399881124 test_loss: 0.10927059650421142
epoch: 63 training_loss 0.11182970745489001 test_loss: 0.10451549291610718
epoch: 64 training_loss 0.09715379504486918 test_loss: 0.11680151224136352
epoch: 65 training_loss 0.10864810470491648 test_loss: 0.12038297653198242
epoch: 66 training_loss 0.10367070252075791 test_loss: 0.09745388627052307
epoch: 67 training_loss 0.1029337676241994 test_loss: 0.09652319550514221
epoch: 68 training_loss 0.09492434868589043 test_loss: 0.11696604490280152
epoch: 69 training_loss 0.09928325856104493 test_loss: 0.10450879335403443
epoch: 70 training_loss 0.0995059840194881 test_loss: 0.10687819719314576
epoch: 71 training_loss 0.09633375737816095 test_loss: 0.11008342504501342
epoch: 72 training_loss 0.10263820730149746 test_loss: 0.1142572045326233
epoch: 73 training_loss 0.10406912386417388 test_loss: 0.10335148572921753
epoch: 74 training_loss 0.10120748277753591 test_loss: 0.13395291566848755
epoch: 75 training_loss 0.10587128233164549 test_loss: 0.11508527994155884
epoch: 76 training_loss 0.09143992112949491 test_loss: 0.11482812166213989
epoch: 77 training_loss 0.1007366644218564 test_loss: 0.1095632791519165
epoch: 78 training_loss 0.09918059982359409 test_loss: 0.10171369314193726
epoch: 79 training_loss 0.10212714843451977 test_loss: 0.11669903993606567
epoch: 80 training_loss 0.09408903956413268 test_loss: 0.124409019947052
epoch: 81 training_loss 0.09840178091078997 test_loss: 0.10149492025375366
epoch: 82 training_loss 0.09724611524492502 test_loss: 0.09984540939331055
epoch: 83 training_loss 0.10095005629584193 test_loss: 0.09786685705184936
epoch: 84 training_loss 0.1023185008019209 test_loss: 0.09558488130569458
epoch: 85 training_loss 0.10086345314979553 test_loss: 0.11511024236679077
epoch: 86 training_loss 0.09455008082091808 test_loss: 0.1146658182144165
epoch: 87 training_loss 0.10550242148339749 test_loss: 0.11098988056182861
epoch: 88 training_loss 0.09862602399662138 test_loss: 0.10528442859649659
epoch: 89 training_loss 0.09804106591269374 test_loss: 0.10382941961288453
epoch: 90 training_loss 0.09852302065119148 test_loss: 0.11310498714447022
epoch: 91 training_loss 0.09716623671352863 test_loss: 0.09577072262763978
epoch: 92 training_loss 0.10301464475691319 test_loss: 0.11226351261138916
epoch: 93 training_loss 0.09610657018609345 test_loss: 0.10548101663589478
epoch: 94 training_loss 0.09521842211484909 test_loss: 0.11054774522781372
epoch: 95 training_loss 0.09838206015527248 test_loss: 0.10247220993041992
epoch: 96 training_loss 0.10237116783857346 test_loss: 0.09947324395179749
epoch: 97 training_loss 0.09906722147017717 test_loss: 0.11405477523803711
epoch: 98 training_loss 0.10076139265671373 test_loss: 0.09902473092079163
epoch: 99 training_loss 0.10474649667739869 test_loss: 0.10175766944885253
epoch: 100 training_loss 0.09889037575572729 test_loss: 0.12449060678482056
epoch: 101 training_loss 0.097512631919235 test_loss: 0.1105602502822876
epoch: 102 training_loss 0.09684174006804824 test_loss: 0.11140085458755493
epoch: 103 training_loss 0.09657362772151828 test_loss: 0.10375885963439942
epoch: 104 training_loss 0.09733627654612065 test_loss: 0.09641606211662293
epoch: 105 training_loss 0.1010422258079052 test_loss: 0.11770439147949219
epoch: 106 training_loss 0.10420358054339886 test_loss: 0.11314601898193359
epoch: 107 training_loss 0.10760094722732902 test_loss: 0.12009170055389404
epoch: 108 training_loss 0.09709495168179273 test_loss: 0.12391082048416138
epoch: 109 training_loss 0.10365183059126139 test_loss: 0.10802628993988037
epoch: 110 training_loss 0.09940691780298948 test_loss: 0.11390182971954346
epoch: 111 training_loss 0.10113804195076227 test_loss: 0.11299227476119995
epoch: 112 training_loss 0.09855021852999926 test_loss: 0.11012247800827027
epoch: 113 training_loss 0.09406494552269579 test_loss: 0.1051593542098999
epoch: 114 training_loss 0.1021752724982798 test_loss: 0.11589995622634888
epoch: 115 training_loss 0.09919599598273635 test_loss: 0.10897470712661743
epoch: 116 training_loss 0.09982720281928778 test_loss: 0.11516518592834472
epoch: 117 training_loss 0.09824661239981651 test_loss: 0.1074113130569458
epoch: 118 training_loss 0.10366024371236562 test_loss: 0.1184093713760376
epoch: 119 training_loss 0.09724589224904776 test_loss: 0.09462753534317017
epoch: 120 training_loss 0.10391287241131067 test_loss: 0.1036638855934143
epoch: 121 training_loss 0.10197319310158491 test_loss: 0.11440627574920655
epoch: 122 training_loss 0.09104243583977223 test_loss: 0.1030311107635498
epoch: 123 training_loss 0.10512410214170814 test_loss: 0.1179269790649414
epoch: 124 training_loss 0.09686925955116749 test_loss: 0.11928267478942871
epoch: 125 training_loss 0.10285085085779429 test_loss: 0.10717556476593018
epoch: 126 training_loss 0.09976457428187131 test_loss: 0.10453658103942871
epoch: 127 training_loss 0.09738205472007394 test_loss: 0.09927964210510254
epoch: 128 training_loss 0.10932381175458432 test_loss: 0.11512278318405152
epoch: 129 training_loss 0.09257393652573227 test_loss: 0.09921602606773376
epoch: 130 training_loss 0.09268565010279417 test_loss: 0.10416218042373657
epoch: 131 training_loss 0.10049614887684584 test_loss: 0.11214826107025147
epoch: 132 training_loss 0.09515969254076481 test_loss: 0.11295757293701172
epoch: 133 training_loss 0.09959779067896307 test_loss: 0.10469552278518676
epoch: 134 training_loss 0.09928923543542624 test_loss: 0.09091256260871887
epoch: 135 training_loss 0.10045677922666073 test_loss: 0.10069961547851562
epoch: 136 training_loss 0.09337830832228064 test_loss: 0.11397809982299804
epoch: 137 training_loss 0.10041112620383501 test_loss: 0.1170350193977356
epoch: 138 training_loss 0.09385851728729903 test_loss: 0.1088779091835022
epoch: 139 training_loss 0.09991380012594163 test_loss: 0.12186483144760132
epoch: 140 training_loss 0.09233341878280044 test_loss: 0.11063529253005981
epoch: 141 training_loss 0.09205379433929921 test_loss: 0.12394275665283203
epoch: 142 training_loss 0.10068158693611622 test_loss: 0.11044492721557617
epoch: 143 training_loss 0.09473167780786752 test_loss: 0.08790388107299804
epoch: 144 training_loss 0.09509312093257904 test_loss: 0.11703044176101685
epoch: 145 training_loss 0.09534452622756362 test_loss: 0.1092386245727539
epoch: 146 training_loss 0.10420990288257599 test_loss: 0.11955692768096923
epoch: 147 training_loss 0.09914135154336691 test_loss: 0.09396528601646423
epoch: 148 training_loss 0.09522373659536243 test_loss: 0.12731922864913942
epoch: 149 training_loss 0.09635388835333288 test_loss: 0.11784200668334961
epoch: 0 training_loss 47.94329912185669 test_loss: 23.655653381347655
epoch: 1 training_loss 18.21109253883362 test_loss: 14.748292541503906
epoch: 2 training_loss 13.352722778320313 test_loss: 11.70178451538086
epoch: 3 training_loss 10.630094470977783 test_loss: 9.578852081298828
epoch: 4 training_loss 8.816416749954223 test_loss: 8.047721099853515
epoch: 5 training_loss 7.641875448226929 test_loss: 7.188419342041016
epoch: 6 training_loss 6.541963415145874 test_loss: 6.222419357299804
epoch: 7 training_loss 6.000615768432617 test_loss: 5.6870380401611325
epoch: 8 training_loss 5.397404508590698 test_loss: 5.196869277954102
epoch: 9 training_loss 4.965911102294922 test_loss: 4.965668487548828
epoch: 10 training_loss 4.638043339252472 test_loss: 4.495691299438477
epoch: 11 training_loss 4.3154267406463624 test_loss: 4.385718536376953
epoch: 12 training_loss 4.119230945110321 test_loss: 4.050078201293945
epoch: 13 training_loss 3.945645043849945 test_loss: 3.8114986419677734
epoch: 14 training_loss 3.7225723552703855 test_loss: 3.7889976501464844
epoch: 15 training_loss 3.581926691532135 test_loss: 3.6488128662109376
epoch: 16 training_loss 3.4801497149467466 test_loss: 3.3283580780029296
epoch: 17 training_loss 3.327995250225067 test_loss: 3.2452323913574217
epoch: 18 training_loss 3.2456019639968874 test_loss: 3.198785591125488
epoch: 19 training_loss 3.1102987790107726 test_loss: 3.106252098083496
epoch: 20 training_loss 3.0601339507102967 test_loss: 3.027379608154297
epoch: 21 training_loss 2.9510766887664794 test_loss: 2.8932516098022463
epoch: 22 training_loss 2.8824090218544005 test_loss: 2.903685188293457
epoch: 23 training_loss 2.81418301820755 test_loss: 2.761076545715332
epoch: 24 training_loss 2.7508859205245972 test_loss: 2.902423095703125
epoch: 25 training_loss 2.7479584431648254 test_loss: 2.5926631927490233
epoch: 26 training_loss 2.6390097451210024 test_loss: 2.713287353515625
epoch: 27 training_loss 2.5798883199691773 test_loss: 2.530341148376465
epoch: 28 training_loss 2.513261523246765 test_loss: 2.615153503417969
epoch: 29 training_loss 2.513301932811737 test_loss: 2.434200096130371
epoch: 30 training_loss 2.477435848712921 test_loss: 2.4155384063720704
epoch: 31 training_loss 2.435449163913727 test_loss: 2.3653579711914063
epoch: 32 training_loss 2.4221234488487244 test_loss: 2.440846061706543
epoch: 33 training_loss 2.3541390085220337 test_loss: 2.3959789276123047
epoch: 34 training_loss 2.3106115353107453 test_loss: 2.319110298156738
epoch: 35 training_loss 2.2967440533638 test_loss: 2.4220584869384765
epoch: 36 training_loss 2.3394986844062804 test_loss: 2.2438655853271485
epoch: 37 training_loss 2.2321909976005556 test_loss: 2.2697473526000977
epoch: 38 training_loss 2.242275446653366 test_loss: 2.2524036407470702
epoch: 39 training_loss 2.2251621890068054 test_loss: 2.276911735534668
epoch: 40 training_loss 2.1858369910717013 test_loss: 2.1626014709472656
epoch: 41 training_loss 2.149787813425064 test_loss: 2.1815458297729493
epoch: 42 training_loss 2.09390038728714 test_loss: 2.1111011505126953
epoch: 43 training_loss 2.1464614701271056 test_loss: 2.1550504684448244
epoch: 44 training_loss 2.101170742511749 test_loss: 2.1084854125976564
epoch: 45 training_loss 2.110079731941223 test_loss: 2.1703285217285155
epoch: 46 training_loss 2.067795696258545 test_loss: 2.089140701293945
epoch: 47 training_loss 2.034372184276581 test_loss: 2.1227903366088867
epoch: 48 training_loss 2.043745768070221 test_loss: 2.085006904602051
epoch: 49 training_loss 2.0131024742126464 test_loss: 2.0216720581054686
epoch: 50 training_loss 2.0129914474487305 test_loss: 2.1158222198486327
epoch: 51 training_loss 2.017287492752075 test_loss: 2.103686714172363
epoch: 52 training_loss 1.9995683896541596 test_loss: 1.9845603942871093
epoch: 53 training_loss 1.9977000594139098 test_loss: 1.9953010559082032
epoch: 54 training_loss 1.9622075438499451 test_loss: 2.0148305892944336
epoch: 55 training_loss 1.919103970527649 test_loss: 1.9096925735473633
epoch: 56 training_loss 1.9252124667167663 test_loss: 1.9253143310546874
epoch: 57 training_loss 1.9066890490055084 test_loss: 1.9286542892456056
epoch: 58 training_loss 1.931219985485077 test_loss: 1.933387565612793
epoch: 59 training_loss 1.9022395944595336 test_loss: 1.9175374984741211
epoch: 60 training_loss 1.8899781930446624 test_loss: 1.9156213760375977
epoch: 61 training_loss 1.8501857280731202 test_loss: 1.8325332641601562
epoch: 62 training_loss 1.8482355344295502 test_loss: 1.9789773941040039
epoch: 63 training_loss 1.8606587088108062 test_loss: 1.8143390655517577
epoch: 64 training_loss 1.8306688213348388 test_loss: 1.8738948822021484
epoch: 65 training_loss 1.851219482421875 test_loss: 1.8135517120361329
epoch: 66 training_loss 1.8317165517807006 test_loss: 1.8846010208129882
epoch: 67 training_loss 1.8193894815444946 test_loss: 1.8272689819335937
epoch: 68 training_loss 1.8236297404766082 test_loss: 1.9101854324340821
epoch: 69 training_loss 1.8230308973789215 test_loss: 1.8307506561279296
epoch: 70 training_loss 1.79365767121315 test_loss: 1.8254316329956055
epoch: 71 training_loss 1.8077981674671173 test_loss: 1.8403049468994142
epoch: 72 training_loss 1.7587465155124664 test_loss: 1.7614755630493164
epoch: 73 training_loss 1.7633604097366333 test_loss: 1.7612928390502929
epoch: 74 training_loss 1.7413178014755248 test_loss: 1.8285524368286132
epoch: 75 training_loss 1.768277417421341 test_loss: 1.8540441513061523
epoch: 76 training_loss 1.7524266982078551 test_loss: 1.7821367263793946
epoch: 77 training_loss 1.7524538218975068 test_loss: 1.8288497924804688
epoch: 78 training_loss 1.7626700794696808 test_loss: 1.734583282470703
epoch: 79 training_loss 1.7120538854599 test_loss: 1.833855438232422
epoch: 80 training_loss 1.7815354585647583 test_loss: 1.7654237747192383
epoch: 81 training_loss 1.7560510075092315 test_loss: 1.8146406173706056
epoch: 82 training_loss 1.707964735031128 test_loss: 1.7710504531860352
epoch: 83 training_loss 1.7417284154891968 test_loss: 1.793086051940918
epoch: 84 training_loss 1.7061397182941436 test_loss: 1.8030683517456054
epoch: 85 training_loss 1.7147994482517241 test_loss: 1.7681884765625
epoch: 86 training_loss 1.7150657856464386 test_loss: 1.7100969314575196
epoch: 87 training_loss 1.723887461423874 test_loss: 1.8022188186645507
epoch: 88 training_loss 1.6644679510593414 test_loss: 1.6829767227172852
epoch: 89 training_loss 1.6747444140911103 test_loss: 1.7413143157958983
epoch: 90 training_loss 1.6538281047344208 test_loss: 1.692458724975586
epoch: 91 training_loss 1.6456384062767029 test_loss: 1.6614152908325195
epoch: 92 training_loss 1.669019922018051 test_loss: 1.6985267639160155
epoch: 93 training_loss 1.6582219016551971 test_loss: 1.7283916473388672
epoch: 94 training_loss 1.6760994970798493 test_loss: 1.7109954833984375
epoch: 95 training_loss 1.7122734427452087 test_loss: 1.7029783248901367
epoch: 96 training_loss 1.672463092803955 test_loss: 1.7518741607666015
epoch: 97 training_loss 1.6551904916763305 test_loss: 1.64935302734375
epoch: 98 training_loss 1.640071313381195 test_loss: 1.6735004425048827
epoch: 99 training_loss 1.6405355942249298 test_loss: 1.6843791961669923
epoch: 100 training_loss 1.6292533946037293 test_loss: 1.6401817321777343
epoch: 101 training_loss 1.6451477634906768 test_loss: 1.6474458694458007
epoch: 102 training_loss 1.6489708411693573 test_loss: 1.6157629013061523
epoch: 103 training_loss 1.591610072851181 test_loss: 1.570567798614502
epoch: 104 training_loss 1.6210582661628723 test_loss: 1.6393610000610352
epoch: 105 training_loss 1.597840542793274 test_loss: 1.6004863739013673
epoch: 106 training_loss 1.6178537094593048 test_loss: 1.626367950439453
epoch: 107 training_loss 1.6191529941558838 test_loss: 1.6098634719848632
epoch: 108 training_loss 1.5995465290546418 test_loss: 1.6405231475830078
epoch: 109 training_loss 1.6227963936328889 test_loss: 1.6435039520263672
epoch: 110 training_loss 1.5938234794139863 test_loss: 1.6063987731933593
epoch: 111 training_loss 1.5927796530723572 test_loss: 1.6077625274658203
epoch: 112 training_loss 1.6156809115409851 test_loss: 1.651777458190918
epoch: 113 training_loss 1.5909819161891938 test_loss: 1.5656811714172363
epoch: 114 training_loss 1.5728720498085023 test_loss: 1.5865365028381349
epoch: 115 training_loss 1.5779855263233185 test_loss: 1.6089527130126953
epoch: 116 training_loss 1.5773581731319428 test_loss: 1.6050899505615235
epoch: 117 training_loss 1.570908476114273 test_loss: 1.5586139678955078
epoch: 118 training_loss 1.5605013716220855 test_loss: 1.6241117477416993
epoch: 119 training_loss 1.5497021508216857 test_loss: 1.60902099609375
epoch: 120 training_loss 1.5464703106880189 test_loss: 1.5679242134094238
epoch: 121 training_loss 1.5662607622146607 test_loss: 1.5883329391479493
epoch: 122 training_loss 1.5785164499282838 test_loss: 1.6260320663452148
epoch: 123 training_loss 1.5811198890209197 test_loss: 1.5675849914550781
epoch: 124 training_loss 1.5843040764331817 test_loss: 1.607205581665039
epoch: 125 training_loss 1.5612367522716522 test_loss: 1.5902674674987793
epoch: 126 training_loss 1.5381174504756927 test_loss: 1.5803915977478027
epoch: 127 training_loss 1.5586504387855529 test_loss: 1.5821284294128417
epoch: 128 training_loss 1.5488080203533172 test_loss: 1.5418649673461915
epoch: 129 training_loss 1.5334753644466401 test_loss: 1.5649429321289063
epoch: 130 training_loss 1.5374841153621674 test_loss: 1.6051132202148437
epoch: 131 training_loss 1.5390675711631774 test_loss: 1.545123291015625
epoch: 132 training_loss 1.5321115577220916 test_loss: 1.5563297271728516
epoch: 133 training_loss 1.528986655473709 test_loss: 1.5779091835021972
epoch: 134 training_loss 1.5233218896389007 test_loss: 1.682321548461914
epoch: 135 training_loss 1.5559610426425934 test_loss: 1.5348370552062989
epoch: 136 training_loss 1.5402432596683502 test_loss: 1.557368564605713
epoch: 137 training_loss 1.5128576672077179 test_loss: 1.5599923133850098
epoch: 138 training_loss 1.5080689990520477 test_loss: 1.5108176231384278
epoch: 139 training_loss 1.523603881597519 test_loss: 1.5097813606262207
epoch: 140 training_loss 1.515097815990448 test_loss: 1.523803997039795
epoch: 141 training_loss 1.4855254518985748 test_loss: 1.5371092796325683
epoch: 142 training_loss 1.49054704785347 test_loss: 1.565575122833252
epoch: 143 training_loss 1.5016349804401399 test_loss: 1.5444979667663574
epoch: 144 training_loss 1.5169328105449678 test_loss: 1.563459873199463
epoch: 145 training_loss 1.5163251328468323 test_loss: 1.5232614517211913
epoch: 146 training_loss 1.5023868203163147 test_loss: 1.580457592010498
epoch: 147 training_loss 1.4962657690048218 test_loss: 1.5077205657958985
epoch: 148 training_loss 1.5040917897224426 test_loss: 1.4956439018249512
epoch: 149 training_loss 1.5138664102554322 test_loss: 1.50967435836792
5200.550713990271
episode: 0 training return: tensor(-46.1006, device='cuda:0')
episode: 1 training return: tensor(29.8849, device='cuda:0')
episode: 2 training return: tensor(117.0381, device='cuda:0')
episode: 3 training return: tensor(26.4720, device='cuda:0')
epoch: 1 test_true_pfm: 4993.253423363662 sim_pfm: 34.853247429515854
episode: 4 training return: tensor(-54.2679, device='cuda:0')
episode: 5 training return: tensor(-64.6714, device='cuda:0')
episode: 6 training return: tensor(-2.9960, device='cuda:0')
episode: 7 training return: tensor(109.6825, device='cuda:0')
epoch: 2 test_true_pfm: 5133.281690726314 sim_pfm: 56.33849789655263
episode: 8 training return: tensor(-59.4618, device='cuda:0')
episode: 9 training return: tensor(-93.2929, device='cuda:0')
episode: 10 training return: tensor(16.8248, device='cuda:0')
episode: 11 training return: tensor(116.7573, device='cuda:0')
epoch: 3 test_true_pfm: 5157.347463146996 sim_pfm: 51.01473345182603
episode: 12 training return: tensor(-14.9375, device='cuda:0')
episode: 13 training return: tensor(55.5469, device='cuda:0')
episode: 14 training return: tensor(-99.2700, device='cuda:0')
episode: 15 training return: tensor(-116.6953, device='cuda:0')
epoch: 4 test_true_pfm: 5117.786493984881 sim_pfm: 59.35702212562319
episode: 16 training return: tensor(-5.9426, device='cuda:0')
episode: 17 training return: tensor(-63.4525, device='cuda:0')
episode: 18 training return: tensor(-60.4508, device='cuda:0')
episode: 19 training return: tensor(-151.6988, device='cuda:0')
epoch: 5 test_true_pfm: 5107.92988369423 sim_pfm: 49.8416812295521
episode: 20 training return: tensor(30.4659, device='cuda:0')
episode: 21 training return: tensor(75.9500, device='cuda:0')
episode: 22 training return: tensor(0.4432, device='cuda:0')
episode: 23 training return: tensor(90.7523, device='cuda:0')
epoch: 6 test_true_pfm: 5157.779326486895 sim_pfm: 63.19692063968008
episode: 24 training return: tensor(53.0633, device='cuda:0')
episode: 25 training return: tensor(110.1705, device='cuda:0')
episode: 26 training return: tensor(-63.9394, device='cuda:0')
episode: 27 training return: tensor(60.4471, device='cuda:0')
epoch: 7 test_true_pfm: 5119.629111199057 sim_pfm: 59.18910242902348
episode: 28 training return: tensor(-30.6342, device='cuda:0')
episode: 29 training return: tensor(89.5816, device='cuda:0')
episode: 30 training return: tensor(-42.1968, device='cuda:0')
episode: 31 training return: tensor(139.1405, device='cuda:0')
epoch: 8 test_true_pfm: 5192.042706818117 sim_pfm: 109.23423954372993
episode: 32 training return: tensor(27.5134, device='cuda:0')
episode: 33 training return: tensor(99.9591, device='cuda:0')
episode: 34 training return: tensor(40.6953, device='cuda:0')
episode: 35 training return: tensor(-12.2216, device='cuda:0')
epoch: 9 test_true_pfm: 5096.5483259886505 sim_pfm: 183.58900976725272
episode: 36 training return: tensor(103.2783, device='cuda:0')
episode: 37 training return: tensor(-80.3110, device='cuda:0')
episode: 38 training return: tensor(46.6148, device='cuda:0')
episode: 39 training return: tensor(216.8579, device='cuda:0')
epoch: 10 test_true_pfm: 5189.516698426629 sim_pfm: 85.656849183278
episode: 40 training return: tensor(181.9204, device='cuda:0')
episode: 41 training return: tensor(38.7051, device='cuda:0')
episode: 42 training return: tensor(73.4629, device='cuda:0')
episode: 43 training return: tensor(132.4827, device='cuda:0')
epoch: 11 test_true_pfm: 5089.484458352632 sim_pfm: 97.89375493903451
episode: 44 training return: tensor(-105.6601, device='cuda:0')
episode: 45 training return: tensor(167.8028, device='cuda:0')
episode: 46 training return: tensor(125.9462, device='cuda:0')
episode: 47 training return: tensor(105.6790, device='cuda:0')
epoch: 12 test_true_pfm: 5230.106438614465 sim_pfm: 96.54200781220182
episode: 48 training return: tensor(33.6475, device='cuda:0')
episode: 49 training return: tensor(37.6077, device='cuda:0')
episode: 50 training return: tensor(-133.9094, device='cuda:0')
episode: 51 training return: tensor(40.5263, device='cuda:0')
epoch: 13 test_true_pfm: 5194.817031632602 sim_pfm: 150.2054564829741
episode: 52 training return: tensor(-19.5911, device='cuda:0')
episode: 53 training return: tensor(-41.7008, device='cuda:0')
episode: 54 training return: tensor(116.1596, device='cuda:0')
episode: 55 training return: tensor(51.1590, device='cuda:0')
epoch: 14 test_true_pfm: 5235.199293382491 sim_pfm: 152.13104592155045
episode: 56 training return: tensor(41.0352, device='cuda:0')
episode: 57 training return: tensor(69.4209, device='cuda:0')
episode: 58 training return: tensor(40.5114, device='cuda:0')
episode: 59 training return: tensor(-92.1789, device='cuda:0')
epoch: 15 test_true_pfm: 5294.657099275167 sim_pfm: 150.91428803656405
episode: 60 training return: tensor(204.3209, device='cuda:0')
episode: 61 training return: tensor(126.9207, device='cuda:0')
episode: 62 training return: tensor(209.7089, device='cuda:0')
episode: 63 training return: tensor(24.0950, device='cuda:0')
epoch: 16 test_true_pfm: 5347.401903641433 sim_pfm: 241.25427563401172
episode: 64 training return: tensor(133.1686, device='cuda:0')
episode: 65 training return: tensor(203.7997, device='cuda:0')
episode: 66 training return: tensor(63.0976, device='cuda:0')
episode: 67 training return: tensor(39.3502, device='cuda:0')
epoch: 17 test_true_pfm: 5297.39704009295 sim_pfm: 219.1226227041916
episode: 68 training return: tensor(150.2312, device='cuda:0')
episode: 69 training return: tensor(139.1563, device='cuda:0')
episode: 70 training return: tensor(76.7576, device='cuda:0')
episode: 71 training return: tensor(165.0678, device='cuda:0')
epoch: 18 test_true_pfm: 5350.667372930325 sim_pfm: 160.057781356251
episode: 72 training return: tensor(148.6293, device='cuda:0')
episode: 73 training return: tensor(88.2343, device='cuda:0')
episode: 74 training return: tensor(139.8033, device='cuda:0')
episode: 75 training return: tensor(202.8691, device='cuda:0')
epoch: 19 test_true_pfm: 5399.251673315662 sim_pfm: 242.77536055854094
episode: 76 training return: tensor(176.3582, device='cuda:0')
episode: 77 training return: tensor(180.4745, device='cuda:0')
episode: 78 training return: tensor(24.4116, device='cuda:0')
episode: 79 training return: tensor(177.1528, device='cuda:0')
epoch: 20 test_true_pfm: 5385.24817637372 sim_pfm: 190.6395101737386
episode: 80 training return: tensor(58.9705, device='cuda:0')
episode: 81 training return: tensor(165.7899, device='cuda:0')
episode: 82 training return: tensor(205.6211, device='cuda:0')
episode: 83 training return: tensor(262.8980, device='cuda:0')
epoch: 21 test_true_pfm: 5183.7086327506595 sim_pfm: 190.93021148788588
episode: 84 training return: tensor(71.4602, device='cuda:0')
episode: 85 training return: tensor(98.3946, device='cuda:0')
episode: 86 training return: tensor(176.2302, device='cuda:0')
episode: 87 training return: tensor(144.4440, device='cuda:0')
epoch: 22 test_true_pfm: 5323.277382918361 sim_pfm: 284.3799120351129
episode: 88 training return: tensor(159.2514, device='cuda:0')
episode: 89 training return: tensor(196.0223, device='cuda:0')
episode: 90 training return: tensor(220.8173, device='cuda:0')
episode: 91 training return: tensor(135.9715, device='cuda:0')
epoch: 23 test_true_pfm: 5352.948842115096 sim_pfm: 206.13371030341173
episode: 92 training return: tensor(237.9119, device='cuda:0')
episode: 93 training return: tensor(147.1927, device='cuda:0')
episode: 94 training return: tensor(58.6907, device='cuda:0')
episode: 95 training return: tensor(164.6797, device='cuda:0')
epoch: 24 test_true_pfm: 5293.688989084221 sim_pfm: 295.42797666361247
episode: 96 training return: tensor(252.4861, device='cuda:0')
episode: 97 training return: tensor(214.0305, device='cuda:0')
episode: 98 training return: tensor(105.8670, device='cuda:0')
episode: 99 training return: tensor(264.0304, device='cuda:0')
epoch: 25 test_true_pfm: 5333.51750603483 sim_pfm: 212.084737370819
episode: 100 training return: tensor(188.0447, device='cuda:0')
episode: 101 training return: tensor(107.6892, device='cuda:0')
episode: 102 training return: tensor(119.8934, device='cuda:0')
episode: 103 training return: tensor(367.9157, device='cuda:0')
epoch: 26 test_true_pfm: 5316.941794673552 sim_pfm: 316.97933493185946
episode: 104 training return: tensor(127.0161, device='cuda:0')
episode: 105 training return: tensor(213.8020, device='cuda:0')
episode: 106 training return: tensor(174.2408, device='cuda:0')
episode: 107 training return: tensor(298.8000, device='cuda:0')
epoch: 27 test_true_pfm: 5383.727345565209 sim_pfm: 350.2182023003891
episode: 108 training return: tensor(77.4406, device='cuda:0')
episode: 109 training return: tensor(172.3654, device='cuda:0')
episode: 110 training return: tensor(200.4822, device='cuda:0')
episode: 111 training return: tensor(195.4260, device='cuda:0')
epoch: 28 test_true_pfm: 5476.50905772916 sim_pfm: 272.48878120018827
episode: 112 training return: tensor(232.3443, device='cuda:0')
episode: 113 training return: tensor(126.3835, device='cuda:0')
episode: 114 training return: tensor(129.1365, device='cuda:0')
episode: 115 training return: tensor(73.4640, device='cuda:0')
epoch: 29 test_true_pfm: 5430.275604140434 sim_pfm: 223.5798819582269
episode: 116 training return: tensor(215.1957, device='cuda:0')
episode: 117 training return: tensor(42.7981, device='cuda:0')
episode: 118 training return: tensor(232.7574, device='cuda:0')
episode: 119 training return: tensor(-8.2793, device='cuda:0')
epoch: 30 test_true_pfm: 5486.168769509321 sim_pfm: 190.64930931482618
episode: 120 training return: tensor(307.8341, device='cuda:0')
episode: 121 training return: tensor(327.9618, device='cuda:0')
episode: 122 training return: tensor(209.4485, device='cuda:0')
episode: 123 training return: tensor(248.7963, device='cuda:0')
epoch: 31 test_true_pfm: 5417.211082380429 sim_pfm: 359.27009595781175
episode: 124 training return: tensor(300.8867, device='cuda:0')
episode: 125 training return: tensor(195.5583, device='cuda:0')
episode: 126 training return: tensor(154.9453, device='cuda:0')
episode: 127 training return: tensor(200.3284, device='cuda:0')
epoch: 32 test_true_pfm: 5419.9380284677745 sim_pfm: 235.95660940697417
episode: 128 training return: tensor(59.7566, device='cuda:0')
episode: 129 training return: tensor(248.7614, device='cuda:0')
episode: 130 training return: tensor(189.7509, device='cuda:0')
episode: 131 training return: tensor(189.5620, device='cuda:0')
epoch: 33 test_true_pfm: 5443.892716233352 sim_pfm: 238.79963863046336
episode: 132 training return: tensor(198.2087, device='cuda:0')
episode: 133 training return: tensor(83.5170, device='cuda:0')
episode: 134 training return: tensor(256.0075, device='cuda:0')
episode: 135 training return: tensor(268.2082, device='cuda:0')
epoch: 34 test_true_pfm: 5435.496813558177 sim_pfm: 223.7285121345194
episode: 136 training return: tensor(194.8136, device='cuda:0')
episode: 137 training return: tensor(292.2848, device='cuda:0')
episode: 138 training return: tensor(181.8447, device='cuda:0')
episode: 139 training return: tensor(217.9883, device='cuda:0')
epoch: 35 test_true_pfm: 5404.835562194707 sim_pfm: 262.83749416098
episode: 140 training return: tensor(118.2178, device='cuda:0')
episode: 141 training return: tensor(-680.0595, device='cuda:0')
episode: 142 training return: tensor(287.1919, device='cuda:0')
episode: 143 training return: tensor(117.2115, device='cuda:0')
epoch: 36 test_true_pfm: 5398.152851744564 sim_pfm: 286.90678926408873
episode: 144 training return: tensor(328.5114, device='cuda:0')
episode: 145 training return: tensor(109.3462, device='cuda:0')
episode: 146 training return: tensor(160.9874, device='cuda:0')
episode: 147 training return: tensor(140.5514, device='cuda:0')
epoch: 37 test_true_pfm: 5456.8558111658995 sim_pfm: 340.5754990437611
episode: 148 training return: tensor(290.4104, device='cuda:0')
episode: 149 training return: tensor(198.7945, device='cuda:0')
episode: 150 training return: tensor(225.4091, device='cuda:0')
episode: 151 training return: tensor(104.2235, device='cuda:0')
epoch: 38 test_true_pfm: 5394.225182172036 sim_pfm: 279.70039756706683
episode: 152 training return: tensor(242.8271, device='cuda:0')
episode: 153 training return: tensor(307.5518, device='cuda:0')
episode: 154 training return: tensor(251.4772, device='cuda:0')
episode: 155 training return: tensor(147.7649, device='cuda:0')
epoch: 39 test_true_pfm: 5384.759549865837 sim_pfm: 313.6392331002765
episode: 156 training return: tensor(171.6118, device='cuda:0')
episode: 157 training return: tensor(289.9715, device='cuda:0')
episode: 158 training return: tensor(160.4638, device='cuda:0')
episode: 159 training return: tensor(216.7950, device='cuda:0')
epoch: 40 test_true_pfm: 5537.226705491415 sim_pfm: 313.6158859678156
episode: 160 training return: tensor(164.2433, device='cuda:0')
episode: 161 training return: tensor(225.3732, device='cuda:0')
episode: 162 training return: tensor(177.0034, device='cuda:0')
episode: 163 training return: tensor(237.1569, device='cuda:0')
epoch: 41 test_true_pfm: 5524.914540893885 sim_pfm: 266.24474690270534
episode: 164 training return: tensor(214.8143, device='cuda:0')
episode: 165 training return: tensor(267.3555, device='cuda:0')
episode: 166 training return: tensor(242.9344, device='cuda:0')
episode: 167 training return: tensor(205.9513, device='cuda:0')
epoch: 42 test_true_pfm: 5479.380178213239 sim_pfm: 288.024729175823
episode: 168 training return: tensor(225.9051, device='cuda:0')
episode: 169 training return: tensor(156.6028, device='cuda:0')
episode: 170 training return: tensor(165.6048, device='cuda:0')
episode: 171 training return: tensor(225.2210, device='cuda:0')
epoch: 43 test_true_pfm: 5383.791127711323 sim_pfm: 265.43130502998247
episode: 172 training return: tensor(265.8012, device='cuda:0')
episode: 173 training return: tensor(154.2902, device='cuda:0')
episode: 174 training return: tensor(223.8316, device='cuda:0')
episode: 175 training return: tensor(165.6071, device='cuda:0')
epoch: 44 test_true_pfm: 5547.200544315235 sim_pfm: 367.10872648963897
episode: 176 training return: tensor(247.9763, device='cuda:0')
episode: 177 training return: tensor(253.3099, device='cuda:0')
episode: 178 training return: tensor(199.4888, device='cuda:0')
episode: 179 training return: tensor(256.3005, device='cuda:0')
epoch: 45 test_true_pfm: 5548.609135116232 sim_pfm: 370.163291289636
episode: 180 training return: tensor(257.3080, device='cuda:0')
episode: 181 training return: tensor(194.5513, device='cuda:0')
episode: 182 training return: tensor(283.6070, device='cuda:0')
episode: 183 training return: tensor(264.2095, device='cuda:0')
epoch: 46 test_true_pfm: 5515.827684777284 sim_pfm: 318.90991696126486
episode: 184 training return: tensor(116.7664, device='cuda:0')
episode: 185 training return: tensor(165.2109, device='cuda:0')
episode: 186 training return: tensor(192.5266, device='cuda:0')
episode: 187 training return: tensor(203.6380, device='cuda:0')
epoch: 47 test_true_pfm: 5486.4022535711 sim_pfm: 379.93933178889955
episode: 188 training return: tensor(341.2353, device='cuda:0')
episode: 189 training return: tensor(153.5239, device='cuda:0')
episode: 190 training return: tensor(89.6390, device='cuda:0')
episode: 191 training return: tensor(175.0137, device='cuda:0')
epoch: 48 test_true_pfm: 5470.184522964754 sim_pfm: 308.8154927720413
episode: 192 training return: tensor(219.0157, device='cuda:0')
episode: 193 training return: tensor(338.4200, device='cuda:0')
episode: 194 training return: tensor(329.9655, device='cuda:0')
episode: 195 training return: tensor(230.6046, device='cuda:0')
epoch: 49 test_true_pfm: 5503.110507757442 sim_pfm: 312.2243509062585
episode: 196 training return: tensor(276.5085, device='cuda:0')
episode: 197 training return: tensor(298.6367, device='cuda:0')
episode: 198 training return: tensor(279.8491, device='cuda:0')
episode: 199 training return: tensor(172.5435, device='cuda:0')
epoch: 50 test_true_pfm: 5507.097464563713 sim_pfm: 363.71231249840156
episode: 200 training return: tensor(103.6186, device='cuda:0')
episode: 201 training return: tensor(308.1227, device='cuda:0')
episode: 202 training return: tensor(240.8538, device='cuda:0')
episode: 203 training return: tensor(224.2448, device='cuda:0')
epoch: 51 test_true_pfm: 5450.157106782756 sim_pfm: 359.0090132773233
episode: 204 training return: tensor(285.9651, device='cuda:0')
episode: 205 training return: tensor(229.4614, device='cuda:0')
episode: 206 training return: tensor(298.1388, device='cuda:0')
episode: 207 training return: tensor(290.8491, device='cuda:0')
epoch: 52 test_true_pfm: 5549.650023997542 sim_pfm: 338.49514151969925
episode: 208 training return: tensor(297.3587, device='cuda:0')
episode: 209 training return: tensor(288.2972, device='cuda:0')
episode: 210 training return: tensor(194.6228, device='cuda:0')
episode: 211 training return: tensor(274.2879, device='cuda:0')
epoch: 53 test_true_pfm: 5535.617127057543 sim_pfm: 290.7740595949581
episode: 212 training return: tensor(288.4662, device='cuda:0')
episode: 213 training return: tensor(291.0738, device='cuda:0')
episode: 214 training return: tensor(391.2072, device='cuda:0')
episode: 215 training return: tensor(243.0981, device='cuda:0')
epoch: 54 test_true_pfm: 5544.941316808438 sim_pfm: 376.7330479999849
episode: 216 training return: tensor(335.6683, device='cuda:0')
episode: 217 training return: tensor(308.6245, device='cuda:0')
episode: 218 training return: tensor(98.4198, device='cuda:0')
episode: 219 training return: tensor(250.3511, device='cuda:0')
epoch: 55 test_true_pfm: 5573.306140698798 sim_pfm: 408.3066114455772
episode: 220 training return: tensor(370.4281, device='cuda:0')
episode: 221 training return: tensor(288.0752, device='cuda:0')
episode: 222 training return: tensor(276.9453, device='cuda:0')
episode: 223 training return: tensor(340.5129, device='cuda:0')
epoch: 56 test_true_pfm: 5522.190724828685 sim_pfm: 335.73868783190846
episode: 224 training return: tensor(268.8610, device='cuda:0')
episode: 225 training return: tensor(300.0639, device='cuda:0')
episode: 226 training return: tensor(129.9109, device='cuda:0')
episode: 227 training return: tensor(342.0578, device='cuda:0')
epoch: 57 test_true_pfm: 5509.670065065115 sim_pfm: 354.0949452192096
episode: 228 training return: tensor(165.8712, device='cuda:0')
episode: 229 training return: tensor(323.4124, device='cuda:0')
episode: 230 training return: tensor(281.7009, device='cuda:0')
episode: 231 training return: tensor(300.9061, device='cuda:0')
epoch: 58 test_true_pfm: 5586.822145266576 sim_pfm: 380.7584196371802
episode: 232 training return: tensor(216.9467, device='cuda:0')
episode: 233 training return: tensor(308.8058, device='cuda:0')
episode: 234 training return: tensor(317.5633, device='cuda:0')
episode: 235 training return: tensor(345.0161, device='cuda:0')
epoch: 59 test_true_pfm: 5583.621701612664 sim_pfm: 397.6829779901891
episode: 236 training return: tensor(192.5616, device='cuda:0')
episode: 237 training return: tensor(269.1971, device='cuda:0')
episode: 238 training return: tensor(316.1849, device='cuda:0')
episode: 239 training return: tensor(234.0839, device='cuda:0')
epoch: 60 test_true_pfm: 5488.570064042302 sim_pfm: 410.3535672549527
episode: 240 training return: tensor(324.6741, device='cuda:0')
episode: 241 training return: tensor(279.2924, device='cuda:0')
episode: 242 training return: tensor(88.9743, device='cuda:0')
episode: 243 training return: tensor(294.3149, device='cuda:0')
epoch: 61 test_true_pfm: 5542.249731015152 sim_pfm: 352.859821590983
episode: 244 training return: tensor(272.9881, device='cuda:0')
episode: 245 training return: tensor(213.8505, device='cuda:0')
episode: 246 training return: tensor(311.6884, device='cuda:0')
episode: 247 training return: tensor(244.2392, device='cuda:0')
epoch: 62 test_true_pfm: 5530.19336091875 sim_pfm: 399.4447273165958
episode: 248 training return: tensor(236.1507, device='cuda:0')
episode: 249 training return: tensor(231.1550, device='cuda:0')
episode: 250 training return: tensor(290.5767, device='cuda:0')
episode: 251 training return: tensor(221.2256, device='cuda:0')
epoch: 63 test_true_pfm: 5567.631967345354 sim_pfm: 411.9807907003754
episode: 252 training return: tensor(249.1077, device='cuda:0')
episode: 253 training return: tensor(230.4621, device='cuda:0')
episode: 254 training return: tensor(329.5838, device='cuda:0')
episode: 255 training return: tensor(231.3906, device='cuda:0')
epoch: 64 test_true_pfm: 5568.8630464965145 sim_pfm: 385.0946416759495
episode: 256 training return: tensor(227.8371, device='cuda:0')
episode: 257 training return: tensor(313.6710, device='cuda:0')
episode: 258 training return: tensor(210.2738, device='cuda:0')
episode: 259 training return: tensor(316.4391, device='cuda:0')
epoch: 65 test_true_pfm: 5613.760190491853 sim_pfm: 396.4821373289645
episode: 260 training return: tensor(277.4803, device='cuda:0')
episode: 261 training return: tensor(105.0500, device='cuda:0')
episode: 262 training return: tensor(221.9724, device='cuda:0')
episode: 263 training return: tensor(284.5306, device='cuda:0')
epoch: 66 test_true_pfm: 5533.657896313641 sim_pfm: 346.49111850966193
episode: 264 training return: tensor(413.5664, device='cuda:0')
episode: 265 training return: tensor(288.5746, device='cuda:0')
episode: 266 training return: tensor(392.1784, device='cuda:0')
episode: 267 training return: tensor(234.5114, device='cuda:0')
epoch: 67 test_true_pfm: 5573.426511659062 sim_pfm: 350.83885184128303
episode: 268 training return: tensor(315.1657, device='cuda:0')
episode: 269 training return: tensor(361.0351, device='cuda:0')
episode: 270 training return: tensor(339.6926, device='cuda:0')
episode: 271 training return: tensor(404.4177, device='cuda:0')
epoch: 68 test_true_pfm: 5495.502477888836 sim_pfm: 371.9660507370718
episode: 272 training return: tensor(264.8433, device='cuda:0')
episode: 273 training return: tensor(418.0008, device='cuda:0')
episode: 274 training return: tensor(298.5201, device='cuda:0')
episode: 275 training return: tensor(179.6126, device='cuda:0')
epoch: 69 test_true_pfm: 5571.758696675733 sim_pfm: 395.2444238552901
episode: 276 training return: tensor(194.3934, device='cuda:0')
episode: 277 training return: tensor(276.2295, device='cuda:0')
episode: 278 training return: tensor(224.9659, device='cuda:0')
episode: 279 training return: tensor(316.6991, device='cuda:0')
epoch: 70 test_true_pfm: 5655.039595092366 sim_pfm: 380.6073866580652
episode: 280 training return: tensor(203.4984, device='cuda:0')
episode: 281 training return: tensor(260.5855, device='cuda:0')
episode: 282 training return: tensor(336.1577, device='cuda:0')
episode: 283 training return: tensor(284.9835, device='cuda:0')
epoch: 71 test_true_pfm: 5676.661238787569 sim_pfm: 376.0066991321801
episode: 284 training return: tensor(377.1087, device='cuda:0')
episode: 285 training return: tensor(332.6339, device='cuda:0')
episode: 286 training return: tensor(264.3619, device='cuda:0')
episode: 287 training return: tensor(281.8839, device='cuda:0')
epoch: 72 test_true_pfm: 5606.511264559656 sim_pfm: 349.6778417734119
episode: 288 training return: tensor(257.7075, device='cuda:0')
episode: 289 training return: tensor(419.3044, device='cuda:0')
episode: 290 training return: tensor(323.5522, device='cuda:0')
episode: 291 training return: tensor(319.0587, device='cuda:0')
epoch: 73 test_true_pfm: 5574.480437957656 sim_pfm: 360.0139967984481
episode: 292 training return: tensor(304.5421, device='cuda:0')
episode: 293 training return: tensor(252.4552, device='cuda:0')
episode: 294 training return: tensor(221.2906, device='cuda:0')
episode: 295 training return: tensor(152.1277, device='cuda:0')
epoch: 74 test_true_pfm: 5609.646579601865 sim_pfm: 369.0786845764281
episode: 296 training return: tensor(330.8858, device='cuda:0')
episode: 297 training return: tensor(249.0540, device='cuda:0')
episode: 298 training return: tensor(280.2696, device='cuda:0')
episode: 299 training return: tensor(211.4016, device='cuda:0')
epoch: 75 test_true_pfm: 5600.873624971169 sim_pfm: 408.10188659226213
episode: 300 training return: tensor(396.8026, device='cuda:0')
episode: 301 training return: tensor(287.1526, device='cuda:0')
episode: 302 training return: tensor(349.2388, device='cuda:0')
episode: 303 training return: tensor(321.4401, device='cuda:0')
epoch: 76 test_true_pfm: 5661.767052484203 sim_pfm: 430.37323614931665
episode: 304 training return: tensor(256.5773, device='cuda:0')
episode: 305 training return: tensor(292.2897, device='cuda:0')
episode: 306 training return: tensor(369.7833, device='cuda:0')
episode: 307 training return: tensor(353.6812, device='cuda:0')
epoch: 77 test_true_pfm: 5549.619576507696 sim_pfm: 445.9067617771895
episode: 308 training return: tensor(187.8744, device='cuda:0')
episode: 309 training return: tensor(246.2664, device='cuda:0')
episode: 310 training return: tensor(325.2208, device='cuda:0')
episode: 311 training return: tensor(403.1227, device='cuda:0')
epoch: 78 test_true_pfm: 5523.598330652459 sim_pfm: 391.24396573774476
episode: 312 training return: tensor(224.8753, device='cuda:0')
episode: 313 training return: tensor(352.2823, device='cuda:0')
episode: 314 training return: tensor(213.8574, device='cuda:0')
episode: 315 training return: tensor(357.5560, device='cuda:0')
epoch: 79 test_true_pfm: 5560.097220986648 sim_pfm: 373.0648166778653
episode: 316 training return: tensor(285.1026, device='cuda:0')
episode: 317 training return: tensor(304.5643, device='cuda:0')
episode: 318 training return: tensor(383.1364, device='cuda:0')
episode: 319 training return: tensor(301.8994, device='cuda:0')
epoch: 80 test_true_pfm: 5502.080472409066 sim_pfm: 353.006249881495
episode: 320 training return: tensor(133.5921, device='cuda:0')
episode: 321 training return: tensor(106.2668, device='cuda:0')
episode: 322 training return: tensor(241.7776, device='cuda:0')
episode: 323 training return: tensor(392.4055, device='cuda:0')
epoch: 81 test_true_pfm: 5642.525816845253 sim_pfm: 399.03547642122913
episode: 324 training return: tensor(365.6025, device='cuda:0')
episode: 325 training return: tensor(268.1809, device='cuda:0')
episode: 326 training return: tensor(399.8207, device='cuda:0')
episode: 327 training return: tensor(389.6696, device='cuda:0')
epoch: 82 test_true_pfm: 5567.546756637087 sim_pfm: 416.58512422568555
episode: 328 training return: tensor(237.6211, device='cuda:0')
episode: 329 training return: tensor(346.2480, device='cuda:0')
episode: 330 training return: tensor(313.9641, device='cuda:0')
episode: 331 training return: tensor(308.8603, device='cuda:0')
epoch: 83 test_true_pfm: 5605.357417386426 sim_pfm: 363.8454149065462
episode: 332 training return: tensor(304.0714, device='cuda:0')
episode: 333 training return: tensor(264.0519, device='cuda:0')
episode: 334 training return: tensor(221.2666, device='cuda:0')
episode: 335 training return: tensor(119.9643, device='cuda:0')
epoch: 84 test_true_pfm: 5615.97956413835 sim_pfm: 407.5173313757211
episode: 336 training return: tensor(292.6689, device='cuda:0')
episode: 337 training return: tensor(309.8994, device='cuda:0')
episode: 338 training return: tensor(248.8773, device='cuda:0')
episode: 339 training return: tensor(352.2911, device='cuda:0')
epoch: 85 test_true_pfm: 5550.47139745035 sim_pfm: 372.455216512831
episode: 340 training return: tensor(298.1881, device='cuda:0')
episode: 341 training return: tensor(314.4739, device='cuda:0')
episode: 342 training return: tensor(172.2895, device='cuda:0')
episode: 343 training return: tensor(345.3986, device='cuda:0')
epoch: 86 test_true_pfm: 5654.1389968912445 sim_pfm: 453.90585023533396
episode: 344 training return: tensor(295.5321, device='cuda:0')
episode: 345 training return: tensor(240.2433, device='cuda:0')
episode: 346 training return: tensor(442.7080, device='cuda:0')
episode: 347 training return: tensor(335.6386, device='cuda:0')
epoch: 87 test_true_pfm: 5567.377151389216 sim_pfm: 421.0797076830834
episode: 348 training return: tensor(337.7709, device='cuda:0')
episode: 349 training return: tensor(156.4329, device='cuda:0')
episode: 350 training return: tensor(367.3669, device='cuda:0')
episode: 351 training return: tensor(343.1152, device='cuda:0')
epoch: 88 test_true_pfm: 5639.077129584366 sim_pfm: 432.0122291057293
episode: 352 training return: tensor(315.2426, device='cuda:0')
episode: 353 training return: tensor(260.0396, device='cuda:0')
episode: 354 training return: tensor(171.7193, device='cuda:0')
episode: 355 training return: tensor(190.0985, device='cuda:0')
epoch: 89 test_true_pfm: 5580.848127134705 sim_pfm: 423.94391829501063
episode: 356 training return: tensor(313.9242, device='cuda:0')
episode: 357 training return: tensor(310.8631, device='cuda:0')
episode: 358 training return: tensor(354.4034, device='cuda:0')
episode: 359 training return: tensor(338.0170, device='cuda:0')
epoch: 90 test_true_pfm: 5647.975958770883 sim_pfm: 408.8501262348339
episode: 360 training return: tensor(401.3749, device='cuda:0')
episode: 361 training return: tensor(360.7426, device='cuda:0')
episode: 362 training return: tensor(405.2813, device='cuda:0')
episode: 363 training return: tensor(210.3767, device='cuda:0')
epoch: 91 test_true_pfm: 5717.247295279964 sim_pfm: 379.8531483700305
episode: 364 training return: tensor(372.0566, device='cuda:0')
episode: 365 training return: tensor(341.7144, device='cuda:0')
episode: 366 training return: tensor(395.3468, device='cuda:0')
episode: 367 training return: tensor(260.7219, device='cuda:0')
epoch: 92 test_true_pfm: 5573.928859634652 sim_pfm: 428.5648703671856
episode: 368 training return: tensor(210.4949, device='cuda:0')
episode: 369 training return: tensor(317.4822, device='cuda:0')
episode: 370 training return: tensor(310.7499, device='cuda:0')
episode: 371 training return: tensor(296.3570, device='cuda:0')
epoch: 93 test_true_pfm: 5668.494597011824 sim_pfm: 438.4102841312221
episode: 372 training return: tensor(372.6205, device='cuda:0')
episode: 373 training return: tensor(371.9779, device='cuda:0')
episode: 374 training return: tensor(351.9912, device='cuda:0')
episode: 375 training return: tensor(203.7291, device='cuda:0')
epoch: 94 test_true_pfm: 5558.301562190237 sim_pfm: 449.48048569009796
episode: 376 training return: tensor(208.6459, device='cuda:0')
episode: 377 training return: tensor(318.6646, device='cuda:0')
episode: 378 training return: tensor(305.4133, device='cuda:0')
episode: 379 training return: tensor(354.8966, device='cuda:0')
epoch: 95 test_true_pfm: 5654.25584400177 sim_pfm: 417.43553913972573
episode: 380 training return: tensor(362.5823, device='cuda:0')
episode: 381 training return: tensor(254.2563, device='cuda:0')
episode: 382 training return: tensor(315.4840, device='cuda:0')
episode: 383 training return: tensor(293.2712, device='cuda:0')
epoch: 96 test_true_pfm: 5539.285663348237 sim_pfm: 410.2444254131794
episode: 384 training return: tensor(363.9773, device='cuda:0')
episode: 385 training return: tensor(224.0743, device='cuda:0')
episode: 386 training return: tensor(324.9803, device='cuda:0')
episode: 387 training return: tensor(218.7226, device='cuda:0')
epoch: 97 test_true_pfm: 5666.875428932394 sim_pfm: 439.88506104643847
episode: 388 training return: tensor(284.8632, device='cuda:0')
episode: 389 training return: tensor(344.6940, device='cuda:0')
episode: 390 training return: tensor(310.8023, device='cuda:0')
episode: 391 training return: tensor(255.9589, device='cuda:0')
epoch: 98 test_true_pfm: 5563.787310114084 sim_pfm: 429.2355578578038
episode: 392 training return: tensor(359.0132, device='cuda:0')
episode: 393 training return: tensor(358.3974, device='cuda:0')
episode: 394 training return: tensor(217.7139, device='cuda:0')
episode: 395 training return: tensor(304.2292, device='cuda:0')
epoch: 99 test_true_pfm: 5566.064654870308 sim_pfm: 415.8022467745662
episode: 396 training return: tensor(215.9080, device='cuda:0')
episode: 397 training return: tensor(385.6365, device='cuda:0')
episode: 398 training return: tensor(297.2468, device='cuda:0')
episode: 399 training return: tensor(300.7005, device='cuda:0')
epoch: 100 test_true_pfm: 5676.472414463739 sim_pfm: 443.10998849050765
episode: 400 training return: tensor(280.5348, device='cuda:0')
episode: 401 training return: tensor(314.7021, device='cuda:0')
episode: 402 training return: tensor(333.0818, device='cuda:0')
episode: 403 training return: tensor(394.6689, device='cuda:0')
epoch: 101 test_true_pfm: 5546.4185448989765 sim_pfm: 452.8054492926264
episode: 404 training return: tensor(290.1018, device='cuda:0')
episode: 405 training return: tensor(352.3307, device='cuda:0')
episode: 406 training return: tensor(320.3279, device='cuda:0')
episode: 407 training return: tensor(373.9047, device='cuda:0')
epoch: 102 test_true_pfm: 5650.683511712785 sim_pfm: 411.57892328682163
episode: 408 training return: tensor(375.3315, device='cuda:0')
episode: 409 training return: tensor(319.2360, device='cuda:0')
episode: 410 training return: tensor(266.9944, device='cuda:0')
episode: 411 training return: tensor(298.1313, device='cuda:0')
epoch: 103 test_true_pfm: 5642.0108760957955 sim_pfm: 423.72956044600386
episode: 412 training return: tensor(315.1827, device='cuda:0')
episode: 413 training return: tensor(295.5268, device='cuda:0')
episode: 414 training return: tensor(324.0089, device='cuda:0')
episode: 415 training return: tensor(293.6126, device='cuda:0')
epoch: 104 test_true_pfm: 5678.487374548211 sim_pfm: 429.30290482620086
episode: 416 training return: tensor(259.3037, device='cuda:0')
episode: 417 training return: tensor(354.6912, device='cuda:0')
episode: 418 training return: tensor(289.7082, device='cuda:0')
episode: 419 training return: tensor(404.9002, device='cuda:0')
epoch: 105 test_true_pfm: 5651.62860216309 sim_pfm: 352.2568524009548
episode: 420 training return: tensor(387.8969, device='cuda:0')
episode: 421 training return: tensor(311.0559, device='cuda:0')
episode: 422 training return: tensor(239.1426, device='cuda:0')
episode: 423 training return: tensor(308.4491, device='cuda:0')
epoch: 106 test_true_pfm: 5665.0644823121775 sim_pfm: 422.6436598790654
episode: 424 training return: tensor(316.4071, device='cuda:0')
episode: 425 training return: tensor(319.5326, device='cuda:0')
episode: 426 training return: tensor(309.6455, device='cuda:0')
episode: 427 training return: tensor(305.9078, device='cuda:0')
epoch: 107 test_true_pfm: 5628.262148897454 sim_pfm: 451.25042727715726
episode: 428 training return: tensor(271.6077, device='cuda:0')
episode: 429 training return: tensor(362.8640, device='cuda:0')
episode: 430 training return: tensor(305.9678, device='cuda:0')
episode: 431 training return: tensor(360.8559, device='cuda:0')
epoch: 108 test_true_pfm: 5734.190441938161 sim_pfm: 429.8224354610429
episode: 432 training return: tensor(332.4036, device='cuda:0')
episode: 433 training return: tensor(269.7712, device='cuda:0')
episode: 434 training return: tensor(302.9647, device='cuda:0')
episode: 435 training return: tensor(284.5701, device='cuda:0')
epoch: 109 test_true_pfm: 5702.519080546455 sim_pfm: 479.302712916707
episode: 436 training return: tensor(281.7960, device='cuda:0')
episode: 437 training return: tensor(442.3044, device='cuda:0')
episode: 438 training return: tensor(401.2978, device='cuda:0')
episode: 439 training return: tensor(410.1255, device='cuda:0')
epoch: 110 test_true_pfm: 5646.179831760145 sim_pfm: 457.7331466082833
episode: 440 training return: tensor(250.2038, device='cuda:0')
episode: 441 training return: tensor(361.6213, device='cuda:0')
episode: 442 training return: tensor(335.3392, device='cuda:0')
episode: 443 training return: tensor(328.3201, device='cuda:0')
epoch: 111 test_true_pfm: 5640.130949033679 sim_pfm: 454.15692109040293
episode: 444 training return: tensor(235.1046, device='cuda:0')
episode: 445 training return: tensor(327.1666, device='cuda:0')
episode: 446 training return: tensor(404.4556, device='cuda:0')
episode: 447 training return: tensor(294.2357, device='cuda:0')
epoch: 112 test_true_pfm: 5553.316177838453 sim_pfm: 423.8988804232795
episode: 448 training return: tensor(270.5690, device='cuda:0')
episode: 449 training return: tensor(254.3364, device='cuda:0')
episode: 450 training return: tensor(360.2240, device='cuda:0')
episode: 451 training return: tensor(224.5653, device='cuda:0')
epoch: 113 test_true_pfm: 5627.5294451910095 sim_pfm: 428.7444116289941
episode: 452 training return: tensor(339.9210, device='cuda:0')
episode: 453 training return: tensor(276.6462, device='cuda:0')
episode: 454 training return: tensor(180.9579, device='cuda:0')
episode: 455 training return: tensor(331.2322, device='cuda:0')
epoch: 114 test_true_pfm: 5708.960756641595 sim_pfm: 433.49769112764625
episode: 456 training return: tensor(237.0876, device='cuda:0')
episode: 457 training return: tensor(398.8963, device='cuda:0')
episode: 458 training return: tensor(275.9138, device='cuda:0')
episode: 459 training return: tensor(364.7877, device='cuda:0')
epoch: 115 test_true_pfm: 5584.738163250981 sim_pfm: 466.82429725228576
episode: 460 training return: tensor(263.2105, device='cuda:0')
episode: 461 training return: tensor(327.7957, device='cuda:0')
episode: 462 training return: tensor(286.2621, device='cuda:0')
episode: 463 training return: tensor(453.8495, device='cuda:0')
epoch: 116 test_true_pfm: 5745.791712464664 sim_pfm: 416.74421761682606
episode: 464 training return: tensor(262.9343, device='cuda:0')
episode: 465 training return: tensor(323.0559, device='cuda:0')
episode: 466 training return: tensor(419.8471, device='cuda:0')
episode: 467 training return: tensor(442.6472, device='cuda:0')
epoch: 117 test_true_pfm: 5678.069028702884 sim_pfm: 499.8916612370813
episode: 468 training return: tensor(372.9460, device='cuda:0')
episode: 469 training return: tensor(378.0135, device='cuda:0')
episode: 470 training return: tensor(392.1946, device='cuda:0')
episode: 471 training return: tensor(284.0266, device='cuda:0')
epoch: 118 test_true_pfm: 5589.269303881253 sim_pfm: 430.0669250074231
episode: 472 training return: tensor(315.8925, device='cuda:0')
episode: 473 training return: tensor(317.5268, device='cuda:0')
episode: 474 training return: tensor(287.1747, device='cuda:0')
episode: 475 training return: tensor(325.6949, device='cuda:0')
epoch: 119 test_true_pfm: 5700.481850876772 sim_pfm: 410.3415034714756
episode: 476 training return: tensor(298.4196, device='cuda:0')
episode: 477 training return: tensor(345.3343, device='cuda:0')
episode: 478 training return: tensor(276.6911, device='cuda:0')
episode: 479 training return: tensor(374.4185, device='cuda:0')
epoch: 120 test_true_pfm: 5684.309000108242 sim_pfm: 445.5189431607723
episode: 480 training return: tensor(410.8944, device='cuda:0')
episode: 481 training return: tensor(285.5118, device='cuda:0')
episode: 482 training return: tensor(358.0771, device='cuda:0')
episode: 483 training return: tensor(200.4510, device='cuda:0')
epoch: 121 test_true_pfm: 5636.173844425829 sim_pfm: 451.57919515882776
episode: 484 training return: tensor(357.2026, device='cuda:0')
episode: 485 training return: tensor(294.7299, device='cuda:0')
episode: 486 training return: tensor(203.2518, device='cuda:0')
episode: 487 training return: tensor(310.7882, device='cuda:0')
epoch: 122 test_true_pfm: 5673.775287935549 sim_pfm: 410.5412608535941
episode: 488 training return: tensor(422.4886, device='cuda:0')
episode: 489 training return: tensor(345.4587, device='cuda:0')
episode: 490 training return: tensor(263.7741, device='cuda:0')
episode: 491 training return: tensor(231.0778, device='cuda:0')
epoch: 123 test_true_pfm: 5695.5583528442185 sim_pfm: 430.31369237973314
episode: 492 training return: tensor(282.1245, device='cuda:0')
episode: 493 training return: tensor(305.8839, device='cuda:0')
episode: 494 training return: tensor(275.5489, device='cuda:0')
episode: 495 training return: tensor(282.3355, device='cuda:0')
epoch: 124 test_true_pfm: 5691.36816598524 sim_pfm: 450.6337195353117
episode: 496 training return: tensor(330.6262, device='cuda:0')
episode: 497 training return: tensor(379.0106, device='cuda:0')
episode: 498 training return: tensor(417.6051, device='cuda:0')
episode: 499 training return: tensor(350.0927, device='cuda:0')
epoch: 125 test_true_pfm: 5742.3155171674125 sim_pfm: 423.22051853480906
episode: 500 training return: tensor(371.4677, device='cuda:0')
episode: 501 training return: tensor(360.8148, device='cuda:0')
episode: 502 training return: tensor(404.4031, device='cuda:0')
episode: 503 training return: tensor(346.2018, device='cuda:0')
epoch: 126 test_true_pfm: 5756.473058559568 sim_pfm: 453.3539657962974
episode: 504 training return: tensor(241.7195, device='cuda:0')
episode: 505 training return: tensor(403.9468, device='cuda:0')
episode: 506 training return: tensor(321.0241, device='cuda:0')
episode: 507 training return: tensor(248.5454, device='cuda:0')
epoch: 127 test_true_pfm: 5655.264069158744 sim_pfm: 455.0815468672469
episode: 508 training return: tensor(375.7251, device='cuda:0')
episode: 509 training return: tensor(318.4422, device='cuda:0')
episode: 510 training return: tensor(282.7301, device='cuda:0')
episode: 511 training return: tensor(404.5927, device='cuda:0')
epoch: 128 test_true_pfm: 5686.211027049765 sim_pfm: 410.94375736576814
episode: 512 training return: tensor(329.9666, device='cuda:0')
episode: 513 training return: tensor(217.9540, device='cuda:0')
episode: 514 training return: tensor(420.6712, device='cuda:0')
episode: 515 training return: tensor(357.6053, device='cuda:0')
epoch: 129 test_true_pfm: 5748.369774670665 sim_pfm: 418.99348270337214
episode: 516 training return: tensor(278.4109, device='cuda:0')
episode: 517 training return: tensor(324.6376, device='cuda:0')
episode: 518 training return: tensor(290.9491, device='cuda:0')
episode: 519 training return: tensor(260.4053, device='cuda:0')
epoch: 130 test_true_pfm: 5690.433111594552 sim_pfm: 462.29067663894966
episode: 520 training return: tensor(333.8161, device='cuda:0')
episode: 521 training return: tensor(411.0502, device='cuda:0')
episode: 522 training return: tensor(194.3984, device='cuda:0')
episode: 523 training return: tensor(247.9943, device='cuda:0')
epoch: 131 test_true_pfm: 5673.8300362528635 sim_pfm: 403.2106761521815
episode: 524 training return: tensor(323.8685, device='cuda:0')
episode: 525 training return: tensor(323.1542, device='cuda:0')
episode: 526 training return: tensor(350.5131, device='cuda:0')
episode: 527 training return: tensor(325.5524, device='cuda:0')
epoch: 132 test_true_pfm: 5645.933230736817 sim_pfm: 481.4409972029777
episode: 528 training return: tensor(330.9382, device='cuda:0')
episode: 529 training return: tensor(365.2116, device='cuda:0')
episode: 530 training return: tensor(383.2409, device='cuda:0')
episode: 531 training return: tensor(397.3530, device='cuda:0')
epoch: 133 test_true_pfm: 5640.731194939777 sim_pfm: 471.13510411368526
episode: 532 training return: tensor(388.4695, device='cuda:0')
episode: 533 training return: tensor(237.4396, device='cuda:0')
episode: 534 training return: tensor(314.9530, device='cuda:0')
episode: 535 training return: tensor(298.0706, device='cuda:0')
epoch: 134 test_true_pfm: 5789.864756454549 sim_pfm: 436.31808226877666
episode: 536 training return: tensor(332.3123, device='cuda:0')
episode: 537 training return: tensor(447.3416, device='cuda:0')
episode: 538 training return: tensor(426.4797, device='cuda:0')
episode: 539 training return: tensor(202.0798, device='cuda:0')
epoch: 135 test_true_pfm: 5648.500346333095 sim_pfm: 464.1147758672402
episode: 540 training return: tensor(286.2162, device='cuda:0')
episode: 541 training return: tensor(376.4589, device='cuda:0')
episode: 542 training return: tensor(236.5519, device='cuda:0')
episode: 543 training return: tensor(337.0852, device='cuda:0')
epoch: 136 test_true_pfm: 5686.150386185709 sim_pfm: 421.85572284568724
episode: 544 training return: tensor(404.5739, device='cuda:0')
episode: 545 training return: tensor(338.2678, device='cuda:0')
episode: 546 training return: tensor(384.6521, device='cuda:0')
episode: 547 training return: tensor(283.4074, device='cuda:0')
epoch: 137 test_true_pfm: 5625.817968462116 sim_pfm: 473.31358255442075
episode: 548 training return: tensor(389.0241, device='cuda:0')
episode: 549 training return: tensor(303.7635, device='cuda:0')
episode: 550 training return: tensor(392.1800, device='cuda:0')
episode: 551 training return: tensor(351.4948, device='cuda:0')
epoch: 138 test_true_pfm: 5656.550543026552 sim_pfm: 465.0874912991033
episode: 552 training return: tensor(342.3071, device='cuda:0')
episode: 553 training return: tensor(369.3131, device='cuda:0')
episode: 554 training return: tensor(322.3636, device='cuda:0')
episode: 555 training return: tensor(324.1380, device='cuda:0')
epoch: 139 test_true_pfm: 5725.792177366842 sim_pfm: 503.8076785743081
episode: 556 training return: tensor(337.4189, device='cuda:0')
episode: 557 training return: tensor(370.1710, device='cuda:0')
episode: 558 training return: tensor(293.2235, device='cuda:0')
episode: 559 training return: tensor(391.1554, device='cuda:0')
epoch: 140 test_true_pfm: 5650.567991825258 sim_pfm: 401.55520151846576
episode: 560 training return: tensor(211.9151, device='cuda:0')
episode: 561 training return: tensor(335.1649, device='cuda:0')
episode: 562 training return: tensor(348.8238, device='cuda:0')
episode: 563 training return: tensor(420.0084, device='cuda:0')
epoch: 141 test_true_pfm: 5722.753211609704 sim_pfm: 442.25853001954965
episode: 564 training return: tensor(387.7379, device='cuda:0')
episode: 565 training return: tensor(298.3335, device='cuda:0')
episode: 566 training return: tensor(370.7763, device='cuda:0')
episode: 567 training return: tensor(349.5897, device='cuda:0')
epoch: 142 test_true_pfm: 5642.63774063646 sim_pfm: 509.44768895594945
episode: 568 training return: tensor(297.6935, device='cuda:0')
episode: 569 training return: tensor(322.0812, device='cuda:0')
episode: 570 training return: tensor(267.2079, device='cuda:0')
episode: 571 training return: tensor(333.5953, device='cuda:0')
epoch: 143 test_true_pfm: 5691.832975116416 sim_pfm: 385.5282126038025
episode: 572 training return: tensor(393.0060, device='cuda:0')
episode: 573 training return: tensor(281.1139, device='cuda:0')
episode: 574 training return: tensor(338.8409, device='cuda:0')
episode: 575 training return: tensor(262.2414, device='cuda:0')
epoch: 144 test_true_pfm: 5718.638337609806 sim_pfm: 469.84702750631067
episode: 576 training return: tensor(287.3149, device='cuda:0')
episode: 577 training return: tensor(359.0060, device='cuda:0')
episode: 578 training return: tensor(369.9969, device='cuda:0')
episode: 579 training return: tensor(344.3947, device='cuda:0')
epoch: 145 test_true_pfm: 5626.20968731618 sim_pfm: 425.2747113692264
episode: 580 training return: tensor(233.3206, device='cuda:0')
episode: 581 training return: tensor(387.9651, device='cuda:0')
episode: 582 training return: tensor(358.8514, device='cuda:0')
episode: 583 training return: tensor(360.2865, device='cuda:0')
epoch: 146 test_true_pfm: 5666.664586103001 sim_pfm: 428.5034821171139
episode: 584 training return: tensor(423.6332, device='cuda:0')
episode: 585 training return: tensor(276.3352, device='cuda:0')
episode: 586 training return: tensor(381.4007, device='cuda:0')
episode: 587 training return: tensor(371.3981, device='cuda:0')
epoch: 147 test_true_pfm: 5710.458405739413 sim_pfm: 511.22524621484143
episode: 588 training return: tensor(407.4982, device='cuda:0')
episode: 589 training return: tensor(362.4116, device='cuda:0')
episode: 590 training return: tensor(309.7664, device='cuda:0')
episode: 591 training return: tensor(359.1205, device='cuda:0')
epoch: 148 test_true_pfm: 5726.746513726248 sim_pfm: 478.11075832376565
episode: 592 training return: tensor(277.0479, device='cuda:0')
episode: 593 training return: tensor(356.3818, device='cuda:0')
episode: 594 training return: tensor(402.1291, device='cuda:0')
episode: 595 training return: tensor(334.5457, device='cuda:0')
epoch: 149 test_true_pfm: 5659.728844073633 sim_pfm: 477.8081911982542
episode: 596 training return: tensor(236.5593, device='cuda:0')
episode: 597 training return: tensor(194.0592, device='cuda:0')
episode: 598 training return: tensor(293.9162, device='cuda:0')
episode: 599 training return: tensor(237.0757, device='cuda:0')
epoch: 150 test_true_pfm: 5685.066016145218 sim_pfm: 398.2419554935768
