['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '3']
epoch: 0 training_loss 0.2703856561332941 test_loss: 0.20103952884674073
epoch: 1 training_loss 0.19443281278014182 test_loss: 0.20310792922973633
epoch: 2 training_loss 0.16909368999302388 test_loss: 0.1593814969062805
epoch: 3 training_loss 0.15685712706297636 test_loss: 0.1492013692855835
epoch: 4 training_loss 0.14809204015880822 test_loss: 0.13139686584472657
epoch: 5 training_loss 0.1490208848565817 test_loss: 0.16364028453826904
epoch: 6 training_loss 0.138599450327456 test_loss: 0.1452581286430359
epoch: 7 training_loss 0.14504179190844296 test_loss: 0.14136254787445068
epoch: 8 training_loss 0.13427286371588706 test_loss: 0.12953929901123046
epoch: 9 training_loss 0.14889943782240153 test_loss: 0.1293414831161499
epoch: 10 training_loss 0.13166653137654066 test_loss: 0.14160031080245972
epoch: 11 training_loss 0.1258191093429923 test_loss: 0.1401475191116333
epoch: 12 training_loss 0.13119258169084788 test_loss: 0.11299500465393067
epoch: 13 training_loss 0.12973599322140217 test_loss: 0.13837341070175171
epoch: 14 training_loss 0.13630108758807183 test_loss: 0.11335633993148804
epoch: 15 training_loss 0.12602343019098045 test_loss: 0.1375973105430603
epoch: 16 training_loss 0.12216017421334982 test_loss: 0.12876765727996825
epoch: 17 training_loss 0.1310533818975091 test_loss: 0.1302562952041626
epoch: 18 training_loss 0.13627001103013753 test_loss: 0.14049978256225587
epoch: 19 training_loss 0.12937403101474046 test_loss: 0.13024526834487915
epoch: 20 training_loss 0.11847832586616278 test_loss: 0.11617577075958252
epoch: 21 training_loss 0.12215888470411301 test_loss: 0.12389118671417236
epoch: 22 training_loss 0.13188363024964928 test_loss: 0.11947317123413086
epoch: 23 training_loss 0.12119977258145809 test_loss: 0.10857523679733276
epoch: 24 training_loss 0.13074993692338466 test_loss: 0.11827551126480103
epoch: 25 training_loss 0.11591082397848368 test_loss: 0.13471437692642213
epoch: 26 training_loss 0.12823420502245425 test_loss: 0.1202267050743103
epoch: 27 training_loss 0.12375921361148358 test_loss: 0.11230685710906982
epoch: 28 training_loss 0.11575021244585514 test_loss: 0.13670148849487304
epoch: 29 training_loss 0.1273022348806262 test_loss: 0.12850193977355956
epoch: 30 training_loss 0.12247834503650665 test_loss: 0.15236403942108154
epoch: 31 training_loss 0.12971442122012378 test_loss: 0.13499821424484254
epoch: 32 training_loss 0.11938381373882294 test_loss: 0.13636378049850464
epoch: 33 training_loss 0.11905701369047166 test_loss: 0.1361909866333008
epoch: 34 training_loss 0.12143228018656373 test_loss: 0.13104082345962526
epoch: 35 training_loss 0.12147710464894772 test_loss: 0.12635633945465088
epoch: 36 training_loss 0.12957083363085986 test_loss: 0.12158122062683105
epoch: 37 training_loss 0.13134746350347995 test_loss: 0.10903565883636475
epoch: 38 training_loss 0.1175612573325634 test_loss: 0.12484596967697144
epoch: 39 training_loss 0.11472140796482563 test_loss: 0.09881309270858765
epoch: 40 training_loss 0.12701556693762542 test_loss: 0.12043150663375854
epoch: 41 training_loss 0.12005347114056349 test_loss: 0.1228061556816101
epoch: 42 training_loss 0.12015991961583496 test_loss: 0.10730339288711548
epoch: 43 training_loss 0.11229429557919503 test_loss: 0.12700568437576293
epoch: 44 training_loss 0.12331777637824416 test_loss: 0.12406084537506104
epoch: 45 training_loss 0.11334465987980366 test_loss: 0.1157797932624817
epoch: 46 training_loss 0.12255536258220673 test_loss: 0.13272373676300048
epoch: 47 training_loss 0.12435144543647766 test_loss: 0.11279230117797852
epoch: 48 training_loss 0.12883854545652867 test_loss: 0.11190717220306397
epoch: 49 training_loss 0.11718865122646094 test_loss: 0.11612082719802856
epoch: 50 training_loss 0.12077953159809113 test_loss: 0.12415657043457032
epoch: 51 training_loss 0.11774826738983393 test_loss: 0.10722168684005737
epoch: 52 training_loss 0.11838377438485623 test_loss: 0.12677377462387085
epoch: 53 training_loss 0.1164039410278201 test_loss: 0.12773362398147584
epoch: 54 training_loss 0.12328003499656916 test_loss: 0.11768147945404053
epoch: 55 training_loss 0.12560876987874509 test_loss: 0.1344664454460144
epoch: 56 training_loss 0.11596295507624746 test_loss: 0.1114991545677185
epoch: 57 training_loss 0.114971903860569 test_loss: 0.11350146532058716
epoch: 58 training_loss 0.11792629227042198 test_loss: 0.12845678329467775
epoch: 59 training_loss 0.11393840063363314 test_loss: 0.11837002038955688
epoch: 60 training_loss 0.11181520562618971 test_loss: 0.12046234607696533
epoch: 61 training_loss 0.11958227446302772 test_loss: 0.13832051753997804
epoch: 62 training_loss 0.1301336757093668 test_loss: 0.1291816234588623
epoch: 63 training_loss 0.12096113722771407 test_loss: 0.13536897897720337
epoch: 64 training_loss 0.1232452591881156 test_loss: 0.12806074619293212
epoch: 65 training_loss 0.12027092427015304 test_loss: 0.11788812875747681
epoch: 66 training_loss 0.11792317416518927 test_loss: 0.11675348281860351
epoch: 67 training_loss 0.12199832614511251 test_loss: 0.1288055419921875
epoch: 68 training_loss 0.11284710461273789 test_loss: 0.14011441469192504
epoch: 69 training_loss 0.11170804856345057 test_loss: 0.09605497121810913
epoch: 70 training_loss 0.11701536253094673 test_loss: 0.14354606866836547
epoch: 71 training_loss 0.12668355021625757 test_loss: 0.1300366163253784
epoch: 72 training_loss 0.11916518427431583 test_loss: 0.12992888689041138
epoch: 73 training_loss 0.11348845284432173 test_loss: 0.14783217906951904
epoch: 74 training_loss 0.12652241667732597 test_loss: 0.1224490761756897
epoch: 75 training_loss 0.11402448486536741 test_loss: 0.10036886930465698
epoch: 76 training_loss 0.11450018793344498 test_loss: 0.11450744867324829
epoch: 77 training_loss 0.1251268684118986 test_loss: 0.11860311031341553
epoch: 78 training_loss 0.12129451517015695 test_loss: 0.11231509447097779
epoch: 79 training_loss 0.12115043807774782 test_loss: 0.12964032888412474
epoch: 80 training_loss 0.11795639421790838 test_loss: 0.12747368812561036
epoch: 81 training_loss 0.11922340104356408 test_loss: 0.10876394510269165
epoch: 82 training_loss 0.11767683506011963 test_loss: 0.12405081987380981
epoch: 83 training_loss 0.11774677019566297 test_loss: 0.14055154323577881
epoch: 84 training_loss 0.12392601491883397 test_loss: 0.12575864791870117
epoch: 85 training_loss 0.11662340166047215 test_loss: 0.116221022605896
epoch: 86 training_loss 0.12594983821734787 test_loss: 0.13593125343322754
epoch: 87 training_loss 0.12156120870262384 test_loss: 0.11114605665206909
epoch: 88 training_loss 0.12409675627946853 test_loss: 0.12222402095794678
epoch: 89 training_loss 0.11357786191627384 test_loss: 0.11917204856872558
epoch: 90 training_loss 0.11860693406313658 test_loss: 0.11672232151031495
epoch: 91 training_loss 0.1242329554259777 test_loss: 0.11204744577407837
epoch: 92 training_loss 0.1110922772064805 test_loss: 0.14302730560302734
epoch: 93 training_loss 0.12728597298264505 test_loss: 0.1310440182685852
epoch: 94 training_loss 0.1324481188133359 test_loss: 0.12369157075881958
epoch: 95 training_loss 0.12273122619837523 test_loss: 0.11250501871109009
epoch: 96 training_loss 0.11647881355136633 test_loss: 0.1162366271018982
epoch: 97 training_loss 0.12075018918141722 test_loss: 0.115935218334198
epoch: 98 training_loss 0.10793241649866105 test_loss: 0.11828629970550537
epoch: 99 training_loss 0.12284063903614878 test_loss: 0.10263622999191284
epoch: 100 training_loss 0.11825261883437634 test_loss: 0.11627188920974732
epoch: 101 training_loss 0.11205199999734759 test_loss: 0.12343343496322631
epoch: 102 training_loss 0.1245307994261384 test_loss: 0.10386855602264404
epoch: 103 training_loss 0.12346690192818642 test_loss: 0.11899424791336059
epoch: 104 training_loss 0.12093326199799775 test_loss: 0.12318828105926513
epoch: 105 training_loss 0.12005901969969272 test_loss: 0.1221656084060669
epoch: 106 training_loss 0.11046012327075004 test_loss: 0.11692299842834472
epoch: 107 training_loss 0.12637379769235849 test_loss: 0.1265670657157898
epoch: 108 training_loss 0.11853340692818165 test_loss: 0.10989891290664673
epoch: 109 training_loss 0.12156975891441107 test_loss: 0.1184917688369751
epoch: 110 training_loss 0.11607698161154985 test_loss: 0.1275464415550232
epoch: 111 training_loss 0.12280346401035785 test_loss: 0.12998210191726683
epoch: 112 training_loss 0.1259375639632344 test_loss: 0.12916625738143922
epoch: 113 training_loss 0.11834347043186426 test_loss: 0.11839641332626342
epoch: 114 training_loss 0.1135562588274479 test_loss: 0.11296817064285278
epoch: 115 training_loss 0.11584662277251483 test_loss: 0.12191658020019532
epoch: 116 training_loss 0.12004592798650265 test_loss: 0.11775277853012085
epoch: 117 training_loss 0.12026041012257338 test_loss: 0.11708724498748779
epoch: 118 training_loss 0.11846938990056514 test_loss: 0.10905554294586181
epoch: 119 training_loss 0.11991946507245302 test_loss: 0.11003220081329346
epoch: 120 training_loss 0.12250256327912211 test_loss: 0.13644503355026244
epoch: 121 training_loss 0.12448716159909963 test_loss: 0.13688216209411622
epoch: 122 training_loss 0.12855734367854893 test_loss: 0.11110379695892333
epoch: 123 training_loss 0.11901315368711948 test_loss: 0.13001763820648193
epoch: 124 training_loss 0.1159938695654273 test_loss: 0.12788584232330322
epoch: 125 training_loss 0.11287709843367338 test_loss: 0.10959991216659545
epoch: 126 training_loss 0.12106379926204681 test_loss: 0.10417329072952271
epoch: 127 training_loss 0.11997066039592028 test_loss: 0.12926462888717652
epoch: 128 training_loss 0.125888726208359 test_loss: 0.11031683683395385
epoch: 129 training_loss 0.12034211724996567 test_loss: 0.11572221517562867
epoch: 130 training_loss 0.12370819002389907 test_loss: 0.10393791198730469
epoch: 131 training_loss 0.11825574990361928 test_loss: 0.11635745763778686
epoch: 132 training_loss 0.11725242154672742 test_loss: 0.12889461517333983
epoch: 133 training_loss 0.1206740890815854 test_loss: 0.1256095290184021
epoch: 134 training_loss 0.11772430850192904 test_loss: 0.12125219106674194
epoch: 135 training_loss 0.11710122805088759 test_loss: 0.12838295698165894
epoch: 136 training_loss 0.12379082126542926 test_loss: 0.11816095113754273
epoch: 137 training_loss 0.11661265950649977 test_loss: 0.10084493160247802
epoch: 138 training_loss 0.11548622246831655 test_loss: 0.11457557678222656
epoch: 139 training_loss 0.11264453833922744 test_loss: 0.11327316761016845
epoch: 140 training_loss 0.11878013715147973 test_loss: 0.13907210826873778
epoch: 141 training_loss 0.1204652113467455 test_loss: 0.1083491563796997
epoch: 142 training_loss 0.1258931589499116 test_loss: 0.13449761867523194
epoch: 143 training_loss 0.12114620724692941 test_loss: 0.1195786714553833
epoch: 144 training_loss 0.12375091165304183 test_loss: 0.11074330806732177
epoch: 145 training_loss 0.12010958645492792 test_loss: 0.10895036458969116
epoch: 146 training_loss 0.12008095180615783 test_loss: 0.11655997037887574
epoch: 147 training_loss 0.11333556395024061 test_loss: 0.12387983798980713
epoch: 148 training_loss 0.1109636927768588 test_loss: 0.11686936616897584
epoch: 149 training_loss 0.11640884254127741 test_loss: 0.1084208369255066
epoch: 0 training_loss 32.029426651000975 test_loss: 11.653038787841798
epoch: 1 training_loss 8.444927439689636 test_loss: 6.9000701904296875
epoch: 2 training_loss 6.032109642028809 test_loss: 5.125722885131836
epoch: 3 training_loss 4.809341537952423 test_loss: 4.465165328979492
epoch: 4 training_loss 4.169378664493561 test_loss: 3.9268165588378907
epoch: 5 training_loss 3.71179009437561 test_loss: 3.4900127410888673
epoch: 6 training_loss 3.4422172784805296 test_loss: 3.1208499908447265
epoch: 7 training_loss 3.1595249438285826 test_loss: 3.21474609375
epoch: 8 training_loss 2.86514675617218 test_loss: 2.669476318359375
epoch: 9 training_loss 2.671201868057251 test_loss: 2.6057785034179686
epoch: 10 training_loss 2.599450032711029 test_loss: 2.38885440826416
epoch: 11 training_loss 2.403933413028717 test_loss: 2.245635223388672
epoch: 12 training_loss 2.3673973369598387 test_loss: 2.10143985748291
epoch: 13 training_loss 2.247606222629547 test_loss: 2.1062223434448244
epoch: 14 training_loss 2.210737340450287 test_loss: 2.0141536712646486
epoch: 15 training_loss 2.068851773738861 test_loss: 2.1209379196166993
epoch: 16 training_loss 2.073044848442078 test_loss: 1.9665781021118165
epoch: 17 training_loss 1.9768511521816254 test_loss: 1.855750846862793
epoch: 18 training_loss 1.9312381160259247 test_loss: 1.8417314529418944
epoch: 19 training_loss 1.8997009956836701 test_loss: 1.8962295532226563
epoch: 20 training_loss 1.8802025103569031 test_loss: 1.829071044921875
epoch: 21 training_loss 1.8230139648914336 test_loss: 1.6236064910888672
epoch: 22 training_loss 1.816379725933075 test_loss: 1.8420406341552735
epoch: 23 training_loss 1.7929337060451507 test_loss: 1.755343246459961
epoch: 24 training_loss 1.734127275943756 test_loss: 1.683768081665039
epoch: 25 training_loss 1.6991805362701415 test_loss: 1.695810317993164
epoch: 26 training_loss 1.6887801802158355 test_loss: 1.5717698097229005
epoch: 27 training_loss 1.716702426671982 test_loss: 1.5291465759277343
epoch: 28 training_loss 1.6714971435070038 test_loss: 1.6408111572265625
epoch: 29 training_loss 1.5813696312904357 test_loss: 1.6379671096801758
epoch: 30 training_loss 1.6436073577404022 test_loss: 1.626556968688965
epoch: 31 training_loss 1.6094394731521606 test_loss: 1.5198163986206055
epoch: 32 training_loss 1.507745076417923 test_loss: 1.5161964416503906
epoch: 33 training_loss 1.5378854298591613 test_loss: 1.527247428894043
epoch: 34 training_loss 1.5122997665405273 test_loss: 1.4577153205871582
epoch: 35 training_loss 1.5104556369781494 test_loss: 1.4651983261108399
epoch: 36 training_loss 1.5231067740917206 test_loss: 1.6084735870361329
epoch: 37 training_loss 1.449104549884796 test_loss: 1.5145949363708495
epoch: 38 training_loss 1.4898047995567323 test_loss: 1.4266571998596191
epoch: 39 training_loss 1.5263070797920226 test_loss: 1.4346982002258302
epoch: 40 training_loss 1.4989894604682923 test_loss: 1.4557185173034668
epoch: 41 training_loss 1.4701201617717743 test_loss: 1.4306516647338867
epoch: 42 training_loss 1.4200533533096313 test_loss: 1.4063611030578613
epoch: 43 training_loss 1.4171297788619994 test_loss: 1.490667724609375
epoch: 44 training_loss 1.4550015103816987 test_loss: 1.4168631553649902
epoch: 45 training_loss 1.4279051518440247 test_loss: 1.4316946029663087
epoch: 46 training_loss 1.4351647782325745 test_loss: 1.3745532035827637
epoch: 47 training_loss 1.4131898534297944 test_loss: 1.3760218620300293
epoch: 48 training_loss 1.3859192156791686 test_loss: 1.3635913848876953
epoch: 49 training_loss 1.366945697069168 test_loss: 1.4549762725830078
epoch: 50 training_loss 1.3599034416675568 test_loss: 1.312645721435547
epoch: 51 training_loss 1.3941456151008607 test_loss: 1.3532938957214355
epoch: 52 training_loss 1.315324695110321 test_loss: 1.3114700317382812
epoch: 53 training_loss 1.3288723409175873 test_loss: 1.2991233825683595
epoch: 54 training_loss 1.3702538228034973 test_loss: 1.339806079864502
epoch: 55 training_loss 1.3211382400989533 test_loss: 1.3733463287353516
epoch: 56 training_loss 1.2968520832061767 test_loss: 1.360155200958252
epoch: 57 training_loss 1.3568242704868316 test_loss: 1.266786766052246
epoch: 58 training_loss 1.3369594371318818 test_loss: 1.2012468338012696
epoch: 59 training_loss 1.3360872888565063 test_loss: 1.2494611740112305
epoch: 60 training_loss 1.3190051579475404 test_loss: 1.367611789703369
epoch: 61 training_loss 1.3006474256515503 test_loss: 1.2915221214294434
epoch: 62 training_loss 1.3019682908058166 test_loss: 1.3718052864074708
epoch: 63 training_loss 1.2686574566364288 test_loss: 1.260764217376709
epoch: 64 training_loss 1.3243606173992157 test_loss: 1.3234813690185547
epoch: 65 training_loss 1.269832946062088 test_loss: 1.2134130477905274
epoch: 66 training_loss 1.2582210659980775 test_loss: 1.2539813041687011
epoch: 67 training_loss 1.2555273044109345 test_loss: 1.252281379699707
epoch: 68 training_loss 1.21662000477314 test_loss: 1.1735954284667969
epoch: 69 training_loss 1.2221566051244737 test_loss: 1.2837608337402344
epoch: 70 training_loss 1.2198381507396698 test_loss: 1.267188835144043
epoch: 71 training_loss 1.234825308918953 test_loss: 1.2284164428710938
epoch: 72 training_loss 1.2231733345985412 test_loss: 1.267326259613037
epoch: 73 training_loss 1.2362975299358367 test_loss: 1.2162930488586425
epoch: 74 training_loss 1.224499099254608 test_loss: 1.2096220016479493
epoch: 75 training_loss 1.1789800035953522 test_loss: 1.1987833976745605
epoch: 76 training_loss 1.1939111560583116 test_loss: 1.1443916320800782
epoch: 77 training_loss 1.2366226512193679 test_loss: 1.2159016609191895
epoch: 78 training_loss 1.2248318690061568 test_loss: 1.2195425987243653
epoch: 79 training_loss 1.2354785597324371 test_loss: 1.192210578918457
epoch: 80 training_loss 1.179838851094246 test_loss: 1.1882099151611327
epoch: 81 training_loss 1.1823622101545335 test_loss: 1.2629378318786622
epoch: 82 training_loss 1.1730876404047013 test_loss: 1.1215800285339355
epoch: 83 training_loss 1.2248452466726303 test_loss: 1.2079978942871095
epoch: 84 training_loss 1.1918302685022355 test_loss: 1.1341497421264648
epoch: 85 training_loss 1.1988729161024094 test_loss: 1.1051980972290039
epoch: 86 training_loss 1.163706532716751 test_loss: 1.2273202896118165
epoch: 87 training_loss 1.157501484155655 test_loss: 1.1303213119506836
epoch: 88 training_loss 1.155123643875122 test_loss: 1.1401192665100097
epoch: 89 training_loss 1.1724497479200364 test_loss: 1.2086244583129884
epoch: 90 training_loss 1.1539108788967132 test_loss: 1.0949681282043457
epoch: 91 training_loss 1.1519814270734787 test_loss: 1.1338144302368165
epoch: 92 training_loss 1.1465053284168243 test_loss: 1.0917389869689942
epoch: 93 training_loss 1.157858275771141 test_loss: 1.1368306159973145
epoch: 94 training_loss 1.1515035557746887 test_loss: 1.105956745147705
epoch: 95 training_loss 1.1652213883399964 test_loss: 1.1106008529663085
epoch: 96 training_loss 1.1305833441019058 test_loss: 1.0997504234313964
epoch: 97 training_loss 1.1375874650478364 test_loss: 1.1958861351013184
epoch: 98 training_loss 1.1550354415178299 test_loss: 1.0969823837280273
epoch: 99 training_loss 1.0801252382993698 test_loss: 1.15704288482666
epoch: 100 training_loss 1.1018520599603654 test_loss: 1.1246692657470703
epoch: 101 training_loss 1.1217702931165696 test_loss: 1.1111224174499512
epoch: 102 training_loss 1.1054051858186722 test_loss: 1.0570843696594239
epoch: 103 training_loss 1.0900973337888717 test_loss: 1.1158068656921387
epoch: 104 training_loss 1.1153744661808014 test_loss: 1.0706406593322755
epoch: 105 training_loss 1.0925761634111404 test_loss: 1.0562138557434082
epoch: 106 training_loss 1.1034279680252075 test_loss: 1.1141221046447753
epoch: 107 training_loss 1.1021138632297516 test_loss: 1.2001824378967285
epoch: 108 training_loss 1.1244811189174653 test_loss: 1.1175433158874513
epoch: 109 training_loss 1.086454238295555 test_loss: 1.0712850570678711
epoch: 110 training_loss 1.105072278380394 test_loss: 1.124058151245117
epoch: 111 training_loss 1.061595419049263 test_loss: 1.127468967437744
epoch: 112 training_loss 1.0720247685909272 test_loss: 1.0564980506896973
epoch: 113 training_loss 1.0979165953397751 test_loss: 1.0760674476623535
epoch: 114 training_loss 1.1210172605514526 test_loss: 1.0599875450134277
epoch: 115 training_loss 1.0802211183309556 test_loss: 1.138925552368164
epoch: 116 training_loss 1.0860991567373275 test_loss: 1.0774880409240724
epoch: 117 training_loss 1.0587360978126525 test_loss: 1.0379745483398437
epoch: 118 training_loss 1.073489726781845 test_loss: 1.0936169624328613
epoch: 119 training_loss 1.10133069396019 test_loss: 1.0709223747253418
epoch: 120 training_loss 1.0893574088811875 test_loss: 1.030777359008789
epoch: 121 training_loss 1.0978914785385132 test_loss: 1.0590105056762695
epoch: 122 training_loss 1.0616483122110367 test_loss: 1.1038450241088866
epoch: 123 training_loss 1.0772818201780319 test_loss: 1.0893987655639648
epoch: 124 training_loss 1.0659804266691209 test_loss: 1.1372564315795899
epoch: 125 training_loss 1.056665290594101 test_loss: 1.1315598487854004
epoch: 126 training_loss 1.0523823553323746 test_loss: 1.0459848403930665
epoch: 127 training_loss 1.0600646805763245 test_loss: 1.0220382690429688
epoch: 128 training_loss 1.0672317409515382 test_loss: 1.0902194023132323
epoch: 129 training_loss 1.0665789544582367 test_loss: 1.1034542083740235
epoch: 130 training_loss 1.070399710536003 test_loss: 0.9966997146606446
epoch: 131 training_loss 1.0460492205619811 test_loss: 1.0839089393615722
epoch: 132 training_loss 1.0614247488975526 test_loss: 1.060825252532959
epoch: 133 training_loss 1.0316773164272308 test_loss: 1.0744384765625
epoch: 134 training_loss 1.0448454910516738 test_loss: 1.0679109573364258
epoch: 135 training_loss 1.0339632195234298 test_loss: 1.0393858909606934
epoch: 136 training_loss 1.0510751521587371 test_loss: 1.0939666748046875
epoch: 137 training_loss 1.0751220345497132 test_loss: 1.0797224044799805
epoch: 138 training_loss 1.0529201191663742 test_loss: 1.0479011535644531
epoch: 139 training_loss 1.0513197296857835 test_loss: 1.0863909721374512
epoch: 140 training_loss 1.0226924139261246 test_loss: 1.0123353004455566
epoch: 141 training_loss 1.032959675192833 test_loss: 1.0503416061401367
epoch: 142 training_loss 1.024143369793892 test_loss: 1.0903289794921875
epoch: 143 training_loss 1.0260249763727187 test_loss: 1.0711145401000977
epoch: 144 training_loss 1.0755665522813798 test_loss: 1.082632064819336
epoch: 145 training_loss 1.0434497714042663 test_loss: 1.0746569633483887
epoch: 146 training_loss 0.9971597248315811 test_loss: 1.0201351165771484
epoch: 147 training_loss 1.0454060727357863 test_loss: 1.0392895698547364
epoch: 148 training_loss 1.0479081964492798 test_loss: 1.1701021194458008
epoch: 149 training_loss 1.0675700253248215 test_loss: 1.0139748573303222
4007.3397215517048
episode: 0 training return: tensor(113.6204, device='cuda:0')
episode: 1 training return: tensor(118.5158, device='cuda:0')
episode: 2 training return: tensor(144.6382, device='cuda:0')
episode: 3 training return: tensor(147.7890, device='cuda:0')
epoch: 1 test_true_pfm: 3982.1101497383784 sim_pfm: 131.2577179868628
episode: 4 training return: tensor(129.4592, device='cuda:0')
episode: 5 training return: tensor(39.8820, device='cuda:0')
episode: 6 training return: tensor(18.9633, device='cuda:0')
episode: 7 training return: tensor(51.5926, device='cuda:0')
epoch: 2 test_true_pfm: 4020.3293143164824 sim_pfm: 78.1051623945629
episode: 8 training return: tensor(-878.1940, device='cuda:0')
episode: 9 training return: tensor(-947.7162, device='cuda:0')
episode: 10 training return: tensor(148.9991, device='cuda:0')
episode: 11 training return: tensor(-927.1289, device='cuda:0')
epoch: 3 test_true_pfm: 4007.572045278562 sim_pfm: 125.13225484571497
episode: 12 training return: tensor(87.5196, device='cuda:0')
episode: 13 training return: tensor(101.6392, device='cuda:0')
episode: 14 training return: tensor(146.5438, device='cuda:0')
episode: 15 training return: tensor(132.0175, device='cuda:0')
epoch: 4 test_true_pfm: 4045.3956709824706 sim_pfm: 149.1777940482134
episode: 16 training return: tensor(139.2037, device='cuda:0')
episode: 17 training return: tensor(167.6313, device='cuda:0')
episode: 18 training return: tensor(115.3312, device='cuda:0')
episode: 19 training return: tensor(145.0013, device='cuda:0')
epoch: 5 test_true_pfm: 4044.074442155868 sim_pfm: 147.54037172768344
episode: 20 training return: tensor(144.1566, device='cuda:0')
episode: 21 training return: tensor(152.2083, device='cuda:0')
episode: 22 training return: tensor(138.8616, device='cuda:0')
episode: 23 training return: tensor(107.6634, device='cuda:0')
epoch: 6 test_true_pfm: 4063.405963387096 sim_pfm: 127.45348675661565
episode: 24 training return: tensor(30.6450, device='cuda:0')
episode: 25 training return: tensor(143.9127, device='cuda:0')
episode: 26 training return: tensor(159.7153, device='cuda:0')
episode: 27 training return: tensor(68.4674, device='cuda:0')
epoch: 7 test_true_pfm: 4028.049191546401 sim_pfm: 127.2011371482319
episode: 28 training return: tensor(140.0914, device='cuda:0')
episode: 29 training return: tensor(58.3451, device='cuda:0')
episode: 30 training return: tensor(152.2624, device='cuda:0')
episode: 31 training return: tensor(153.6307, device='cuda:0')
epoch: 8 test_true_pfm: 4075.11808900162 sim_pfm: 127.23244283306606
episode: 32 training return: tensor(-906.1018, device='cuda:0')
episode: 33 training return: tensor(55.6043, device='cuda:0')
episode: 34 training return: tensor(124.8604, device='cuda:0')
episode: 35 training return: tensor(158.0286, device='cuda:0')
epoch: 9 test_true_pfm: 4042.091081894682 sim_pfm: 148.95682571943811
episode: 36 training return: tensor(-935.5544, device='cuda:0')
episode: 37 training return: tensor(142.2927, device='cuda:0')
episode: 38 training return: tensor(80.5240, device='cuda:0')
episode: 39 training return: tensor(93.8954, device='cuda:0')
epoch: 10 test_true_pfm: 4048.025029825854 sim_pfm: 145.7449380926361
episode: 40 training return: tensor(-888.6365, device='cuda:0')
episode: 41 training return: tensor(176.3934, device='cuda:0')
episode: 42 training return: tensor(86.5165, device='cuda:0')
episode: 43 training return: tensor(147.7496, device='cuda:0')
epoch: 11 test_true_pfm: 4051.033498049023 sim_pfm: 128.42918779492416
episode: 44 training return: tensor(-950.4175, device='cuda:0')
episode: 45 training return: tensor(113.7874, device='cuda:0')
episode: 46 training return: tensor(144.2875, device='cuda:0')
episode: 47 training return: tensor(96.5764, device='cuda:0')
epoch: 12 test_true_pfm: 4068.810615879138 sim_pfm: 120.63203128180855
episode: 48 training return: tensor(130.0950, device='cuda:0')
episode: 49 training return: tensor(117.5793, device='cuda:0')
episode: 50 training return: tensor(86.5002, device='cuda:0')
episode: 51 training return: tensor(117.6289, device='cuda:0')
epoch: 13 test_true_pfm: 4032.233791558436 sim_pfm: 112.12874227238353
episode: 52 training return: tensor(63.7350, device='cuda:0')
episode: 53 training return: tensor(89.3429, device='cuda:0')
episode: 54 training return: tensor(-948.6484, device='cuda:0')
episode: 55 training return: tensor(91.2465, device='cuda:0')
epoch: 14 test_true_pfm: 3966.047272812731 sim_pfm: 62.762064300341684
episode: 56 training return: tensor(107.3704, device='cuda:0')
episode: 57 training return: tensor(36.5025, device='cuda:0')
episode: 58 training return: tensor(146.4343, device='cuda:0')
episode: 59 training return: tensor(121.1064, device='cuda:0')
epoch: 15 test_true_pfm: 4007.2987571705476 sim_pfm: 152.9107118145621
episode: 60 training return: tensor(147.6460, device='cuda:0')
episode: 61 training return: tensor(167.8429, device='cuda:0')
episode: 62 training return: tensor(114.9604, device='cuda:0')
episode: 63 training return: tensor(95.1673, device='cuda:0')
epoch: 16 test_true_pfm: 3994.9672190015117 sim_pfm: 135.71164543624036
episode: 64 training return: tensor(146.8022, device='cuda:0')
episode: 65 training return: tensor(74.0148, device='cuda:0')
episode: 66 training return: tensor(59.6493, device='cuda:0')
episode: 67 training return: tensor(142.2625, device='cuda:0')
epoch: 17 test_true_pfm: 4066.480441627685 sim_pfm: 141.6446900975619
episode: 68 training return: tensor(106.4446, device='cuda:0')
episode: 69 training return: tensor(61.0458, device='cuda:0')
episode: 70 training return: tensor(128.1926, device='cuda:0')
episode: 71 training return: tensor(43.0163, device='cuda:0')
epoch: 18 test_true_pfm: 4021.9134455069307 sim_pfm: 141.06543199913963
episode: 72 training return: tensor(151.2844, device='cuda:0')
episode: 73 training return: tensor(58.0069, device='cuda:0')
episode: 74 training return: tensor(147.0735, device='cuda:0')
episode: 75 training return: tensor(105.7287, device='cuda:0')
epoch: 19 test_true_pfm: 4024.5801269986987 sim_pfm: 131.2888281828103
episode: 76 training return: tensor(105.5843, device='cuda:0')
episode: 77 training return: tensor(130.7554, device='cuda:0')
episode: 78 training return: tensor(140.8885, device='cuda:0')
episode: 79 training return: tensor(97.7436, device='cuda:0')
epoch: 20 test_true_pfm: 4057.654875914846 sim_pfm: 110.11393789637562
episode: 80 training return: tensor(128.5146, device='cuda:0')
episode: 81 training return: tensor(-933.5516, device='cuda:0')
episode: 82 training return: tensor(144.0928, device='cuda:0')
episode: 83 training return: tensor(143.5811, device='cuda:0')
epoch: 21 test_true_pfm: 4055.755956108109 sim_pfm: 153.2872999612397
episode: 84 training return: tensor(138.4972, device='cuda:0')
episode: 85 training return: tensor(62.8628, device='cuda:0')
episode: 86 training return: tensor(124.4832, device='cuda:0')
episode: 87 training return: tensor(148.9374, device='cuda:0')
epoch: 22 test_true_pfm: 4030.2602208934127 sim_pfm: 93.62329449481331
episode: 88 training return: tensor(161.8435, device='cuda:0')
episode: 89 training return: tensor(61.2432, device='cuda:0')
episode: 90 training return: tensor(89.3657, device='cuda:0')
episode: 91 training return: tensor(105.4854, device='cuda:0')
epoch: 23 test_true_pfm: 3983.2571592530608 sim_pfm: 139.06458070331914
episode: 92 training return: tensor(149.4166, device='cuda:0')
episode: 93 training return: tensor(175.2019, device='cuda:0')
episode: 94 training return: tensor(132.2995, device='cuda:0')
episode: 95 training return: tensor(118.4452, device='cuda:0')
epoch: 24 test_true_pfm: 4050.617658661637 sim_pfm: 136.40348525857553
episode: 96 training return: tensor(129.9410, device='cuda:0')
episode: 97 training return: tensor(87.3927, device='cuda:0')
episode: 98 training return: tensor(135.8715, device='cuda:0')
episode: 99 training return: tensor(98.8784, device='cuda:0')
epoch: 25 test_true_pfm: 3969.177731651828 sim_pfm: 120.97293535476395
episode: 100 training return: tensor(-903.1237, device='cuda:0')
episode: 101 training return: tensor(10.4249, device='cuda:0')
episode: 102 training return: tensor(100.0695, device='cuda:0')
episode: 103 training return: tensor(150.5020, device='cuda:0')
epoch: 26 test_true_pfm: 4049.575099179303 sim_pfm: 139.83804782942752
episode: 104 training return: tensor(124.0668, device='cuda:0')
episode: 105 training return: tensor(-941.0328, device='cuda:0')
episode: 106 training return: tensor(136.9383, device='cuda:0')
episode: 107 training return: tensor(148.5271, device='cuda:0')
epoch: 27 test_true_pfm: 4025.233165598554 sim_pfm: 135.6299858262452
episode: 108 training return: tensor(111.1487, device='cuda:0')
episode: 109 training return: tensor(116.1200, device='cuda:0')
episode: 110 training return: tensor(145.1563, device='cuda:0')
episode: 111 training return: tensor(121.6076, device='cuda:0')
epoch: 28 test_true_pfm: 4050.077335542385 sim_pfm: 159.58812980132643
episode: 112 training return: tensor(-872.6523, device='cuda:0')
episode: 113 training return: tensor(115.9719, device='cuda:0')
episode: 114 training return: tensor(103.2612, device='cuda:0')
episode: 115 training return: tensor(143.8081, device='cuda:0')
epoch: 29 test_true_pfm: 3998.5242166598105 sim_pfm: -178.69604670725917
episode: 116 training return: tensor(102.2972, device='cuda:0')
episode: 117 training return: tensor(166.6559, device='cuda:0')
episode: 118 training return: tensor(-927.8178, device='cuda:0')
episode: 119 training return: tensor(154.9019, device='cuda:0')
epoch: 30 test_true_pfm: 4034.0003987920063 sim_pfm: 135.91779397657956
episode: 120 training return: tensor(170.9358, device='cuda:0')
episode: 121 training return: tensor(95.2926, device='cuda:0')
episode: 122 training return: tensor(152.0880, device='cuda:0')
episode: 123 training return: tensor(99.7211, device='cuda:0')
epoch: 31 test_true_pfm: 4054.8096476899623 sim_pfm: 139.56277067641108
episode: 124 training return: tensor(139.6383, device='cuda:0')
episode: 125 training return: tensor(155.4058, device='cuda:0')
episode: 126 training return: tensor(165.1919, device='cuda:0')
episode: 127 training return: tensor(131.8945, device='cuda:0')
epoch: 32 test_true_pfm: 4064.5633390500766 sim_pfm: 109.69526294754662
episode: 128 training return: tensor(140.9671, device='cuda:0')
episode: 129 training return: tensor(152.8158, device='cuda:0')
episode: 130 training return: tensor(89.3652, device='cuda:0')
episode: 131 training return: tensor(128.4999, device='cuda:0')
epoch: 33 test_true_pfm: 4035.4930335311033 sim_pfm: 90.33482369282865
episode: 132 training return: tensor(130.6967, device='cuda:0')
episode: 133 training return: tensor(150.3109, device='cuda:0')
episode: 134 training return: tensor(101.9844, device='cuda:0')
episode: 135 training return: tensor(154.8380, device='cuda:0')
epoch: 34 test_true_pfm: 4031.7157262113246 sim_pfm: 71.1047225530104
episode: 136 training return: tensor(129.1508, device='cuda:0')
episode: 137 training return: tensor(149.6703, device='cuda:0')
episode: 138 training return: tensor(110.2543, device='cuda:0')
episode: 139 training return: tensor(121.3158, device='cuda:0')
epoch: 35 test_true_pfm: 4046.353004147772 sim_pfm: 139.64206881983168
episode: 140 training return: tensor(130.8331, device='cuda:0')
episode: 141 training return: tensor(114.5079, device='cuda:0')
episode: 142 training return: tensor(88.9364, device='cuda:0')
episode: 143 training return: tensor(-798.8433, device='cuda:0')
epoch: 36 test_true_pfm: 4018.9304145390392 sim_pfm: 145.77984479428656
episode: 144 training return: tensor(143.6071, device='cuda:0')
episode: 145 training return: tensor(162.2611, device='cuda:0')
episode: 146 training return: tensor(53.8374, device='cuda:0')
episode: 147 training return: tensor(170.8440, device='cuda:0')
epoch: 37 test_true_pfm: 4059.4440964907517 sim_pfm: 105.09167637633315
episode: 148 training return: tensor(111.1810, device='cuda:0')
episode: 149 training return: tensor(105.1035, device='cuda:0')
episode: 150 training return: tensor(132.2024, device='cuda:0')
episode: 151 training return: tensor(163.4524, device='cuda:0')
epoch: 38 test_true_pfm: 4023.783575977291 sim_pfm: 138.72403138276422
episode: 152 training return: tensor(168.3083, device='cuda:0')
episode: 153 training return: tensor(81.7231, device='cuda:0')
episode: 154 training return: tensor(141.2386, device='cuda:0')
episode: 155 training return: tensor(122.6186, device='cuda:0')
epoch: 39 test_true_pfm: 4033.8563299262364 sim_pfm: 136.09306337628126
episode: 156 training return: tensor(79.1055, device='cuda:0')
episode: 157 training return: tensor(158.5288, device='cuda:0')
episode: 158 training return: tensor(108.1775, device='cuda:0')
episode: 159 training return: tensor(94.5800, device='cuda:0')
epoch: 40 test_true_pfm: 4022.9768294707847 sim_pfm: 125.94683949399041
episode: 160 training return: tensor(179.1349, device='cuda:0')
episode: 161 training return: tensor(139.2733, device='cuda:0')
episode: 162 training return: tensor(65.3484, device='cuda:0')
episode: 163 training return: tensor(108.8335, device='cuda:0')
epoch: 41 test_true_pfm: 4025.4447844637066 sim_pfm: 140.3444080593375
episode: 164 training return: tensor(81.5033, device='cuda:0')
episode: 165 training return: tensor(54.0882, device='cuda:0')
episode: 166 training return: tensor(149.5160, device='cuda:0')
episode: 167 training return: tensor(143.2449, device='cuda:0')
epoch: 42 test_true_pfm: 4027.157577558566 sim_pfm: 116.12147082623171
episode: 168 training return: tensor(117.7289, device='cuda:0')
episode: 169 training return: tensor(119.5201, device='cuda:0')
episode: 170 training return: tensor(155.3376, device='cuda:0')
episode: 171 training return: tensor(73.8145, device='cuda:0')
epoch: 43 test_true_pfm: 4040.7963335908207 sim_pfm: 144.30776560906088
episode: 172 training return: tensor(74.2279, device='cuda:0')
episode: 173 training return: tensor(37.2802, device='cuda:0')
episode: 174 training return: tensor(137.9558, device='cuda:0')
episode: 175 training return: tensor(138.2950, device='cuda:0')
epoch: 44 test_true_pfm: 4059.1582529542375 sim_pfm: 142.42899899656186
episode: 176 training return: tensor(96.1511, device='cuda:0')
episode: 177 training return: tensor(123.2510, device='cuda:0')
episode: 178 training return: tensor(162.1408, device='cuda:0')
episode: 179 training return: tensor(59.7123, device='cuda:0')
epoch: 45 test_true_pfm: 4026.700096074636 sim_pfm: 122.70905964194874
episode: 180 training return: tensor(76.9676, device='cuda:0')
episode: 181 training return: tensor(189.2321, device='cuda:0')
episode: 182 training return: tensor(139.4025, device='cuda:0')
episode: 183 training return: tensor(139.6359, device='cuda:0')
epoch: 46 test_true_pfm: 4055.6026566044184 sim_pfm: 158.6366446901326
episode: 184 training return: tensor(116.9550, device='cuda:0')
episode: 185 training return: tensor(128.9544, device='cuda:0')
episode: 186 training return: tensor(109.1013, device='cuda:0')
episode: 187 training return: tensor(151.3267, device='cuda:0')
epoch: 47 test_true_pfm: 4020.311406334142 sim_pfm: 118.13961967508658
episode: 188 training return: tensor(135.4365, device='cuda:0')
episode: 189 training return: tensor(142.6225, device='cuda:0')
episode: 190 training return: tensor(143.6127, device='cuda:0')
episode: 191 training return: tensor(147.5336, device='cuda:0')
epoch: 48 test_true_pfm: 4068.0653061294247 sim_pfm: 103.80455834352567
episode: 192 training return: tensor(101.6785, device='cuda:0')
episode: 193 training return: tensor(-5.4697, device='cuda:0')
episode: 194 training return: tensor(111.3065, device='cuda:0')
episode: 195 training return: tensor(94.4753, device='cuda:0')
epoch: 49 test_true_pfm: 4042.4128911748826 sim_pfm: 131.19657667382853
episode: 196 training return: tensor(83.1516, device='cuda:0')
episode: 197 training return: tensor(-747.5869, device='cuda:0')
episode: 198 training return: tensor(87.1470, device='cuda:0')
episode: 199 training return: tensor(167.4375, device='cuda:0')
epoch: 50 test_true_pfm: 4034.7071472974626 sim_pfm: 97.30586004606448
episode: 200 training return: tensor(112.2343, device='cuda:0')
episode: 201 training return: tensor(112.9813, device='cuda:0')
episode: 202 training return: tensor(59.7196, device='cuda:0')
episode: 203 training return: tensor(148.7493, device='cuda:0')
epoch: 51 test_true_pfm: 4017.6400704583225 sim_pfm: 106.7295658486546
episode: 204 training return: tensor(127.9102, device='cuda:0')
episode: 205 training return: tensor(129.5770, device='cuda:0')
episode: 206 training return: tensor(120.7499, device='cuda:0')
episode: 207 training return: tensor(134.3260, device='cuda:0')
epoch: 52 test_true_pfm: 4014.4730716999693 sim_pfm: 127.5967405643217
episode: 208 training return: tensor(111.8514, device='cuda:0')
episode: 209 training return: tensor(127.2887, device='cuda:0')
episode: 210 training return: tensor(142.7097, device='cuda:0')
episode: 211 training return: tensor(56.5069, device='cuda:0')
epoch: 53 test_true_pfm: 4016.2176084011626 sim_pfm: 125.59542611421784
episode: 212 training return: tensor(123.1540, device='cuda:0')
episode: 213 training return: tensor(141.7186, device='cuda:0')
episode: 214 training return: tensor(88.9758, device='cuda:0')
episode: 215 training return: tensor(86.6576, device='cuda:0')
epoch: 54 test_true_pfm: 4065.4563163025255 sim_pfm: 122.71108929935144
episode: 216 training return: tensor(105.1117, device='cuda:0')
episode: 217 training return: tensor(133.1763, device='cuda:0')
episode: 218 training return: tensor(135.7185, device='cuda:0')
episode: 219 training return: tensor(122.0916, device='cuda:0')
epoch: 55 test_true_pfm: 4067.0741417648856 sim_pfm: 159.22600286322026
episode: 220 training return: tensor(141.1481, device='cuda:0')
episode: 221 training return: tensor(64.9101, device='cuda:0')
episode: 222 training return: tensor(92.5851, device='cuda:0')
episode: 223 training return: tensor(184.8233, device='cuda:0')
epoch: 56 test_true_pfm: 4048.2759600622035 sim_pfm: 156.43763919703392
episode: 224 training return: tensor(146.4613, device='cuda:0')
episode: 225 training return: tensor(109.9663, device='cuda:0')
episode: 226 training return: tensor(169.0024, device='cuda:0')
episode: 227 training return: tensor(67.8215, device='cuda:0')
epoch: 57 test_true_pfm: 4041.2393867213486 sim_pfm: -138.00945677169753
episode: 228 training return: tensor(79.2793, device='cuda:0')
episode: 229 training return: tensor(96.3940, device='cuda:0')
episode: 230 training return: tensor(137.2255, device='cuda:0')
episode: 231 training return: tensor(92.6755, device='cuda:0')
epoch: 58 test_true_pfm: 4062.222793039786 sim_pfm: 162.31632685714672
episode: 232 training return: tensor(153.8809, device='cuda:0')
episode: 233 training return: tensor(104.1414, device='cuda:0')
episode: 234 training return: tensor(145.5772, device='cuda:0')
episode: 235 training return: tensor(141.4324, device='cuda:0')
epoch: 59 test_true_pfm: 4048.4827786704677 sim_pfm: 148.02818250332106
episode: 236 training return: tensor(122.7356, device='cuda:0')
episode: 237 training return: tensor(136.8571, device='cuda:0')
episode: 238 training return: tensor(131.4083, device='cuda:0')
episode: 239 training return: tensor(108.8498, device='cuda:0')
epoch: 60 test_true_pfm: 4041.0236053921967 sim_pfm: 139.60299545222855
episode: 240 training return: tensor(155.4183, device='cuda:0')
episode: 241 training return: tensor(101.9778, device='cuda:0')
episode: 242 training return: tensor(160.6979, device='cuda:0')
episode: 243 training return: tensor(101.9037, device='cuda:0')
epoch: 61 test_true_pfm: 4024.9955120295363 sim_pfm: -134.84039139122856
episode: 244 training return: tensor(103.9228, device='cuda:0')
episode: 245 training return: tensor(105.3822, device='cuda:0')
episode: 246 training return: tensor(132.3619, device='cuda:0')
episode: 247 training return: tensor(104.3955, device='cuda:0')
epoch: 62 test_true_pfm: 4022.826523395171 sim_pfm: 120.85289252805524
episode: 248 training return: tensor(161.8243, device='cuda:0')
episode: 249 training return: tensor(155.4166, device='cuda:0')
episode: 250 training return: tensor(119.6224, device='cuda:0')
episode: 251 training return: tensor(106.6874, device='cuda:0')
epoch: 63 test_true_pfm: 4058.920127672154 sim_pfm: 154.2286656400344
episode: 252 training return: tensor(150.8073, device='cuda:0')
episode: 253 training return: tensor(154.7030, device='cuda:0')
episode: 254 training return: tensor(144.7502, device='cuda:0')
episode: 255 training return: tensor(146.4734, device='cuda:0')
epoch: 64 test_true_pfm: 4063.30440058838 sim_pfm: 146.159569209946
episode: 256 training return: tensor(169.9318, device='cuda:0')
episode: 257 training return: tensor(88.7486, device='cuda:0')
episode: 258 training return: tensor(186.6264, device='cuda:0')
episode: 259 training return: tensor(144.8079, device='cuda:0')
epoch: 65 test_true_pfm: 4073.521881421886 sim_pfm: 111.49632973541156
episode: 260 training return: tensor(125.5221, device='cuda:0')
episode: 261 training return: tensor(210.5661, device='cuda:0')
episode: 262 training return: tensor(129.9734, device='cuda:0')
episode: 263 training return: tensor(-953.5623, device='cuda:0')
epoch: 66 test_true_pfm: 4057.067783468657 sim_pfm: 166.12016160806525
episode: 264 training return: tensor(-776.6760, device='cuda:0')
episode: 265 training return: tensor(139.6805, device='cuda:0')
episode: 266 training return: tensor(140.6023, device='cuda:0')
episode: 267 training return: tensor(113.1452, device='cuda:0')
epoch: 67 test_true_pfm: 4064.2366428799505 sim_pfm: 171.62931059314482
episode: 268 training return: tensor(90.8332, device='cuda:0')
episode: 269 training return: tensor(72.9674, device='cuda:0')
episode: 270 training return: tensor(137.9080, device='cuda:0')
episode: 271 training return: tensor(100.2642, device='cuda:0')
epoch: 68 test_true_pfm: 4081.626516689195 sim_pfm: 128.49780931815621
episode: 272 training return: tensor(155.7503, device='cuda:0')
episode: 273 training return: tensor(147.6293, device='cuda:0')
episode: 274 training return: tensor(92.9038, device='cuda:0')
episode: 275 training return: tensor(170.5226, device='cuda:0')
epoch: 69 test_true_pfm: 4042.700092712445 sim_pfm: 169.82597401478174
episode: 276 training return: tensor(88.9513, device='cuda:0')
episode: 277 training return: tensor(137.9824, device='cuda:0')
episode: 278 training return: tensor(85.5927, device='cuda:0')
episode: 279 training return: tensor(151.5343, device='cuda:0')
epoch: 70 test_true_pfm: 4035.337080162908 sim_pfm: 166.5573906142963
episode: 280 training return: tensor(100.7346, device='cuda:0')
episode: 281 training return: tensor(136.6779, device='cuda:0')
episode: 282 training return: tensor(111.4888, device='cuda:0')
episode: 283 training return: tensor(154.0749, device='cuda:0')
epoch: 71 test_true_pfm: 4046.7928922185056 sim_pfm: 150.27090175570143
episode: 284 training return: tensor(79.1564, device='cuda:0')
episode: 285 training return: tensor(166.0243, device='cuda:0')
episode: 286 training return: tensor(128.9812, device='cuda:0')
episode: 287 training return: tensor(108.8489, device='cuda:0')
epoch: 72 test_true_pfm: 4071.4526451948445 sim_pfm: 144.1278793802097
episode: 288 training return: tensor(128.9652, device='cuda:0')
episode: 289 training return: tensor(152.4414, device='cuda:0')
episode: 290 training return: tensor(101.5046, device='cuda:0')
episode: 291 training return: tensor(168.9042, device='cuda:0')
epoch: 73 test_true_pfm: 4045.4913329264145 sim_pfm: 171.71651569472547
episode: 292 training return: tensor(95.4895, device='cuda:0')
episode: 293 training return: tensor(89.8829, device='cuda:0')
episode: 294 training return: tensor(158.0259, device='cuda:0')
episode: 295 training return: tensor(160.7989, device='cuda:0')
epoch: 74 test_true_pfm: 4059.449707064674 sim_pfm: 180.5223009143762
episode: 296 training return: tensor(108.8345, device='cuda:0')
episode: 297 training return: tensor(154.5675, device='cuda:0')
episode: 298 training return: tensor(165.7501, device='cuda:0')
episode: 299 training return: tensor(152.6425, device='cuda:0')
epoch: 75 test_true_pfm: 4047.467247608182 sim_pfm: 139.22090146967093
episode: 300 training return: tensor(170.5865, device='cuda:0')
episode: 301 training return: tensor(112.7025, device='cuda:0')
episode: 302 training return: tensor(131.2857, device='cuda:0')
episode: 303 training return: tensor(146.7489, device='cuda:0')
epoch: 76 test_true_pfm: 4074.3110214654457 sim_pfm: 185.72239675712385
episode: 304 training return: tensor(125.8081, device='cuda:0')
episode: 305 training return: tensor(131.9535, device='cuda:0')
episode: 306 training return: tensor(90.5667, device='cuda:0')
episode: 307 training return: tensor(155.4073, device='cuda:0')
epoch: 77 test_true_pfm: 4067.6123988471923 sim_pfm: 139.3894919923914
episode: 308 training return: tensor(135.8983, device='cuda:0')
episode: 309 training return: tensor(136.3037, device='cuda:0')
episode: 310 training return: tensor(148.1678, device='cuda:0')
episode: 311 training return: tensor(181.3431, device='cuda:0')
epoch: 78 test_true_pfm: 4061.841929661978 sim_pfm: 143.63582592804838
episode: 312 training return: tensor(90.7513, device='cuda:0')
episode: 313 training return: tensor(113.0995, device='cuda:0')
episode: 314 training return: tensor(156.9103, device='cuda:0')
episode: 315 training return: tensor(159.9611, device='cuda:0')
epoch: 79 test_true_pfm: 4036.9358783917337 sim_pfm: 143.61779646661793
episode: 316 training return: tensor(149.3474, device='cuda:0')
episode: 317 training return: tensor(144.1117, device='cuda:0')
episode: 318 training return: tensor(136.8792, device='cuda:0')
episode: 319 training return: tensor(134.8180, device='cuda:0')
epoch: 80 test_true_pfm: 4080.689395066878 sim_pfm: 173.99680123929284
episode: 320 training return: tensor(88.7409, device='cuda:0')
episode: 321 training return: tensor(175.0346, device='cuda:0')
episode: 322 training return: tensor(85.0425, device='cuda:0')
episode: 323 training return: tensor(115.9908, device='cuda:0')
epoch: 81 test_true_pfm: 4078.578048077044 sim_pfm: 158.10245759389363
episode: 324 training return: tensor(137.0170, device='cuda:0')
episode: 325 training return: tensor(83.3971, device='cuda:0')
episode: 326 training return: tensor(156.9012, device='cuda:0')
episode: 327 training return: tensor(70.1093, device='cuda:0')
epoch: 82 test_true_pfm: 4070.9700748425444 sim_pfm: 152.0875036138265
episode: 328 training return: tensor(156.1295, device='cuda:0')
episode: 329 training return: tensor(152.6953, device='cuda:0')
episode: 330 training return: tensor(90.1437, device='cuda:0')
episode: 331 training return: tensor(177.4957, device='cuda:0')
epoch: 83 test_true_pfm: 4065.996071128051 sim_pfm: 163.37719185826913
episode: 332 training return: tensor(115.6600, device='cuda:0')
episode: 333 training return: tensor(159.4689, device='cuda:0')
episode: 334 training return: tensor(137.2341, device='cuda:0')
episode: 335 training return: tensor(132.8479, device='cuda:0')
epoch: 84 test_true_pfm: 3180.4381857671374 sim_pfm: 158.32503955434854
episode: 336 training return: tensor(163.8024, device='cuda:0')
episode: 337 training return: tensor(116.7373, device='cuda:0')
episode: 338 training return: tensor(147.8402, device='cuda:0')
episode: 339 training return: tensor(126.7704, device='cuda:0')
epoch: 85 test_true_pfm: 4067.1726101004447 sim_pfm: 154.36474780129114
episode: 340 training return: tensor(91.2906, device='cuda:0')
episode: 341 training return: tensor(116.2215, device='cuda:0')
episode: 342 training return: tensor(175.7790, device='cuda:0')
episode: 343 training return: tensor(167.9822, device='cuda:0')
epoch: 86 test_true_pfm: 4083.233170260735 sim_pfm: 167.75319870762178
episode: 344 training return: tensor(175.3271, device='cuda:0')
episode: 345 training return: tensor(60.1147, device='cuda:0')
episode: 346 training return: tensor(58.2110, device='cuda:0')
episode: 347 training return: tensor(126.1305, device='cuda:0')
epoch: 87 test_true_pfm: 4060.681770931496 sim_pfm: 161.2254449777732
episode: 348 training return: tensor(159.5745, device='cuda:0')
episode: 349 training return: tensor(24.4290, device='cuda:0')
episode: 350 training return: tensor(180.3413, device='cuda:0')
episode: 351 training return: tensor(172.2511, device='cuda:0')
epoch: 88 test_true_pfm: 4042.1546475193495 sim_pfm: 153.48382511667054
episode: 352 training return: tensor(136.6317, device='cuda:0')
episode: 353 training return: tensor(141.3943, device='cuda:0')
episode: 354 training return: tensor(101.1947, device='cuda:0')
episode: 355 training return: tensor(124.4464, device='cuda:0')
epoch: 89 test_true_pfm: 4059.0550242087525 sim_pfm: 167.44186134981769
episode: 356 training return: tensor(114.8530, device='cuda:0')
episode: 357 training return: tensor(83.3716, device='cuda:0')
episode: 358 training return: tensor(144.6470, device='cuda:0')
episode: 359 training return: tensor(110.1956, device='cuda:0')
epoch: 90 test_true_pfm: 4042.6427690021933 sim_pfm: 158.6883772413615
episode: 360 training return: tensor(93.0309, device='cuda:0')
episode: 361 training return: tensor(136.8753, device='cuda:0')
episode: 362 training return: tensor(103.0819, device='cuda:0')
episode: 363 training return: tensor(142.2361, device='cuda:0')
epoch: 91 test_true_pfm: 4056.4040596791965 sim_pfm: 171.49091905836636
episode: 364 training return: tensor(156.7764, device='cuda:0')
episode: 365 training return: tensor(152.3930, device='cuda:0')
episode: 366 training return: tensor(148.1336, device='cuda:0')
episode: 367 training return: tensor(72.5887, device='cuda:0')
epoch: 92 test_true_pfm: 4048.29635359332 sim_pfm: 169.66227119438312
episode: 368 training return: tensor(164.0700, device='cuda:0')
episode: 369 training return: tensor(182.1027, device='cuda:0')
episode: 370 training return: tensor(44.0740, device='cuda:0')
episode: 371 training return: tensor(131.4019, device='cuda:0')
epoch: 93 test_true_pfm: 4048.4442913613 sim_pfm: 172.45476819753335
episode: 372 training return: tensor(167.8017, device='cuda:0')
episode: 373 training return: tensor(113.8047, device='cuda:0')
episode: 374 training return: tensor(169.9544, device='cuda:0')
episode: 375 training return: tensor(118.2979, device='cuda:0')
epoch: 94 test_true_pfm: 4068.433244407834 sim_pfm: 152.0005748984695
episode: 376 training return: tensor(121.9036, device='cuda:0')
episode: 377 training return: tensor(176.9314, device='cuda:0')
episode: 378 training return: tensor(151.8061, device='cuda:0')
episode: 379 training return: tensor(126.4108, device='cuda:0')
epoch: 95 test_true_pfm: 4065.7526610645145 sim_pfm: 141.29922576014846
episode: 380 training return: tensor(155.6236, device='cuda:0')
episode: 381 training return: tensor(95.2387, device='cuda:0')
episode: 382 training return: tensor(133.8555, device='cuda:0')
episode: 383 training return: tensor(155.1650, device='cuda:0')
epoch: 96 test_true_pfm: 4058.3893811953185 sim_pfm: -173.59961838355716
episode: 384 training return: tensor(140.0439, device='cuda:0')
episode: 385 training return: tensor(161.0311, device='cuda:0')
episode: 386 training return: tensor(141.9259, device='cuda:0')
episode: 387 training return: tensor(77.7414, device='cuda:0')
epoch: 97 test_true_pfm: 4050.898390922397 sim_pfm: 138.70359672238314
episode: 388 training return: tensor(88.6253, device='cuda:0')
episode: 389 training return: tensor(116.6167, device='cuda:0')
episode: 390 training return: tensor(112.3773, device='cuda:0')
episode: 391 training return: tensor(114.3901, device='cuda:0')
epoch: 98 test_true_pfm: 4073.8038938072145 sim_pfm: 157.5843645282536
episode: 392 training return: tensor(146.0628, device='cuda:0')
episode: 393 training return: tensor(-664.0497, device='cuda:0')
episode: 394 training return: tensor(144.3827, device='cuda:0')
episode: 395 training return: tensor(127.2446, device='cuda:0')
epoch: 99 test_true_pfm: 4078.2878937281944 sim_pfm: 185.80118015263966
episode: 396 training return: tensor(129.7993, device='cuda:0')
episode: 397 training return: tensor(159.7155, device='cuda:0')
episode: 398 training return: tensor(140.5148, device='cuda:0')
episode: 399 training return: tensor(-669.3007, device='cuda:0')
epoch: 100 test_true_pfm: 4082.426149027313 sim_pfm: 145.64429174456745
episode: 400 training return: tensor(114.3098, device='cuda:0')
episode: 401 training return: tensor(144.2222, device='cuda:0')
episode: 402 training return: tensor(162.5557, device='cuda:0')
episode: 403 training return: tensor(-835.2205, device='cuda:0')
epoch: 101 test_true_pfm: 4039.948151847282 sim_pfm: 160.57745081440467
episode: 404 training return: tensor(121.9453, device='cuda:0')
episode: 405 training return: tensor(160.9095, device='cuda:0')
episode: 406 training return: tensor(141.0745, device='cuda:0')
episode: 407 training return: tensor(115.2197, device='cuda:0')
epoch: 102 test_true_pfm: 4049.092439245996 sim_pfm: 153.61959634780456
episode: 408 training return: tensor(150.2942, device='cuda:0')
episode: 409 training return: tensor(-707.5653, device='cuda:0')
episode: 410 training return: tensor(131.9467, device='cuda:0')
episode: 411 training return: tensor(137.4191, device='cuda:0')
epoch: 103 test_true_pfm: 4087.730396678015 sim_pfm: 160.50476744708916
episode: 412 training return: tensor(94.0393, device='cuda:0')
episode: 413 training return: tensor(135.5129, device='cuda:0')
episode: 414 training return: tensor(126.7768, device='cuda:0')
episode: 415 training return: tensor(74.5666, device='cuda:0')
epoch: 104 test_true_pfm: 4076.0221881701978 sim_pfm: 156.89688304628362
episode: 416 training return: tensor(127.4264, device='cuda:0')
episode: 417 training return: tensor(116.1615, device='cuda:0')
episode: 418 training return: tensor(165.4302, device='cuda:0')
episode: 419 training return: tensor(153.4464, device='cuda:0')
epoch: 105 test_true_pfm: 4048.3624187526125 sim_pfm: 173.17713175632525
episode: 420 training return: tensor(145.4242, device='cuda:0')
episode: 421 training return: tensor(162.6221, device='cuda:0')
episode: 422 training return: tensor(-820.4291, device='cuda:0')
episode: 423 training return: tensor(153.1465, device='cuda:0')
epoch: 106 test_true_pfm: 4059.5031806620136 sim_pfm: 166.57263624852445
episode: 424 training return: tensor(87.4027, device='cuda:0')
episode: 425 training return: tensor(167.3302, device='cuda:0')
episode: 426 training return: tensor(99.0477, device='cuda:0')
episode: 427 training return: tensor(147.9744, device='cuda:0')
epoch: 107 test_true_pfm: 4066.7444225233976 sim_pfm: 132.20161067981584
episode: 428 training return: tensor(109.0828, device='cuda:0')
episode: 429 training return: tensor(152.3818, device='cuda:0')
episode: 430 training return: tensor(152.9041, device='cuda:0')
episode: 431 training return: tensor(160.3165, device='cuda:0')
epoch: 108 test_true_pfm: 4053.699023237874 sim_pfm: 150.488761632451
episode: 432 training return: tensor(171.4201, device='cuda:0')
episode: 433 training return: tensor(143.1935, device='cuda:0')
episode: 434 training return: tensor(55.7011, device='cuda:0')
episode: 435 training return: tensor(92.7747, device='cuda:0')
epoch: 109 test_true_pfm: 4050.6746503029367 sim_pfm: 112.09660546006246
episode: 436 training return: tensor(115.0918, device='cuda:0')
episode: 437 training return: tensor(112.2443, device='cuda:0')
episode: 438 training return: tensor(124.1482, device='cuda:0')
episode: 439 training return: tensor(146.6749, device='cuda:0')
epoch: 110 test_true_pfm: 4046.994679891914 sim_pfm: 143.7130354901892
episode: 440 training return: tensor(145.0703, device='cuda:0')
episode: 441 training return: tensor(81.6475, device='cuda:0')
episode: 442 training return: tensor(93.3734, device='cuda:0')
episode: 443 training return: tensor(135.5743, device='cuda:0')
epoch: 111 test_true_pfm: 4055.9975469363458 sim_pfm: 149.46321317766947
episode: 444 training return: tensor(133.6656, device='cuda:0')
episode: 445 training return: tensor(131.9569, device='cuda:0')
episode: 446 training return: tensor(115.4503, device='cuda:0')
episode: 447 training return: tensor(56.2146, device='cuda:0')
epoch: 112 test_true_pfm: 4061.671278519996 sim_pfm: 157.28876997141438
episode: 448 training return: tensor(76.9870, device='cuda:0')
episode: 449 training return: tensor(110.1816, device='cuda:0')
episode: 450 training return: tensor(121.5363, device='cuda:0')
episode: 451 training return: tensor(161.7048, device='cuda:0')
epoch: 113 test_true_pfm: 4049.896135548513 sim_pfm: 174.90089192109494
episode: 452 training return: tensor(92.8358, device='cuda:0')
episode: 453 training return: tensor(148.5884, device='cuda:0')
episode: 454 training return: tensor(144.8400, device='cuda:0')
episode: 455 training return: tensor(185.3045, device='cuda:0')
epoch: 114 test_true_pfm: 4014.744336824513 sim_pfm: 158.79045614447872
episode: 456 training return: tensor(154.4451, device='cuda:0')
episode: 457 training return: tensor(96.8705, device='cuda:0')
episode: 458 training return: tensor(141.6610, device='cuda:0')
episode: 459 training return: tensor(160.2507, device='cuda:0')
epoch: 115 test_true_pfm: 4076.982577786163 sim_pfm: 172.61483234397988
episode: 460 training return: tensor(107.4901, device='cuda:0')
episode: 461 training return: tensor(112.1312, device='cuda:0')
episode: 462 training return: tensor(114.1205, device='cuda:0')
episode: 463 training return: tensor(129.0795, device='cuda:0')
epoch: 116 test_true_pfm: 4071.5699586287024 sim_pfm: 174.00530019949656
episode: 464 training return: tensor(118.2663, device='cuda:0')
episode: 465 training return: tensor(135.7982, device='cuda:0')
episode: 466 training return: tensor(153.6332, device='cuda:0')
episode: 467 training return: tensor(133.1279, device='cuda:0')
epoch: 117 test_true_pfm: 4080.8742132720567 sim_pfm: 161.28792502965857
episode: 468 training return: tensor(146.2863, device='cuda:0')
episode: 469 training return: tensor(145.8733, device='cuda:0')
episode: 470 training return: tensor(150.9597, device='cuda:0')
episode: 471 training return: tensor(100.7533, device='cuda:0')
epoch: 118 test_true_pfm: 4079.9289477658517 sim_pfm: 170.99243119771322
episode: 472 training return: tensor(142.2647, device='cuda:0')
episode: 473 training return: tensor(170.4412, device='cuda:0')
episode: 474 training return: tensor(158.8227, device='cuda:0')
episode: 475 training return: tensor(174.6951, device='cuda:0')
epoch: 119 test_true_pfm: 4078.6895715830447 sim_pfm: 170.48882668865068
episode: 476 training return: tensor(124.6889, device='cuda:0')
episode: 477 training return: tensor(136.9617, device='cuda:0')
episode: 478 training return: tensor(171.9313, device='cuda:0')
episode: 479 training return: tensor(158.1211, device='cuda:0')
epoch: 120 test_true_pfm: 4063.1642805960823 sim_pfm: 181.3825754748541
episode: 480 training return: tensor(117.9382, device='cuda:0')
episode: 481 training return: tensor(117.5724, device='cuda:0')
episode: 482 training return: tensor(149.4855, device='cuda:0')
episode: 483 training return: tensor(138.7633, device='cuda:0')
epoch: 121 test_true_pfm: 4041.4278042986384 sim_pfm: 176.30091338951993
episode: 484 training return: tensor(151.5165, device='cuda:0')
episode: 485 training return: tensor(141.3693, device='cuda:0')
episode: 486 training return: tensor(150.7129, device='cuda:0')
episode: 487 training return: tensor(155.0682, device='cuda:0')
epoch: 122 test_true_pfm: 4036.6900674149874 sim_pfm: 134.17850625805053
episode: 488 training return: tensor(174.2954, device='cuda:0')
episode: 489 training return: tensor(147.2471, device='cuda:0')
episode: 490 training return: tensor(72.3957, device='cuda:0')
episode: 491 training return: tensor(134.9681, device='cuda:0')
epoch: 123 test_true_pfm: 4101.38077794061 sim_pfm: 164.44351141809602
episode: 492 training return: tensor(153.0990, device='cuda:0')
episode: 493 training return: tensor(127.9815, device='cuda:0')
episode: 494 training return: tensor(118.8916, device='cuda:0')
episode: 495 training return: tensor(166.8302, device='cuda:0')
epoch: 124 test_true_pfm: 4078.9031880377443 sim_pfm: 158.68753339977897
episode: 496 training return: tensor(100.2803, device='cuda:0')
episode: 497 training return: tensor(168.4486, device='cuda:0')
episode: 498 training return: tensor(144.0771, device='cuda:0')
episode: 499 training return: tensor(189.4724, device='cuda:0')
epoch: 125 test_true_pfm: 4078.7821586719506 sim_pfm: 177.3720213311511
episode: 500 training return: tensor(158.4371, device='cuda:0')
episode: 501 training return: tensor(98.4974, device='cuda:0')
episode: 502 training return: tensor(140.7401, device='cuda:0')
episode: 503 training return: tensor(143.7535, device='cuda:0')
epoch: 126 test_true_pfm: 4082.7915099594684 sim_pfm: 150.47572397019636
episode: 504 training return: tensor(153.2804, device='cuda:0')
episode: 505 training return: tensor(154.8508, device='cuda:0')
episode: 506 training return: tensor(153.9109, device='cuda:0')
episode: 507 training return: tensor(152.2297, device='cuda:0')
epoch: 127 test_true_pfm: 4081.6088787241865 sim_pfm: 165.57067371788435
episode: 508 training return: tensor(172.6222, device='cuda:0')
episode: 509 training return: tensor(192.8166, device='cuda:0')
episode: 510 training return: tensor(152.8844, device='cuda:0')
episode: 511 training return: tensor(168.9975, device='cuda:0')
epoch: 128 test_true_pfm: 4024.3448543210393 sim_pfm: 177.4820851525195
episode: 512 training return: tensor(103.9855, device='cuda:0')
episode: 513 training return: tensor(97.5084, device='cuda:0')
episode: 514 training return: tensor(169.5236, device='cuda:0')
episode: 515 training return: tensor(171.2173, device='cuda:0')
epoch: 129 test_true_pfm: 4087.449838028459 sim_pfm: 177.07551859048544
episode: 516 training return: tensor(156.1098, device='cuda:0')
episode: 517 training return: tensor(143.2815, device='cuda:0')
episode: 518 training return: tensor(159.0464, device='cuda:0')
episode: 519 training return: tensor(163.1621, device='cuda:0')
epoch: 130 test_true_pfm: 4096.811205313349 sim_pfm: 180.75002648060521
episode: 520 training return: tensor(158.0992, device='cuda:0')
episode: 521 training return: tensor(165.6786, device='cuda:0')
episode: 522 training return: tensor(136.1275, device='cuda:0')
episode: 523 training return: tensor(143.0569, device='cuda:0')
epoch: 131 test_true_pfm: 4095.729998817256 sim_pfm: 192.81237174546308
episode: 524 training return: tensor(166.6619, device='cuda:0')
episode: 525 training return: tensor(177.0609, device='cuda:0')
episode: 526 training return: tensor(159.4154, device='cuda:0')
episode: 527 training return: tensor(90.1645, device='cuda:0')
epoch: 132 test_true_pfm: 4056.6672027470136 sim_pfm: 180.85491744767447
episode: 528 training return: tensor(97.0382, device='cuda:0')
episode: 529 training return: tensor(175.5321, device='cuda:0')
episode: 530 training return: tensor(170.4484, device='cuda:0')
episode: 531 training return: tensor(113.0195, device='cuda:0')
epoch: 133 test_true_pfm: 4083.3031005271605 sim_pfm: 173.34926721747615
episode: 532 training return: tensor(136.2436, device='cuda:0')
episode: 533 training return: tensor(172.3109, device='cuda:0')
episode: 534 training return: tensor(161.9147, device='cuda:0')
episode: 535 training return: tensor(140.3071, device='cuda:0')
epoch: 134 test_true_pfm: 4074.5361916072216 sim_pfm: 141.84240485631744
episode: 536 training return: tensor(155.7050, device='cuda:0')
episode: 537 training return: tensor(174.7157, device='cuda:0')
episode: 538 training return: tensor(117.0632, device='cuda:0')
episode: 539 training return: tensor(121.1981, device='cuda:0')
epoch: 135 test_true_pfm: 4048.456335293047 sim_pfm: 98.96569467085646
episode: 540 training return: tensor(118.7328, device='cuda:0')
episode: 541 training return: tensor(159.6078, device='cuda:0')
episode: 542 training return: tensor(184.4416, device='cuda:0')
episode: 543 training return: tensor(145.8090, device='cuda:0')
epoch: 136 test_true_pfm: 4038.8204694404862 sim_pfm: 153.1013507798974
episode: 544 training return: tensor(151.8462, device='cuda:0')
episode: 545 training return: tensor(138.6036, device='cuda:0')
episode: 546 training return: tensor(179.6522, device='cuda:0')
episode: 547 training return: tensor(99.5554, device='cuda:0')
epoch: 137 test_true_pfm: 4075.1341931794054 sim_pfm: 170.3766434599529
episode: 548 training return: tensor(133.3528, device='cuda:0')
episode: 549 training return: tensor(151.4411, device='cuda:0')
episode: 550 training return: tensor(145.4095, device='cuda:0')
episode: 551 training return: tensor(175.1909, device='cuda:0')
epoch: 138 test_true_pfm: 4088.382202492338 sim_pfm: 174.41793253470678
episode: 552 training return: tensor(153.6355, device='cuda:0')
episode: 553 training return: tensor(146.2038, device='cuda:0')
episode: 554 training return: tensor(124.8414, device='cuda:0')
episode: 555 training return: tensor(116.5432, device='cuda:0')
epoch: 139 test_true_pfm: 4080.362945152923 sim_pfm: 154.58976715392782
episode: 556 training return: tensor(170.8258, device='cuda:0')
episode: 557 training return: tensor(130.6770, device='cuda:0')
episode: 558 training return: tensor(137.9519, device='cuda:0')
episode: 559 training return: tensor(167.0019, device='cuda:0')
epoch: 140 test_true_pfm: 4060.759883461893 sim_pfm: 162.22537392102336
episode: 560 training return: tensor(160.8664, device='cuda:0')
episode: 561 training return: tensor(166.6046, device='cuda:0')
episode: 562 training return: tensor(157.4102, device='cuda:0')
episode: 563 training return: tensor(134.0772, device='cuda:0')
epoch: 141 test_true_pfm: 4080.1594806572552 sim_pfm: 180.65348492442476
episode: 564 training return: tensor(162.1704, device='cuda:0')
episode: 565 training return: tensor(148.1062, device='cuda:0')
episode: 566 training return: tensor(135.6004, device='cuda:0')
episode: 567 training return: tensor(130.9810, device='cuda:0')
epoch: 142 test_true_pfm: 4091.0400333686953 sim_pfm: 176.60375500420923
episode: 568 training return: tensor(129.1615, device='cuda:0')
episode: 569 training return: tensor(156.0952, device='cuda:0')
episode: 570 training return: tensor(124.6997, device='cuda:0')
episode: 571 training return: tensor(169.7571, device='cuda:0')
epoch: 143 test_true_pfm: 4076.423982922031 sim_pfm: 137.58299177013882
episode: 572 training return: tensor(107.1169, device='cuda:0')
episode: 573 training return: tensor(99.9784, device='cuda:0')
episode: 574 training return: tensor(130.1640, device='cuda:0')
episode: 575 training return: tensor(157.1439, device='cuda:0')
epoch: 144 test_true_pfm: 4073.4707419319616 sim_pfm: 194.79148999029226
episode: 576 training return: tensor(160.4280, device='cuda:0')
episode: 577 training return: tensor(158.8014, device='cuda:0')
episode: 578 training return: tensor(107.8358, device='cuda:0')
episode: 579 training return: tensor(171.6562, device='cuda:0')
epoch: 145 test_true_pfm: 4073.8543446879485 sim_pfm: 179.2099319002591
episode: 580 training return: tensor(180.4573, device='cuda:0')
episode: 581 training return: tensor(175.8281, device='cuda:0')
episode: 582 training return: tensor(146.9192, device='cuda:0')
episode: 583 training return: tensor(173.6894, device='cuda:0')
epoch: 146 test_true_pfm: 4092.9506087006484 sim_pfm: 177.19252390208808
episode: 584 training return: tensor(-720.4270, device='cuda:0')
episode: 585 training return: tensor(161.6866, device='cuda:0')
episode: 586 training return: tensor(-1.6587, device='cuda:0')
episode: 587 training return: tensor(159.8520, device='cuda:0')
epoch: 147 test_true_pfm: 4044.675867986318 sim_pfm: 176.16089816214904
episode: 588 training return: tensor(147.1192, device='cuda:0')
episode: 589 training return: tensor(65.1556, device='cuda:0')
episode: 590 training return: tensor(139.9056, device='cuda:0')
episode: 591 training return: tensor(158.1334, device='cuda:0')
epoch: 148 test_true_pfm: 4098.746202760515 sim_pfm: 180.58535906827697
episode: 592 training return: tensor(129.2308, device='cuda:0')
episode: 593 training return: tensor(136.4440, device='cuda:0')
episode: 594 training return: tensor(154.7939, device='cuda:0')
episode: 595 training return: tensor(158.1335, device='cuda:0')
epoch: 149 test_true_pfm: 4088.6509784232444 sim_pfm: 182.90350367724508
episode: 596 training return: tensor(149.5680, device='cuda:0')
episode: 597 training return: tensor(153.6291, device='cuda:0')
episode: 598 training return: tensor(-789.6395, device='cuda:0')
episode: 599 training return: tensor(-907.0730, device='cuda:0')
epoch: 150 test_true_pfm: 4074.4157685276055 sim_pfm: 195.22716480179224
