['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '0']
epoch: 0 training_loss 0.31040515199303625 test_loss: 0.1935383439064026
epoch: 1 training_loss 0.17626357614994048 test_loss: 0.18267289400100709
epoch: 2 training_loss 0.15493510656058787 test_loss: 0.13793776035308838
epoch: 3 training_loss 0.1434070822969079 test_loss: 0.13850443363189696
epoch: 4 training_loss 0.13788095824420452 test_loss: 0.14562724828720092
epoch: 5 training_loss 0.12904183458536864 test_loss: 0.13354984521865845
epoch: 6 training_loss 0.1208313275873661 test_loss: 0.11074942350387573
epoch: 7 training_loss 0.12396422550082206 test_loss: 0.14250127077102662
epoch: 8 training_loss 0.11861386064440012 test_loss: 0.10761208534240722
epoch: 9 training_loss 0.12081320151686668 test_loss: 0.12543414831161498
epoch: 10 training_loss 0.12038561994209886 test_loss: 0.11718006134033203
epoch: 11 training_loss 0.11678930405527353 test_loss: 0.12212369441986085
epoch: 12 training_loss 0.11810651330277323 test_loss: 0.11771063804626465
epoch: 13 training_loss 0.11062971621751785 test_loss: 0.12683436870574952
epoch: 14 training_loss 0.11681920047849417 test_loss: 0.12073214054107666
epoch: 15 training_loss 0.12163633983582259 test_loss: 0.12291946411132812
epoch: 16 training_loss 0.12160738248378039 test_loss: 0.12303242683410645
epoch: 17 training_loss 0.10489008896052837 test_loss: 0.11711199283599853
epoch: 18 training_loss 0.09779828913509846 test_loss: 0.11566530466079712
epoch: 19 training_loss 0.1095256357640028 test_loss: 0.11666938066482543
epoch: 20 training_loss 0.11036962442100048 test_loss: 0.09446811079978942
epoch: 21 training_loss 0.11053661700338126 test_loss: 0.11976978778839112
epoch: 22 training_loss 0.10678269920870662 test_loss: 0.09483935832977294
epoch: 23 training_loss 0.10781553529202938 test_loss: 0.1069690465927124
epoch: 24 training_loss 0.11364531081169843 test_loss: 0.11848242282867431
epoch: 25 training_loss 0.10718204095959663 test_loss: 0.11524735689163208
epoch: 26 training_loss 0.10667295258492232 test_loss: 0.10909833908081054
epoch: 27 training_loss 0.12004629030823707 test_loss: 0.11836984157562255
epoch: 28 training_loss 0.10919705513864755 test_loss: 0.11574434041976929
epoch: 29 training_loss 0.10884586755186319 test_loss: 0.10112196207046509
epoch: 30 training_loss 0.1091637546569109 test_loss: 0.10660631656646728
epoch: 31 training_loss 0.110520243588835 test_loss: 0.11565742492675782
epoch: 32 training_loss 0.10658138694241642 test_loss: 0.11193902492523193
epoch: 33 training_loss 0.11160459123551845 test_loss: 0.12349753379821778
epoch: 34 training_loss 0.11181055141612888 test_loss: 0.11853729486465454
epoch: 35 training_loss 0.10062553321942687 test_loss: 0.11365407705307007
epoch: 36 training_loss 0.10533851116895676 test_loss: 0.11159852743148804
epoch: 37 training_loss 0.11040530040860176 test_loss: 0.11912912130355835
epoch: 38 training_loss 0.10346965309232474 test_loss: 0.09681841135025024
epoch: 39 training_loss 0.11235178578644991 test_loss: 0.12799052000045777
epoch: 40 training_loss 0.10291051097214222 test_loss: 0.1094821572303772
epoch: 41 training_loss 0.10683763105422259 test_loss: 0.10724939107894897
epoch: 42 training_loss 0.10356466900557279 test_loss: 0.11171025037765503
epoch: 43 training_loss 0.1049764733016491 test_loss: 0.11010266542434692
epoch: 44 training_loss 0.1067337908409536 test_loss: 0.10348800420761109
epoch: 45 training_loss 0.10931474760174752 test_loss: 0.13530590534210205
epoch: 46 training_loss 0.11067465037107467 test_loss: 0.10741677284240722
epoch: 47 training_loss 0.11065298303961754 test_loss: 0.10047051906585694
epoch: 48 training_loss 0.10083793953061104 test_loss: 0.1098319411277771
epoch: 49 training_loss 0.10606353745795787 test_loss: 0.10078926086425781
epoch: 50 training_loss 0.110032897926867 test_loss: 0.11345233917236328
epoch: 51 training_loss 0.10204611714929342 test_loss: 0.12005590200424195
epoch: 52 training_loss 0.10663836838677526 test_loss: 0.11466916799545288
epoch: 53 training_loss 0.10983453616499901 test_loss: 0.12185962200164795
epoch: 54 training_loss 0.105392608307302 test_loss: 0.09516583681106568
epoch: 55 training_loss 0.10635512623935937 test_loss: 0.10073957443237305
epoch: 56 training_loss 0.10838548589497804 test_loss: 0.11742168664932251
epoch: 57 training_loss 0.10561190757900477 test_loss: 0.11496981382369995
epoch: 58 training_loss 0.10811442175880075 test_loss: 0.11340516805648804
epoch: 59 training_loss 0.10234912693500518 test_loss: 0.096063894033432
epoch: 60 training_loss 0.1129737475514412 test_loss: 0.11417235136032104
epoch: 61 training_loss 0.10533504996448756 test_loss: 0.10165251493453979
epoch: 62 training_loss 0.10446597874164582 test_loss: 0.12249113321304321
epoch: 63 training_loss 0.10426325775682926 test_loss: 0.11010382175445557
epoch: 64 training_loss 0.10221217099577189 test_loss: 0.11319457292556763
epoch: 65 training_loss 0.10233226265758276 test_loss: 0.10805675983428956
epoch: 66 training_loss 0.10371273446828128 test_loss: 0.11337287425994873
epoch: 67 training_loss 0.109936046525836 test_loss: 0.11668096780776978
epoch: 68 training_loss 0.10886445596814155 test_loss: 0.1017850399017334
epoch: 69 training_loss 0.10674547918140888 test_loss: 0.1183374285697937
epoch: 70 training_loss 0.11332736544311046 test_loss: 0.11252479553222657
epoch: 71 training_loss 0.10752096883952618 test_loss: 0.11130962371826172
epoch: 72 training_loss 0.09973103176802396 test_loss: 0.11562074422836303
epoch: 73 training_loss 0.11408977046608924 test_loss: 0.1077724575996399
epoch: 74 training_loss 0.10660509418696165 test_loss: 0.10053359270095825
epoch: 75 training_loss 0.10595499958842992 test_loss: 0.10930968523025512
epoch: 76 training_loss 0.10892903234809637 test_loss: 0.10189696550369262
epoch: 77 training_loss 0.10438060503453016 test_loss: 0.09933624267578126
epoch: 78 training_loss 0.103589246571064 test_loss: 0.1150553822517395
epoch: 79 training_loss 0.10411221615970134 test_loss: 0.10242867469787598
epoch: 80 training_loss 0.11271520690992475 test_loss: 0.10617302656173706
epoch: 81 training_loss 0.10252171600237489 test_loss: 0.1076863169670105
epoch: 82 training_loss 0.10610204067081214 test_loss: 0.10953745841979981
epoch: 83 training_loss 0.11256705038249493 test_loss: 0.11390694379806518
epoch: 84 training_loss 0.10488441552966833 test_loss: 0.10352139472961426
epoch: 85 training_loss 0.10654177036136389 test_loss: 0.10810197591781616
epoch: 86 training_loss 0.10784573195502162 test_loss: 0.11080012321472169
epoch: 87 training_loss 0.10416913960129022 test_loss: 0.11295480728149414
epoch: 88 training_loss 0.10810986332595349 test_loss: 0.10424270629882812
epoch: 89 training_loss 0.10360775923356413 test_loss: 0.09370878338813782
epoch: 90 training_loss 0.10733303867280483 test_loss: 0.11461504697799682
epoch: 91 training_loss 0.10617360729724169 test_loss: 0.11193935871124268
epoch: 92 training_loss 0.11308040119707584 test_loss: 0.10924140214920045
epoch: 93 training_loss 0.10265678497031332 test_loss: 0.10590577125549316
epoch: 94 training_loss 0.1017549820896238 test_loss: 0.10332256555557251
epoch: 95 training_loss 0.10938877150416375 test_loss: 0.12206795215606689
epoch: 96 training_loss 0.09611000729724765 test_loss: 0.10601996183395386
epoch: 97 training_loss 0.09097466140985488 test_loss: 0.09949966669082641
epoch: 98 training_loss 0.10414162196218968 test_loss: 0.11153321266174317
epoch: 99 training_loss 0.1060769423097372 test_loss: 0.10602766275405884
epoch: 100 training_loss 0.10249891182407737 test_loss: 0.11046305894851685
epoch: 101 training_loss 0.10237945582717657 test_loss: 0.09199248552322388
epoch: 102 training_loss 0.10398280644789339 test_loss: 0.11956945657730103
epoch: 103 training_loss 0.11585643518716097 test_loss: 0.10235685110092163
epoch: 104 training_loss 0.10863527158275246 test_loss: 0.10555808544158936
epoch: 105 training_loss 0.09463709525763989 test_loss: 0.11006438732147217
epoch: 106 training_loss 0.10110085155814887 test_loss: 0.09993003606796265
epoch: 107 training_loss 0.10302858460694551 test_loss: 0.1198890209197998
epoch: 108 training_loss 0.10572810772806406 test_loss: 0.09465835690498352
epoch: 109 training_loss 0.10634348820894957 test_loss: 0.10805076360702515
epoch: 110 training_loss 0.10443428914994002 test_loss: 0.11158442497253418
epoch: 111 training_loss 0.11119576781988144 test_loss: 0.13633463382720948
epoch: 112 training_loss 0.10467657580971718 test_loss: 0.11034481525421143
epoch: 113 training_loss 0.10218271967023611 test_loss: 0.1048930525779724
epoch: 114 training_loss 0.10043057465925813 test_loss: 0.09719008207321167
epoch: 115 training_loss 0.09551277112215757 test_loss: 0.13020360469818115
epoch: 116 training_loss 0.11147253729403019 test_loss: 0.10638270378112794
epoch: 117 training_loss 0.10566704209893941 test_loss: 0.11761537790298462
epoch: 118 training_loss 0.10671828038990498 test_loss: 0.11015336513519287
epoch: 119 training_loss 0.11113237742334604 test_loss: 0.09994122982025147
epoch: 120 training_loss 0.10406503401696682 test_loss: 0.11889511346817017
epoch: 121 training_loss 0.10506874769926071 test_loss: 0.10877459049224854
epoch: 122 training_loss 0.1106818838790059 test_loss: 0.09470921158790588
epoch: 123 training_loss 0.10138605834916234 test_loss: 0.10749130249023438
epoch: 124 training_loss 0.10509874692186713 test_loss: 0.10670222043991089
epoch: 125 training_loss 0.09865513369441033 test_loss: 0.11684885025024414
epoch: 126 training_loss 0.10634869321249425 test_loss: 0.09832655191421509
epoch: 127 training_loss 0.10361585533246398 test_loss: 0.11096335649490356
epoch: 128 training_loss 0.10502296734601259 test_loss: 0.1214978575706482
epoch: 129 training_loss 0.11091164745390415 test_loss: 0.10661345720291138
epoch: 130 training_loss 0.10265478422865272 test_loss: 0.09944757223129272
epoch: 131 training_loss 0.09944113072007894 test_loss: 0.092868971824646
epoch: 132 training_loss 0.10673252742737532 test_loss: 0.11286671161651611
epoch: 133 training_loss 0.1027478433214128 test_loss: 0.11678751707077026
epoch: 134 training_loss 0.1068294920399785 test_loss: 0.12024956941604614
epoch: 135 training_loss 0.09796642240136862 test_loss: 0.11520686149597167
epoch: 136 training_loss 0.09961677987128496 test_loss: 0.09415091276168823
epoch: 137 training_loss 0.1054795760475099 test_loss: 0.1043400526046753
epoch: 138 training_loss 0.10573379389941692 test_loss: 0.1019084095954895
epoch: 139 training_loss 0.10428778579458595 test_loss: 0.09837681651115418
epoch: 140 training_loss 0.10628794163465499 test_loss: 0.12730059623718262
epoch: 141 training_loss 0.10706439012661577 test_loss: 0.10970902442932129
epoch: 142 training_loss 0.10300271661952137 test_loss: 0.1106670618057251
epoch: 143 training_loss 0.11021442640572786 test_loss: 0.11226385831832886
epoch: 144 training_loss 0.10567318070679903 test_loss: 0.10591686964035034
epoch: 145 training_loss 0.10527551541104913 test_loss: 0.11247273683547973
epoch: 146 training_loss 0.10076523093506694 test_loss: 0.11126217842102051
epoch: 147 training_loss 0.10043556319549679 test_loss: 0.10502632856369018
epoch: 148 training_loss 0.10137993760406971 test_loss: 0.10864764451980591
epoch: 149 training_loss 0.10055385580286384 test_loss: 0.10374671220779419
epoch: 0 training_loss 49.63102905273438 test_loss: 24.615399169921876
epoch: 1 training_loss 18.596471729278566 test_loss: 14.6579345703125
epoch: 2 training_loss 13.200260791778565 test_loss: 11.515761566162109
epoch: 3 training_loss 10.56753345489502 test_loss: 9.766110229492188
epoch: 4 training_loss 8.712801117897033 test_loss: 8.056861114501952
epoch: 5 training_loss 7.551800961494446 test_loss: 7.133634948730469
epoch: 6 training_loss 6.661382732391357 test_loss: 6.32707405090332
epoch: 7 training_loss 6.059565606117249 test_loss: 5.869784927368164
epoch: 8 training_loss 5.510230894088745 test_loss: 5.249103164672851
epoch: 9 training_loss 5.208041563034057 test_loss: 5.152350616455078
epoch: 10 training_loss 4.7964421653747555 test_loss: 4.603088760375977
epoch: 11 training_loss 4.478775534629822 test_loss: 4.2742053985595705
epoch: 12 training_loss 4.2890016865730285 test_loss: 3.9787372589111327
epoch: 13 training_loss 4.098481631278991 test_loss: 3.8731971740722657
epoch: 14 training_loss 3.998663408756256 test_loss: 3.6979042053222657
epoch: 15 training_loss 3.7778626036643983 test_loss: 3.7444705963134766
epoch: 16 training_loss 3.6169981479644777 test_loss: 3.400217056274414
epoch: 17 training_loss 3.466827292442322 test_loss: 3.4494049072265627
epoch: 18 training_loss 3.398536894321442 test_loss: 3.4124610900878904
epoch: 19 training_loss 3.1668831801414488 test_loss: 3.1921674728393556
epoch: 20 training_loss 3.1375865960121154 test_loss: 3.043418121337891
epoch: 21 training_loss 3.0378193187713625 test_loss: 3.009466361999512
epoch: 22 training_loss 2.9681368136405943 test_loss: 2.903410530090332
epoch: 23 training_loss 2.911801357269287 test_loss: 2.7870868682861327
epoch: 24 training_loss 2.8904643058776855 test_loss: 2.933658409118652
epoch: 25 training_loss 2.8479883909225463 test_loss: 2.859908676147461
epoch: 26 training_loss 2.7121262836456297 test_loss: 2.5908042907714846
epoch: 27 training_loss 2.6801316380500793 test_loss: 2.6675556182861326
epoch: 28 training_loss 2.6132717609405516 test_loss: 2.5363157272338865
epoch: 29 training_loss 2.62935754776001 test_loss: 2.5581783294677733
epoch: 30 training_loss 2.6371569538116457 test_loss: 2.492960739135742
epoch: 31 training_loss 2.5114468550682068 test_loss: 2.452745246887207
epoch: 32 training_loss 2.530588998794556 test_loss: 2.452539825439453
epoch: 33 training_loss 2.3931753039360046 test_loss: 2.3951078414916993
epoch: 34 training_loss 2.3962164223194122 test_loss: 2.478361129760742
epoch: 35 training_loss 2.364052256345749 test_loss: 2.4389274597167967
epoch: 36 training_loss 2.3535414946079256 test_loss: 2.218968963623047
epoch: 37 training_loss 2.2647901916503907 test_loss: 2.357952117919922
epoch: 38 training_loss 2.29756454706192 test_loss: 2.1787519454956055
epoch: 39 training_loss 2.2642397487163546 test_loss: 2.1925050735473635
epoch: 40 training_loss 2.2195875489711763 test_loss: 2.1817712783813477
epoch: 41 training_loss 2.25250559926033 test_loss: 2.099562072753906
epoch: 42 training_loss 2.229092116355896 test_loss: 2.1507499694824217
epoch: 43 training_loss 2.1762614941596983 test_loss: 2.11331729888916
epoch: 44 training_loss 2.1505767714977266 test_loss: 2.0073076248168946
epoch: 45 training_loss 2.1310533821582793 test_loss: 2.0042129516601563
epoch: 46 training_loss 2.1010646343231203 test_loss: 2.0371021270751952
epoch: 47 training_loss 2.1037453639507295 test_loss: 2.1320940017700196
epoch: 48 training_loss 2.0501016688346865 test_loss: 2.2024349212646483
epoch: 49 training_loss 2.0564552426338194 test_loss: 2.0058786392211916
epoch: 50 training_loss 2.00189772605896 test_loss: 1.956292724609375
epoch: 51 training_loss 2.0437948536872863 test_loss: 2.132351303100586
epoch: 52 training_loss 2.0649891936779023 test_loss: 2.0714509963989256
epoch: 53 training_loss 2.008497886657715 test_loss: 1.9865474700927734
epoch: 54 training_loss 2.00249764084816 test_loss: 1.8989652633666991
epoch: 55 training_loss 1.9859449362754822 test_loss: 2.0474048614501954
epoch: 56 training_loss 2.0113079106807707 test_loss: 1.9510454177856444
epoch: 57 training_loss 1.8957298290729523 test_loss: 2.0026269912719727
epoch: 58 training_loss 1.949670125246048 test_loss: 1.8674755096435547
epoch: 59 training_loss 1.99108544588089 test_loss: 1.8853805541992188
epoch: 60 training_loss 1.9046481800079347 test_loss: 1.8508922576904296
epoch: 61 training_loss 1.8881208002567291 test_loss: 1.8902149200439453
epoch: 62 training_loss 1.8321147561073303 test_loss: 1.8876766204833983
epoch: 63 training_loss 1.9165304136276244 test_loss: 1.8464132308959962
epoch: 64 training_loss 1.9104797053337097 test_loss: 1.8251071929931642
epoch: 65 training_loss 1.8377764165401458 test_loss: 1.7337759017944336
epoch: 66 training_loss 1.8011242377758026 test_loss: 1.8476612091064453
epoch: 67 training_loss 1.810725712776184 test_loss: 1.8535541534423827
epoch: 68 training_loss 1.858549590110779 test_loss: 1.834752082824707
epoch: 69 training_loss 1.8260787403583527 test_loss: 1.7541282653808594
epoch: 70 training_loss 1.786048582792282 test_loss: 1.8208856582641602
epoch: 71 training_loss 1.7851387476921081 test_loss: 1.8626981735229493
epoch: 72 training_loss 1.8095970726013184 test_loss: 1.795941162109375
epoch: 73 training_loss 1.840108277797699 test_loss: 1.8058210372924806
epoch: 74 training_loss 1.7465343809127807 test_loss: 1.7849212646484376
epoch: 75 training_loss 1.825614823102951 test_loss: 1.7151107788085938
epoch: 76 training_loss 1.7890714240074157 test_loss: 1.7763465881347655
epoch: 77 training_loss 1.8093899500370025 test_loss: 1.7371572494506835
epoch: 78 training_loss 1.8005279278755189 test_loss: 1.7705820083618165
epoch: 79 training_loss 1.7320552015304564 test_loss: 1.7353166580200194
epoch: 80 training_loss 1.8043511652946471 test_loss: 1.6944828033447266
epoch: 81 training_loss 1.7317743933200835 test_loss: 1.6937442779541017
epoch: 82 training_loss 1.7508579182624817 test_loss: 1.6545795440673827
epoch: 83 training_loss 1.7441250097751617 test_loss: 1.744166374206543
epoch: 84 training_loss 1.699190149307251 test_loss: 1.6791488647460937
epoch: 85 training_loss 1.6721658420562744 test_loss: 1.7409175872802733
epoch: 86 training_loss 1.6860791182518005 test_loss: 1.7082082748413085
epoch: 87 training_loss 1.6896952545642854 test_loss: 1.6765287399291993
epoch: 88 training_loss 1.6885494220256805 test_loss: 1.7163476943969727
epoch: 89 training_loss 1.6571355199813842 test_loss: 1.6247871398925782
epoch: 90 training_loss 1.6780133402347566 test_loss: 1.589000129699707
epoch: 91 training_loss 1.6660080993175506 test_loss: 1.6642013549804688
epoch: 92 training_loss 1.6751243722438813 test_loss: 1.6721572875976562
epoch: 93 training_loss 1.6526055920124054 test_loss: 1.6155134201049806
epoch: 94 training_loss 1.6443687665462494 test_loss: 1.6062116622924805
epoch: 95 training_loss 1.649261746406555 test_loss: 1.6041410446166993
epoch: 96 training_loss 1.6471636044979094 test_loss: 1.6848886489868165
epoch: 97 training_loss 1.6209987676143647 test_loss: 1.6119916915893555
epoch: 98 training_loss 1.6252464771270752 test_loss: 1.5658699035644532
epoch: 99 training_loss 1.5933812534809113 test_loss: 1.6179935455322265
epoch: 100 training_loss 1.6092579662799835 test_loss: 1.5609195709228516
epoch: 101 training_loss 1.6272761595249177 test_loss: 1.5899929046630858
epoch: 102 training_loss 1.5889715456962585 test_loss: 1.5584606170654296
epoch: 103 training_loss 1.6085213708877564 test_loss: 1.5775334358215332
epoch: 104 training_loss 1.5886445128917694 test_loss: 1.5613941192626952
epoch: 105 training_loss 1.5974475717544556 test_loss: 1.5641968727111817
epoch: 106 training_loss 1.5853481411933898 test_loss: 1.5511301040649415
epoch: 107 training_loss 1.6203025138378144 test_loss: 1.5576754570007325
epoch: 108 training_loss 1.578973914384842 test_loss: 1.5894021034240722
epoch: 109 training_loss 1.5596229600906373 test_loss: 1.5402045249938965
epoch: 110 training_loss 1.55899844288826 test_loss: 1.5269490242004395
epoch: 111 training_loss 1.582707633972168 test_loss: 1.629563331604004
epoch: 112 training_loss 1.540757405757904 test_loss: 1.5717620849609375
epoch: 113 training_loss 1.5424428498744964 test_loss: 1.5211006164550782
epoch: 114 training_loss 1.550836614370346 test_loss: 1.5614227294921874
epoch: 115 training_loss 1.5585442507266998 test_loss: 1.5304786682128906
epoch: 116 training_loss 1.5710399067401886 test_loss: 1.5396167755126953
epoch: 117 training_loss 1.536449875831604 test_loss: 1.5120701789855957
epoch: 118 training_loss 1.5358876180648804 test_loss: 1.5518261909484863
epoch: 119 training_loss 1.537022522687912 test_loss: 1.5591960906982423
epoch: 120 training_loss 1.542264186143875 test_loss: 1.5855865478515625
epoch: 121 training_loss 1.528059220314026 test_loss: 1.5266554832458497
epoch: 122 training_loss 1.530789040327072 test_loss: 1.535158634185791
epoch: 123 training_loss 1.5241677737236023 test_loss: 1.494722843170166
epoch: 124 training_loss 1.5098859584331512 test_loss: 1.5086481094360351
epoch: 125 training_loss 1.5217998433113098 test_loss: 1.4639158248901367
epoch: 126 training_loss 1.5321568310260774 test_loss: 1.5324137687683106
epoch: 127 training_loss 1.5267564678192138 test_loss: 1.5056440353393554
epoch: 128 training_loss 1.5117514884471894 test_loss: 1.5257421493530274
epoch: 129 training_loss 1.5011961472034454 test_loss: 1.507053279876709
epoch: 130 training_loss 1.5035007405281067 test_loss: 1.4836201667785645
epoch: 131 training_loss 1.5036862289905548 test_loss: 1.4748809814453125
epoch: 132 training_loss 1.4766454446315764 test_loss: 1.5218358039855957
epoch: 133 training_loss 1.5043580257892608 test_loss: 1.4826999664306642
epoch: 134 training_loss 1.4965079784393311 test_loss: 1.5100340843200684
epoch: 135 training_loss 1.5062148463726044 test_loss: 1.4637440681457519
epoch: 136 training_loss 1.4902394533157348 test_loss: 1.501026439666748
epoch: 137 training_loss 1.496309095621109 test_loss: 1.4532866477966309
epoch: 138 training_loss 1.4843080806732178 test_loss: 1.5300235748291016
epoch: 139 training_loss 1.4737565410137177 test_loss: 1.4550402641296387
epoch: 140 training_loss 1.4896985709667205 test_loss: 1.4711332321166992
epoch: 141 training_loss 1.4747332561016082 test_loss: 1.5079431533813477
epoch: 142 training_loss 1.4742268478870393 test_loss: 1.479388427734375
epoch: 143 training_loss 1.4891424441337586 test_loss: 1.4419313430786134
epoch: 144 training_loss 1.4753845310211182 test_loss: 1.4493718147277832
epoch: 145 training_loss 1.4677465569972992 test_loss: 1.4350526809692383
epoch: 146 training_loss 1.4972568154335022 test_loss: 1.474632740020752
epoch: 147 training_loss 1.4827361512184143 test_loss: 1.431363296508789
epoch: 148 training_loss 1.486360512971878 test_loss: 1.5385477066040039
epoch: 149 training_loss 1.446246076822281 test_loss: 1.4458606719970704
5116.65205286657
episode: 0 training return: tensor(1.7722, device='cuda:0')
episode: 1 training return: tensor(117.2525, device='cuda:0')
episode: 2 training return: tensor(38.5885, device='cuda:0')
episode: 3 training return: tensor(-36.0158, device='cuda:0')
epoch: 1 test_true_pfm: 5064.927916152849 sim_pfm: 128.34813380185128
episode: 4 training return: tensor(79.4721, device='cuda:0')
episode: 5 training return: tensor(89.8885, device='cuda:0')
episode: 6 training return: tensor(-142.0861, device='cuda:0')
episode: 7 training return: tensor(10.1163, device='cuda:0')
epoch: 2 test_true_pfm: 5117.440762350644 sim_pfm: 63.54617683121857
episode: 8 training return: tensor(-31.9745, device='cuda:0')
episode: 9 training return: tensor(13.7661, device='cuda:0')
episode: 10 training return: tensor(112.8161, device='cuda:0')
episode: 11 training return: tensor(174.5009, device='cuda:0')
epoch: 3 test_true_pfm: 5156.439735774256 sim_pfm: 39.06951309354432
episode: 12 training return: tensor(-123.6215, device='cuda:0')
episode: 13 training return: tensor(190.4873, device='cuda:0')
episode: 14 training return: tensor(144.2988, device='cuda:0')
episode: 15 training return: tensor(-37.3490, device='cuda:0')
epoch: 4 test_true_pfm: 5087.177992249242 sim_pfm: 87.60546260881044
episode: 16 training return: tensor(-7.6338, device='cuda:0')
episode: 17 training return: tensor(-3.6004, device='cuda:0')
episode: 18 training return: tensor(-8.9911, device='cuda:0')
episode: 19 training return: tensor(-25.4565, device='cuda:0')
epoch: 5 test_true_pfm: 5154.829265244156 sim_pfm: 79.36343768283648
episode: 20 training return: tensor(44.8736, device='cuda:0')
episode: 21 training return: tensor(188.5532, device='cuda:0')
episode: 22 training return: tensor(195.7652, device='cuda:0')
episode: 23 training return: tensor(132.9299, device='cuda:0')
epoch: 6 test_true_pfm: 5075.746900489965 sim_pfm: 89.34110902789205
episode: 24 training return: tensor(3.1960, device='cuda:0')
episode: 25 training return: tensor(-45.1442, device='cuda:0')
episode: 26 training return: tensor(111.3710, device='cuda:0')
episode: 27 training return: tensor(101.7433, device='cuda:0')
epoch: 7 test_true_pfm: 5062.425308239387 sim_pfm: 187.04547606405686
episode: 28 training return: tensor(92.9348, device='cuda:0')
episode: 29 training return: tensor(29.1838, device='cuda:0')
episode: 30 training return: tensor(79.0568, device='cuda:0')
episode: 31 training return: tensor(108.1466, device='cuda:0')
epoch: 8 test_true_pfm: 5140.391650062969 sim_pfm: 204.1423270124166
episode: 32 training return: tensor(128.7484, device='cuda:0')
episode: 33 training return: tensor(80.3035, device='cuda:0')
episode: 34 training return: tensor(149.7217, device='cuda:0')
episode: 35 training return: tensor(117.2095, device='cuda:0')
epoch: 9 test_true_pfm: 5139.755758383907 sim_pfm: 155.12959932052763
episode: 36 training return: tensor(168.6331, device='cuda:0')
episode: 37 training return: tensor(75.2642, device='cuda:0')
episode: 38 training return: tensor(80.2646, device='cuda:0')
episode: 39 training return: tensor(42.8892, device='cuda:0')
epoch: 10 test_true_pfm: 5210.2040951681665 sim_pfm: 108.79810509923846
episode: 40 training return: tensor(94.8267, device='cuda:0')
episode: 41 training return: tensor(135.4322, device='cuda:0')
episode: 42 training return: tensor(-0.2144, device='cuda:0')
episode: 43 training return: tensor(-71.5063, device='cuda:0')
epoch: 11 test_true_pfm: 5295.710705250225 sim_pfm: 100.19974301204395
episode: 44 training return: tensor(155.4223, device='cuda:0')
episode: 45 training return: tensor(211.3081, device='cuda:0')
episode: 46 training return: tensor(119.0545, device='cuda:0')
episode: 47 training return: tensor(224.3205, device='cuda:0')
epoch: 12 test_true_pfm: 5267.738624301583 sim_pfm: 121.85898245122128
episode: 48 training return: tensor(156.5560, device='cuda:0')
episode: 49 training return: tensor(192.5520, device='cuda:0')
episode: 50 training return: tensor(82.0982, device='cuda:0')
episode: 51 training return: tensor(156.0379, device='cuda:0')
epoch: 13 test_true_pfm: 3945.012250453657 sim_pfm: 196.6908931871876
episode: 52 training return: tensor(59.8832, device='cuda:0')
episode: 53 training return: tensor(158.4606, device='cuda:0')
episode: 54 training return: tensor(121.2275, device='cuda:0')
episode: 55 training return: tensor(39.6234, device='cuda:0')
epoch: 14 test_true_pfm: 5306.607679850175 sim_pfm: 196.10503123272792
episode: 56 training return: tensor(31.5880, device='cuda:0')
episode: 57 training return: tensor(88.6186, device='cuda:0')
episode: 58 training return: tensor(170.5581, device='cuda:0')
episode: 59 training return: tensor(38.4215, device='cuda:0')
epoch: 15 test_true_pfm: 5415.6149386120105 sim_pfm: 224.09274494022247
episode: 60 training return: tensor(78.3865, device='cuda:0')
episode: 61 training return: tensor(152.1134, device='cuda:0')
episode: 62 training return: tensor(169.7736, device='cuda:0')
episode: 63 training return: tensor(147.6949, device='cuda:0')
epoch: 16 test_true_pfm: 5302.54267970103 sim_pfm: 231.40121650637593
episode: 64 training return: tensor(78.0192, device='cuda:0')
episode: 65 training return: tensor(143.9845, device='cuda:0')
episode: 66 training return: tensor(308.1768, device='cuda:0')
episode: 67 training return: tensor(23.5846, device='cuda:0')
epoch: 17 test_true_pfm: 5336.72310663098 sim_pfm: 258.32468734069454
episode: 68 training return: tensor(37.6605, device='cuda:0')
episode: 69 training return: tensor(225.8542, device='cuda:0')
episode: 70 training return: tensor(227.0463, device='cuda:0')
episode: 71 training return: tensor(111.1144, device='cuda:0')
epoch: 18 test_true_pfm: 5178.76751378274 sim_pfm: 252.2988049373283
episode: 72 training return: tensor(56.6822, device='cuda:0')
episode: 73 training return: tensor(149.4064, device='cuda:0')
episode: 74 training return: tensor(62.1669, device='cuda:0')
episode: 75 training return: tensor(147.5723, device='cuda:0')
epoch: 19 test_true_pfm: 5400.017005304358 sim_pfm: 245.49322002229746
episode: 76 training return: tensor(186.6571, device='cuda:0')
episode: 77 training return: tensor(67.9962, device='cuda:0')
episode: 78 training return: tensor(117.8632, device='cuda:0')
episode: 79 training return: tensor(71.8281, device='cuda:0')
epoch: 20 test_true_pfm: 5214.050134294318 sim_pfm: 196.2014034127545
episode: 80 training return: tensor(67.9030, device='cuda:0')
episode: 81 training return: tensor(153.7541, device='cuda:0')
episode: 82 training return: tensor(212.2583, device='cuda:0')
episode: 83 training return: tensor(232.4583, device='cuda:0')
epoch: 21 test_true_pfm: 5265.221742119337 sim_pfm: 236.1836825199231
episode: 84 training return: tensor(157.0082, device='cuda:0')
episode: 85 training return: tensor(153.9366, device='cuda:0')
episode: 86 training return: tensor(175.4340, device='cuda:0')
episode: 87 training return: tensor(152.5412, device='cuda:0')
epoch: 22 test_true_pfm: 5360.5984150432605 sim_pfm: 266.0181289291165
episode: 88 training return: tensor(271.3218, device='cuda:0')
episode: 89 training return: tensor(138.4565, device='cuda:0')
episode: 90 training return: tensor(56.2197, device='cuda:0')
episode: 91 training return: tensor(157.3201, device='cuda:0')
epoch: 23 test_true_pfm: 5390.2212846325065 sim_pfm: 223.66680754511617
episode: 92 training return: tensor(73.7776, device='cuda:0')
episode: 93 training return: tensor(214.0317, device='cuda:0')
episode: 94 training return: tensor(173.8012, device='cuda:0')
episode: 95 training return: tensor(136.8457, device='cuda:0')
epoch: 24 test_true_pfm: 5410.761094298705 sim_pfm: 261.08931662217947
episode: 96 training return: tensor(255.6871, device='cuda:0')
episode: 97 training return: tensor(341.1923, device='cuda:0')
episode: 98 training return: tensor(247.5308, device='cuda:0')
episode: 99 training return: tensor(198.2280, device='cuda:0')
epoch: 25 test_true_pfm: 4500.931256210798 sim_pfm: 217.8862786014021
episode: 100 training return: tensor(97.8942, device='cuda:0')
episode: 101 training return: tensor(199.0961, device='cuda:0')
episode: 102 training return: tensor(20.8809, device='cuda:0')
episode: 103 training return: tensor(172.9944, device='cuda:0')
epoch: 26 test_true_pfm: 5439.351569465297 sim_pfm: 287.7212022734263
episode: 104 training return: tensor(279.0941, device='cuda:0')
episode: 105 training return: tensor(281.6695, device='cuda:0')
episode: 106 training return: tensor(129.7383, device='cuda:0')
episode: 107 training return: tensor(160.2198, device='cuda:0')
epoch: 27 test_true_pfm: 5383.338130966342 sim_pfm: 232.25081558468324
episode: 108 training return: tensor(113.8408, device='cuda:0')
episode: 109 training return: tensor(254.3178, device='cuda:0')
episode: 110 training return: tensor(198.9537, device='cuda:0')
episode: 111 training return: tensor(114.7527, device='cuda:0')
epoch: 28 test_true_pfm: 5485.98256299203 sim_pfm: 275.2678076347026
episode: 112 training return: tensor(253.0097, device='cuda:0')
episode: 113 training return: tensor(138.8386, device='cuda:0')
episode: 114 training return: tensor(260.5030, device='cuda:0')
episode: 115 training return: tensor(128.5027, device='cuda:0')
epoch: 29 test_true_pfm: 3951.1262500577777 sim_pfm: 379.46181157190586
episode: 116 training return: tensor(67.8484, device='cuda:0')
episode: 117 training return: tensor(-241.1092, device='cuda:0')
episode: 118 training return: tensor(151.1400, device='cuda:0')
episode: 119 training return: tensor(164.9653, device='cuda:0')
epoch: 30 test_true_pfm: 5357.540728224484 sim_pfm: 288.8215985308634
episode: 120 training return: tensor(235.7899, device='cuda:0')
episode: 121 training return: tensor(259.0253, device='cuda:0')
episode: 122 training return: tensor(207.2628, device='cuda:0')
episode: 123 training return: tensor(212.4002, device='cuda:0')
epoch: 31 test_true_pfm: 5296.510097799902 sim_pfm: 277.6174699239976
episode: 124 training return: tensor(153.7539, device='cuda:0')
episode: 125 training return: tensor(236.6171, device='cuda:0')
episode: 126 training return: tensor(263.9751, device='cuda:0')
episode: 127 training return: tensor(136.1527, device='cuda:0')
epoch: 32 test_true_pfm: 5385.383685533168 sim_pfm: 374.0626550729309
episode: 128 training return: tensor(166.1497, device='cuda:0')
episode: 129 training return: tensor(214.7467, device='cuda:0')
episode: 130 training return: tensor(249.0535, device='cuda:0')
episode: 131 training return: tensor(112.1271, device='cuda:0')
epoch: 33 test_true_pfm: 5311.067578241705 sim_pfm: 202.07703070056354
episode: 132 training return: tensor(258.9898, device='cuda:0')
episode: 133 training return: tensor(350.9336, device='cuda:0')
episode: 134 training return: tensor(264.0453, device='cuda:0')
episode: 135 training return: tensor(251.8361, device='cuda:0')
epoch: 34 test_true_pfm: 5285.729619077017 sim_pfm: 259.4567808951445
episode: 136 training return: tensor(223.4170, device='cuda:0')
episode: 137 training return: tensor(218.6653, device='cuda:0')
episode: 138 training return: tensor(160.3664, device='cuda:0')
episode: 139 training return: tensor(222.8583, device='cuda:0')
epoch: 35 test_true_pfm: 5440.777046835607 sim_pfm: 354.3521524600219
episode: 140 training return: tensor(220.6211, device='cuda:0')
episode: 141 training return: tensor(84.6509, device='cuda:0')
episode: 142 training return: tensor(179.2982, device='cuda:0')
episode: 143 training return: tensor(108.0060, device='cuda:0')
epoch: 36 test_true_pfm: 5514.650429753455 sim_pfm: 318.364663640629
episode: 144 training return: tensor(216.1588, device='cuda:0')
episode: 145 training return: tensor(188.9121, device='cuda:0')
episode: 146 training return: tensor(291.5437, device='cuda:0')
episode: 147 training return: tensor(215.4529, device='cuda:0')
epoch: 37 test_true_pfm: 5347.736605648096 sim_pfm: 349.36870799978107
episode: 148 training return: tensor(263.0837, device='cuda:0')
episode: 149 training return: tensor(20.9198, device='cuda:0')
episode: 150 training return: tensor(189.1255, device='cuda:0')
episode: 151 training return: tensor(334.0206, device='cuda:0')
epoch: 38 test_true_pfm: 5430.130033532489 sim_pfm: 281.5630645766311
episode: 152 training return: tensor(288.0007, device='cuda:0')
episode: 153 training return: tensor(235.6587, device='cuda:0')
episode: 154 training return: tensor(257.9086, device='cuda:0')
episode: 155 training return: tensor(205.2098, device='cuda:0')
epoch: 39 test_true_pfm: 5423.586704060421 sim_pfm: 323.48453278709593
episode: 156 training return: tensor(189.7280, device='cuda:0')
episode: 157 training return: tensor(140.2303, device='cuda:0')
episode: 158 training return: tensor(237.3525, device='cuda:0')
episode: 159 training return: tensor(255.2169, device='cuda:0')
epoch: 40 test_true_pfm: 5463.550382126504 sim_pfm: 317.1511680972083
episode: 160 training return: tensor(218.6573, device='cuda:0')
episode: 161 training return: tensor(222.1013, device='cuda:0')
episode: 162 training return: tensor(363.5954, device='cuda:0')
episode: 163 training return: tensor(257.7068, device='cuda:0')
epoch: 41 test_true_pfm: 5480.758868445231 sim_pfm: 243.71980577012678
episode: 164 training return: tensor(187.7726, device='cuda:0')
episode: 165 training return: tensor(294.2198, device='cuda:0')
episode: 166 training return: tensor(334.3281, device='cuda:0')
episode: 167 training return: tensor(162.2670, device='cuda:0')
epoch: 42 test_true_pfm: 5526.401268430697 sim_pfm: 305.69001657131594
episode: 168 training return: tensor(219.5816, device='cuda:0')
episode: 169 training return: tensor(116.7068, device='cuda:0')
episode: 170 training return: tensor(255.0658, device='cuda:0')
episode: 171 training return: tensor(322.3335, device='cuda:0')
epoch: 43 test_true_pfm: 5478.149673219472 sim_pfm: 334.4830758400106
episode: 172 training return: tensor(52.2646, device='cuda:0')
episode: 173 training return: tensor(224.4052, device='cuda:0')
episode: 174 training return: tensor(277.4408, device='cuda:0')
episode: 175 training return: tensor(147.3803, device='cuda:0')
epoch: 44 test_true_pfm: 5441.518752514181 sim_pfm: 317.26299065314623
episode: 176 training return: tensor(133.0237, device='cuda:0')
episode: 177 training return: tensor(339.3569, device='cuda:0')
episode: 178 training return: tensor(288.9098, device='cuda:0')
episode: 179 training return: tensor(95.4294, device='cuda:0')
epoch: 45 test_true_pfm: 5384.772244288143 sim_pfm: 365.3314411466902
episode: 180 training return: tensor(275.2822, device='cuda:0')
episode: 181 training return: tensor(242.7559, device='cuda:0')
episode: 182 training return: tensor(188.0567, device='cuda:0')
episode: 183 training return: tensor(138.8969, device='cuda:0')
epoch: 46 test_true_pfm: 5514.162778471057 sim_pfm: 249.9829803010216
episode: 184 training return: tensor(280.6173, device='cuda:0')
episode: 185 training return: tensor(236.1682, device='cuda:0')
episode: 186 training return: tensor(254.5709, device='cuda:0')
episode: 187 training return: tensor(357.0335, device='cuda:0')
epoch: 47 test_true_pfm: 5558.248534167582 sim_pfm: 363.69480846725247
episode: 188 training return: tensor(285.2139, device='cuda:0')
episode: 189 training return: tensor(167.1439, device='cuda:0')
episode: 190 training return: tensor(398.6441, device='cuda:0')
episode: 191 training return: tensor(297.5484, device='cuda:0')
epoch: 48 test_true_pfm: 5505.685022542218 sim_pfm: 362.4677988481708
episode: 192 training return: tensor(78.4041, device='cuda:0')
episode: 193 training return: tensor(152.3283, device='cuda:0')
episode: 194 training return: tensor(159.1632, device='cuda:0')
episode: 195 training return: tensor(331.2463, device='cuda:0')
epoch: 49 test_true_pfm: 5479.873519555091 sim_pfm: 363.4321714352118
episode: 196 training return: tensor(275.4678, device='cuda:0')
episode: 197 training return: tensor(265.0684, device='cuda:0')
episode: 198 training return: tensor(189.6706, device='cuda:0')
episode: 199 training return: tensor(394.1435, device='cuda:0')
epoch: 50 test_true_pfm: 5479.901125492761 sim_pfm: 395.5700567883905
episode: 200 training return: tensor(334.0358, device='cuda:0')
episode: 201 training return: tensor(387.5413, device='cuda:0')
episode: 202 training return: tensor(237.6330, device='cuda:0')
episode: 203 training return: tensor(348.7895, device='cuda:0')
epoch: 51 test_true_pfm: 5577.0976380976 sim_pfm: 379.3124040648884
episode: 204 training return: tensor(343.2834, device='cuda:0')
episode: 205 training return: tensor(331.5830, device='cuda:0')
episode: 206 training return: tensor(292.2218, device='cuda:0')
episode: 207 training return: tensor(318.6212, device='cuda:0')
epoch: 52 test_true_pfm: 5573.726394998175 sim_pfm: 303.9254923192008
episode: 208 training return: tensor(212.3576, device='cuda:0')
episode: 209 training return: tensor(269.1397, device='cuda:0')
episode: 210 training return: tensor(129.7438, device='cuda:0')
episode: 211 training return: tensor(213.4282, device='cuda:0')
epoch: 53 test_true_pfm: 5435.017450300172 sim_pfm: 349.23817729431903
episode: 212 training return: tensor(167.1249, device='cuda:0')
episode: 213 training return: tensor(158.7320, device='cuda:0')
episode: 214 training return: tensor(222.2014, device='cuda:0')
episode: 215 training return: tensor(198.2014, device='cuda:0')
epoch: 54 test_true_pfm: 5516.464125822142 sim_pfm: 364.18478009685833
episode: 216 training return: tensor(284.0607, device='cuda:0')
episode: 217 training return: tensor(272.5872, device='cuda:0')
episode: 218 training return: tensor(219.8936, device='cuda:0')
episode: 219 training return: tensor(311.8832, device='cuda:0')
epoch: 55 test_true_pfm: 5522.091596258278 sim_pfm: 390.3692979109862
episode: 220 training return: tensor(264.2135, device='cuda:0')
episode: 221 training return: tensor(227.7299, device='cuda:0')
episode: 222 training return: tensor(248.1611, device='cuda:0')
episode: 223 training return: tensor(259.4817, device='cuda:0')
epoch: 56 test_true_pfm: 5538.7872495248475 sim_pfm: 457.0707909684473
episode: 224 training return: tensor(336.0906, device='cuda:0')
episode: 225 training return: tensor(154.7406, device='cuda:0')
episode: 226 training return: tensor(122.4203, device='cuda:0')
episode: 227 training return: tensor(293.6773, device='cuda:0')
epoch: 57 test_true_pfm: 5488.575776277445 sim_pfm: 339.1462856683065
episode: 228 training return: tensor(232.7804, device='cuda:0')
episode: 229 training return: tensor(343.4102, device='cuda:0')
episode: 230 training return: tensor(296.2995, device='cuda:0')
episode: 231 training return: tensor(244.3237, device='cuda:0')
epoch: 58 test_true_pfm: 5506.365426021049 sim_pfm: 457.2023574943887
episode: 232 training return: tensor(353.7056, device='cuda:0')
episode: 233 training return: tensor(174.0220, device='cuda:0')
episode: 234 training return: tensor(163.7692, device='cuda:0')
episode: 235 training return: tensor(177.0893, device='cuda:0')
epoch: 59 test_true_pfm: 5586.519748012662 sim_pfm: 340.8740579424484
episode: 236 training return: tensor(306.8707, device='cuda:0')
episode: 237 training return: tensor(372.3670, device='cuda:0')
episode: 238 training return: tensor(294.6400, device='cuda:0')
episode: 239 training return: tensor(145.2375, device='cuda:0')
epoch: 60 test_true_pfm: 5555.4765092774105 sim_pfm: 412.3044730348823
episode: 240 training return: tensor(278.7129, device='cuda:0')
episode: 241 training return: tensor(318.9035, device='cuda:0')
episode: 242 training return: tensor(146.7371, device='cuda:0')
episode: 243 training return: tensor(216.1743, device='cuda:0')
epoch: 61 test_true_pfm: 5537.401619686072 sim_pfm: 347.8349285965475
episode: 244 training return: tensor(172.3325, device='cuda:0')
episode: 245 training return: tensor(269.9568, device='cuda:0')
episode: 246 training return: tensor(184.9046, device='cuda:0')
episode: 247 training return: tensor(232.1869, device='cuda:0')
epoch: 62 test_true_pfm: 5565.700956063273 sim_pfm: 391.3391412496664
episode: 248 training return: tensor(270.1514, device='cuda:0')
episode: 249 training return: tensor(422.0845, device='cuda:0')
episode: 250 training return: tensor(472.8868, device='cuda:0')
episode: 251 training return: tensor(262.2989, device='cuda:0')
epoch: 63 test_true_pfm: 5608.863561494995 sim_pfm: 410.57017696043476
episode: 252 training return: tensor(201.4978, device='cuda:0')
episode: 253 training return: tensor(279.3586, device='cuda:0')
episode: 254 training return: tensor(363.5175, device='cuda:0')
episode: 255 training return: tensor(206.9504, device='cuda:0')
epoch: 64 test_true_pfm: 5621.802318991118 sim_pfm: 392.12736559995875
episode: 256 training return: tensor(302.1561, device='cuda:0')
episode: 257 training return: tensor(237.4433, device='cuda:0')
episode: 258 training return: tensor(320.4896, device='cuda:0')
episode: 259 training return: tensor(260.1550, device='cuda:0')
epoch: 65 test_true_pfm: 5505.110424580834 sim_pfm: 395.96952066271723
episode: 260 training return: tensor(211.7613, device='cuda:0')
episode: 261 training return: tensor(218.9132, device='cuda:0')
episode: 262 training return: tensor(301.6470, device='cuda:0')
episode: 263 training return: tensor(323.1279, device='cuda:0')
epoch: 66 test_true_pfm: 5583.660853389351 sim_pfm: 452.5968898391972
episode: 264 training return: tensor(222.1293, device='cuda:0')
episode: 265 training return: tensor(317.4863, device='cuda:0')
episode: 266 training return: tensor(232.9615, device='cuda:0')
episode: 267 training return: tensor(333.9509, device='cuda:0')
epoch: 67 test_true_pfm: 5548.071813913654 sim_pfm: 392.87212644534884
episode: 268 training return: tensor(160.3271, device='cuda:0')
episode: 269 training return: tensor(253.6941, device='cuda:0')
episode: 270 training return: tensor(126.2156, device='cuda:0')
episode: 271 training return: tensor(282.0374, device='cuda:0')
epoch: 68 test_true_pfm: 5549.871284775775 sim_pfm: 408.8882440994009
episode: 272 training return: tensor(198.5299, device='cuda:0')
episode: 273 training return: tensor(229.6713, device='cuda:0')
episode: 274 training return: tensor(258.0510, device='cuda:0')
episode: 275 training return: tensor(262.5262, device='cuda:0')
epoch: 69 test_true_pfm: 5583.5624018397175 sim_pfm: 435.8851634582388
episode: 276 training return: tensor(234.9627, device='cuda:0')
episode: 277 training return: tensor(319.9386, device='cuda:0')
episode: 278 training return: tensor(331.9926, device='cuda:0')
episode: 279 training return: tensor(248.4475, device='cuda:0')
epoch: 70 test_true_pfm: 5523.431432867419 sim_pfm: 408.7736918248702
episode: 280 training return: tensor(291.1461, device='cuda:0')
episode: 281 training return: tensor(209.1947, device='cuda:0')
episode: 282 training return: tensor(200.9966, device='cuda:0')
episode: 283 training return: tensor(182.5598, device='cuda:0')
epoch: 71 test_true_pfm: 5617.702807343051 sim_pfm: 390.80438011062023
episode: 284 training return: tensor(280.2939, device='cuda:0')
episode: 285 training return: tensor(310.1493, device='cuda:0')
episode: 286 training return: tensor(356.8636, device='cuda:0')
episode: 287 training return: tensor(320.8473, device='cuda:0')
epoch: 72 test_true_pfm: 4832.158202094032 sim_pfm: 411.69114569945185
episode: 288 training return: tensor(245.2743, device='cuda:0')
episode: 289 training return: tensor(222.1683, device='cuda:0')
episode: 290 training return: tensor(255.2352, device='cuda:0')
episode: 291 training return: tensor(326.5645, device='cuda:0')
epoch: 73 test_true_pfm: 5650.898232561448 sim_pfm: 358.1465423067954
episode: 292 training return: tensor(257.6349, device='cuda:0')
episode: 293 training return: tensor(324.4581, device='cuda:0')
episode: 294 training return: tensor(259.6570, device='cuda:0')
episode: 295 training return: tensor(204.7929, device='cuda:0')
epoch: 74 test_true_pfm: 5540.539767097994 sim_pfm: 384.9258391226176
episode: 296 training return: tensor(285.0304, device='cuda:0')
episode: 297 training return: tensor(406.6025, device='cuda:0')
episode: 298 training return: tensor(261.6030, device='cuda:0')
episode: 299 training return: tensor(334.2928, device='cuda:0')
epoch: 75 test_true_pfm: 5600.707952554654 sim_pfm: 400.8219181315605
episode: 300 training return: tensor(370.0156, device='cuda:0')
episode: 301 training return: tensor(349.3362, device='cuda:0')
episode: 302 training return: tensor(340.4214, device='cuda:0')
episode: 303 training return: tensor(332.2682, device='cuda:0')
epoch: 76 test_true_pfm: 5664.644077347118 sim_pfm: 436.44865347342176
episode: 304 training return: tensor(282.4162, device='cuda:0')
episode: 305 training return: tensor(419.0575, device='cuda:0')
episode: 306 training return: tensor(295.1784, device='cuda:0')
episode: 307 training return: tensor(328.9072, device='cuda:0')
epoch: 77 test_true_pfm: 5657.254919444694 sim_pfm: 415.97634352311917
episode: 308 training return: tensor(311.1126, device='cuda:0')
episode: 309 training return: tensor(230.4687, device='cuda:0')
episode: 310 training return: tensor(261.9727, device='cuda:0')
episode: 311 training return: tensor(287.4396, device='cuda:0')
epoch: 78 test_true_pfm: 5633.802951431581 sim_pfm: 399.8871777441721
episode: 312 training return: tensor(298.1702, device='cuda:0')
episode: 313 training return: tensor(404.6467, device='cuda:0')
episode: 314 training return: tensor(290.4550, device='cuda:0')
episode: 315 training return: tensor(408.8916, device='cuda:0')
epoch: 79 test_true_pfm: 5582.253402160551 sim_pfm: 392.6159885462839
episode: 316 training return: tensor(281.3257, device='cuda:0')
episode: 317 training return: tensor(350.9280, device='cuda:0')
episode: 318 training return: tensor(319.0426, device='cuda:0')
episode: 319 training return: tensor(341.1679, device='cuda:0')
epoch: 80 test_true_pfm: 5509.544327462209 sim_pfm: 446.5546915395341
episode: 320 training return: tensor(228.4300, device='cuda:0')
episode: 321 training return: tensor(291.1286, device='cuda:0')
episode: 322 training return: tensor(374.7310, device='cuda:0')
episode: 323 training return: tensor(385.5142, device='cuda:0')
epoch: 81 test_true_pfm: 5589.256185572565 sim_pfm: 448.51281368511263
episode: 324 training return: tensor(229.6069, device='cuda:0')
episode: 325 training return: tensor(274.1724, device='cuda:0')
episode: 326 training return: tensor(413.6146, device='cuda:0')
episode: 327 training return: tensor(370.2321, device='cuda:0')
epoch: 82 test_true_pfm: 5590.958269744279 sim_pfm: 413.0142878987438
episode: 328 training return: tensor(268.8972, device='cuda:0')
episode: 329 training return: tensor(204.1805, device='cuda:0')
episode: 330 training return: tensor(250.9198, device='cuda:0')
episode: 331 training return: tensor(390.8677, device='cuda:0')
epoch: 83 test_true_pfm: 5326.372775242918 sim_pfm: 419.11066523595946
episode: 332 training return: tensor(358.9892, device='cuda:0')
episode: 333 training return: tensor(374.9182, device='cuda:0')
episode: 334 training return: tensor(262.2513, device='cuda:0')
episode: 335 training return: tensor(375.0578, device='cuda:0')
epoch: 84 test_true_pfm: 5634.658051443313 sim_pfm: 422.2263109254418
episode: 336 training return: tensor(330.8145, device='cuda:0')
episode: 337 training return: tensor(299.4259, device='cuda:0')
episode: 338 training return: tensor(401.7683, device='cuda:0')
episode: 339 training return: tensor(322.6286, device='cuda:0')
epoch: 85 test_true_pfm: 5585.713730900073 sim_pfm: 415.7749492750736
episode: 340 training return: tensor(191.0544, device='cuda:0')
episode: 341 training return: tensor(339.9774, device='cuda:0')
episode: 342 training return: tensor(216.4374, device='cuda:0')
episode: 343 training return: tensor(330.8597, device='cuda:0')
epoch: 86 test_true_pfm: 5587.9239944521105 sim_pfm: 440.3035577485959
episode: 344 training return: tensor(225.8881, device='cuda:0')
episode: 345 training return: tensor(320.0634, device='cuda:0')
episode: 346 training return: tensor(229.2609, device='cuda:0')
episode: 347 training return: tensor(344.8844, device='cuda:0')
epoch: 87 test_true_pfm: 5532.942183217002 sim_pfm: 437.3415828087309
episode: 348 training return: tensor(354.8184, device='cuda:0')
episode: 349 training return: tensor(370.4673, device='cuda:0')
episode: 350 training return: tensor(239.6829, device='cuda:0')
episode: 351 training return: tensor(375.1896, device='cuda:0')
epoch: 88 test_true_pfm: 5596.778118749655 sim_pfm: 442.1164321341882
episode: 352 training return: tensor(328.4141, device='cuda:0')
episode: 353 training return: tensor(288.2767, device='cuda:0')
episode: 354 training return: tensor(308.5504, device='cuda:0')
episode: 355 training return: tensor(230.2186, device='cuda:0')
epoch: 89 test_true_pfm: 5631.264034559729 sim_pfm: 401.2904881610787
episode: 356 training return: tensor(335.6600, device='cuda:0')
episode: 357 training return: tensor(229.3566, device='cuda:0')
episode: 358 training return: tensor(377.0355, device='cuda:0')
episode: 359 training return: tensor(287.0300, device='cuda:0')
epoch: 90 test_true_pfm: 5632.707469441005 sim_pfm: 449.82527845859295
episode: 360 training return: tensor(383.2766, device='cuda:0')
episode: 361 training return: tensor(290.7859, device='cuda:0')
episode: 362 training return: tensor(258.4373, device='cuda:0')
episode: 363 training return: tensor(423.7274, device='cuda:0')
epoch: 91 test_true_pfm: 5657.271459567888 sim_pfm: 398.0966436721889
episode: 364 training return: tensor(305.3290, device='cuda:0')
episode: 365 training return: tensor(239.6685, device='cuda:0')
episode: 366 training return: tensor(393.4456, device='cuda:0')
episode: 367 training return: tensor(246.4070, device='cuda:0')
epoch: 92 test_true_pfm: 5609.762583269817 sim_pfm: 421.1991283611472
episode: 368 training return: tensor(356.8259, device='cuda:0')
episode: 369 training return: tensor(335.6880, device='cuda:0')
episode: 370 training return: tensor(311.0576, device='cuda:0')
episode: 371 training return: tensor(357.3536, device='cuda:0')
epoch: 93 test_true_pfm: 5623.561970644019 sim_pfm: 402.82010493477964
episode: 372 training return: tensor(327.7621, device='cuda:0')
episode: 373 training return: tensor(348.9576, device='cuda:0')
episode: 374 training return: tensor(316.3532, device='cuda:0')
episode: 375 training return: tensor(319.0579, device='cuda:0')
epoch: 94 test_true_pfm: 5592.277361040164 sim_pfm: 389.7507755952344
episode: 376 training return: tensor(420.7030, device='cuda:0')
episode: 377 training return: tensor(330.3757, device='cuda:0')
episode: 378 training return: tensor(251.4272, device='cuda:0')
episode: 379 training return: tensor(243.3339, device='cuda:0')
epoch: 95 test_true_pfm: 5560.45796851127 sim_pfm: 381.1446175922077
episode: 380 training return: tensor(333.6828, device='cuda:0')
episode: 381 training return: tensor(345.6052, device='cuda:0')
episode: 382 training return: tensor(261.5944, device='cuda:0')
episode: 383 training return: tensor(319.4289, device='cuda:0')
epoch: 96 test_true_pfm: 5597.463863982812 sim_pfm: 438.0570514445232
episode: 384 training return: tensor(299.6587, device='cuda:0')
episode: 385 training return: tensor(354.3562, device='cuda:0')
episode: 386 training return: tensor(372.7218, device='cuda:0')
episode: 387 training return: tensor(389.5326, device='cuda:0')
epoch: 97 test_true_pfm: 5599.78651898283 sim_pfm: 433.96738018943387
episode: 388 training return: tensor(420.2110, device='cuda:0')
episode: 389 training return: tensor(315.5398, device='cuda:0')
episode: 390 training return: tensor(386.8648, device='cuda:0')
episode: 391 training return: tensor(266.0869, device='cuda:0')
epoch: 98 test_true_pfm: 5576.111522793966 sim_pfm: 422.98410063083674
episode: 392 training return: tensor(364.2516, device='cuda:0')
episode: 393 training return: tensor(323.8754, device='cuda:0')
episode: 394 training return: tensor(306.5374, device='cuda:0')
episode: 395 training return: tensor(426.7213, device='cuda:0')
epoch: 99 test_true_pfm: 5478.565167250832 sim_pfm: 505.23332915086456
episode: 396 training return: tensor(284.3771, device='cuda:0')
episode: 397 training return: tensor(289.5765, device='cuda:0')
episode: 398 training return: tensor(323.1214, device='cuda:0')
episode: 399 training return: tensor(361.7535, device='cuda:0')
epoch: 100 test_true_pfm: 5543.038423797195 sim_pfm: 413.5463538101564
episode: 400 training return: tensor(411.7386, device='cuda:0')
episode: 401 training return: tensor(392.5954, device='cuda:0')
episode: 402 training return: tensor(328.2024, device='cuda:0')
episode: 403 training return: tensor(433.8401, device='cuda:0')
epoch: 101 test_true_pfm: 5562.236442010816 sim_pfm: 479.65642112020095
episode: 404 training return: tensor(403.1010, device='cuda:0')
episode: 405 training return: tensor(421.3222, device='cuda:0')
episode: 406 training return: tensor(341.2521, device='cuda:0')
episode: 407 training return: tensor(375.3882, device='cuda:0')
epoch: 102 test_true_pfm: 5669.200247731639 sim_pfm: 407.32451172116754
episode: 408 training return: tensor(396.3913, device='cuda:0')
episode: 409 training return: tensor(305.8518, device='cuda:0')
episode: 410 training return: tensor(283.2405, device='cuda:0')
episode: 411 training return: tensor(361.8316, device='cuda:0')
epoch: 103 test_true_pfm: 5680.852062026258 sim_pfm: 480.4444188847362
episode: 412 training return: tensor(299.1733, device='cuda:0')
episode: 413 training return: tensor(118.5221, device='cuda:0')
episode: 414 training return: tensor(284.2370, device='cuda:0')
episode: 415 training return: tensor(296.7285, device='cuda:0')
epoch: 104 test_true_pfm: 5416.835276375682 sim_pfm: 441.39501055749133
episode: 416 training return: tensor(358.5468, device='cuda:0')
episode: 417 training return: tensor(308.0957, device='cuda:0')
episode: 418 training return: tensor(412.2500, device='cuda:0')
episode: 419 training return: tensor(342.1265, device='cuda:0')
epoch: 105 test_true_pfm: 5632.559587365925 sim_pfm: 438.49951431155205
episode: 420 training return: tensor(241.8055, device='cuda:0')
episode: 421 training return: tensor(320.5668, device='cuda:0')
episode: 422 training return: tensor(270.5558, device='cuda:0')
episode: 423 training return: tensor(356.1700, device='cuda:0')
epoch: 106 test_true_pfm: 5582.875870074914 sim_pfm: 400.5184398693964
episode: 424 training return: tensor(345.0365, device='cuda:0')
episode: 425 training return: tensor(279.3362, device='cuda:0')
episode: 426 training return: tensor(354.0396, device='cuda:0')
episode: 427 training return: tensor(286.9201, device='cuda:0')
epoch: 107 test_true_pfm: 5579.09699845054 sim_pfm: 448.04888394711696
episode: 428 training return: tensor(288.2761, device='cuda:0')
episode: 429 training return: tensor(278.2199, device='cuda:0')
episode: 430 training return: tensor(359.5410, device='cuda:0')
episode: 431 training return: tensor(301.4264, device='cuda:0')
epoch: 108 test_true_pfm: 5679.868640380788 sim_pfm: 438.2048247329173
episode: 432 training return: tensor(290.4485, device='cuda:0')
episode: 433 training return: tensor(455.4791, device='cuda:0')
episode: 434 training return: tensor(313.0536, device='cuda:0')
episode: 435 training return: tensor(249.6746, device='cuda:0')
epoch: 109 test_true_pfm: 5584.798118135244 sim_pfm: 360.95052977337036
episode: 436 training return: tensor(349.3673, device='cuda:0')
episode: 437 training return: tensor(348.2691, device='cuda:0')
episode: 438 training return: tensor(347.9406, device='cuda:0')
episode: 439 training return: tensor(309.2157, device='cuda:0')
epoch: 110 test_true_pfm: 5658.548050571556 sim_pfm: 450.8628703800884
episode: 440 training return: tensor(324.9044, device='cuda:0')
episode: 441 training return: tensor(368.6453, device='cuda:0')
episode: 442 training return: tensor(340.3587, device='cuda:0')
episode: 443 training return: tensor(258.8601, device='cuda:0')
epoch: 111 test_true_pfm: 5568.64854423612 sim_pfm: 382.9242244800941
episode: 444 training return: tensor(433.1211, device='cuda:0')
episode: 445 training return: tensor(419.8386, device='cuda:0')
episode: 446 training return: tensor(315.5974, device='cuda:0')
episode: 447 training return: tensor(249.7209, device='cuda:0')
epoch: 112 test_true_pfm: 5639.02353587393 sim_pfm: 463.65321858863655
episode: 448 training return: tensor(352.6932, device='cuda:0')
episode: 449 training return: tensor(318.6331, device='cuda:0')
episode: 450 training return: tensor(341.9550, device='cuda:0')
episode: 451 training return: tensor(295.4540, device='cuda:0')
epoch: 113 test_true_pfm: 5685.3493291784225 sim_pfm: 452.0335187347179
episode: 452 training return: tensor(396.6812, device='cuda:0')
episode: 453 training return: tensor(284.1884, device='cuda:0')
episode: 454 training return: tensor(304.5841, device='cuda:0')
episode: 455 training return: tensor(283.3281, device='cuda:0')
epoch: 114 test_true_pfm: 5639.762321990547 sim_pfm: 491.6708132204464
episode: 456 training return: tensor(318.6359, device='cuda:0')
episode: 457 training return: tensor(322.5266, device='cuda:0')
episode: 458 training return: tensor(364.9832, device='cuda:0')
episode: 459 training return: tensor(402.6312, device='cuda:0')
epoch: 115 test_true_pfm: 5662.782708247544 sim_pfm: 404.4450340104813
episode: 460 training return: tensor(348.6346, device='cuda:0')
episode: 461 training return: tensor(235.6528, device='cuda:0')
episode: 462 training return: tensor(399.9969, device='cuda:0')
episode: 463 training return: tensor(380.9955, device='cuda:0')
epoch: 116 test_true_pfm: 5591.911506644971 sim_pfm: 434.8433828366299
episode: 464 training return: tensor(286.1561, device='cuda:0')
episode: 465 training return: tensor(429.7346, device='cuda:0')
episode: 466 training return: tensor(431.6421, device='cuda:0')
episode: 467 training return: tensor(431.4830, device='cuda:0')
epoch: 117 test_true_pfm: 5661.725828437709 sim_pfm: 481.00808520930394
episode: 468 training return: tensor(175.7589, device='cuda:0')
episode: 469 training return: tensor(415.4994, device='cuda:0')
episode: 470 training return: tensor(421.8372, device='cuda:0')
episode: 471 training return: tensor(344.7211, device='cuda:0')
epoch: 118 test_true_pfm: 5648.573947117701 sim_pfm: 461.78327910010313
episode: 472 training return: tensor(247.3046, device='cuda:0')
episode: 473 training return: tensor(261.2072, device='cuda:0')
episode: 474 training return: tensor(291.6184, device='cuda:0')
episode: 475 training return: tensor(319.1550, device='cuda:0')
epoch: 119 test_true_pfm: 5630.581998462997 sim_pfm: 446.60138595591224
episode: 476 training return: tensor(406.5252, device='cuda:0')
episode: 477 training return: tensor(434.5686, device='cuda:0')
episode: 478 training return: tensor(377.4748, device='cuda:0')
episode: 479 training return: tensor(444.7243, device='cuda:0')
epoch: 120 test_true_pfm: 5674.4137939933435 sim_pfm: 448.3231088553633
episode: 480 training return: tensor(353.8311, device='cuda:0')
episode: 481 training return: tensor(361.5636, device='cuda:0')
episode: 482 training return: tensor(340.7098, device='cuda:0')
episode: 483 training return: tensor(358.0586, device='cuda:0')
epoch: 121 test_true_pfm: 5598.645805333198 sim_pfm: 404.4369305984389
episode: 484 training return: tensor(280.8775, device='cuda:0')
episode: 485 training return: tensor(340.0906, device='cuda:0')
episode: 486 training return: tensor(321.9591, device='cuda:0')
episode: 487 training return: tensor(305.6212, device='cuda:0')
epoch: 122 test_true_pfm: 5613.587151692357 sim_pfm: 442.13490701885894
episode: 488 training return: tensor(359.0135, device='cuda:0')
episode: 489 training return: tensor(361.3159, device='cuda:0')
episode: 490 training return: tensor(387.0665, device='cuda:0')
episode: 491 training return: tensor(382.4260, device='cuda:0')
epoch: 123 test_true_pfm: 5578.102470213921 sim_pfm: 425.3195462593382
episode: 492 training return: tensor(265.4841, device='cuda:0')
episode: 493 training return: tensor(376.4772, device='cuda:0')
episode: 494 training return: tensor(330.8551, device='cuda:0')
episode: 495 training return: tensor(208.5177, device='cuda:0')
epoch: 124 test_true_pfm: 5706.50162969144 sim_pfm: 460.63372913464747
episode: 496 training return: tensor(329.5240, device='cuda:0')
episode: 497 training return: tensor(291.7458, device='cuda:0')
episode: 498 training return: tensor(311.5957, device='cuda:0')
episode: 499 training return: tensor(310.2618, device='cuda:0')
epoch: 125 test_true_pfm: 5619.445785200495 sim_pfm: 434.2521238988168
episode: 500 training return: tensor(397.2581, device='cuda:0')
episode: 501 training return: tensor(370.6908, device='cuda:0')
episode: 502 training return: tensor(342.6854, device='cuda:0')
episode: 503 training return: tensor(300.7560, device='cuda:0')
epoch: 126 test_true_pfm: 5548.094517685181 sim_pfm: 447.2086198593509
episode: 504 training return: tensor(318.9715, device='cuda:0')
episode: 505 training return: tensor(287.6721, device='cuda:0')
episode: 506 training return: tensor(324.4155, device='cuda:0')
episode: 507 training return: tensor(333.3817, device='cuda:0')
epoch: 127 test_true_pfm: 5666.982166410461 sim_pfm: 467.8841388390865
episode: 508 training return: tensor(319.3322, device='cuda:0')
episode: 509 training return: tensor(337.3669, device='cuda:0')
episode: 510 training return: tensor(414.3315, device='cuda:0')
episode: 511 training return: tensor(370.6339, device='cuda:0')
epoch: 128 test_true_pfm: 5622.601523349737 sim_pfm: 455.25625496412005
episode: 512 training return: tensor(274.2262, device='cuda:0')
episode: 513 training return: tensor(201.4083, device='cuda:0')
episode: 514 training return: tensor(300.3854, device='cuda:0')
episode: 515 training return: tensor(333.9218, device='cuda:0')
epoch: 129 test_true_pfm: 5604.806812107466 sim_pfm: 455.97753685200587
episode: 516 training return: tensor(288.0116, device='cuda:0')
episode: 517 training return: tensor(280.8692, device='cuda:0')
episode: 518 training return: tensor(502.9416, device='cuda:0')
episode: 519 training return: tensor(392.5171, device='cuda:0')
epoch: 130 test_true_pfm: 5674.289000169006 sim_pfm: 482.20756839122623
episode: 520 training return: tensor(339.4891, device='cuda:0')
episode: 521 training return: tensor(291.0558, device='cuda:0')
episode: 522 training return: tensor(311.1778, device='cuda:0')
episode: 523 training return: tensor(142.3517, device='cuda:0')
epoch: 131 test_true_pfm: 5670.92124944067 sim_pfm: 445.6712477583981
episode: 524 training return: tensor(329.2692, device='cuda:0')
episode: 525 training return: tensor(398.6775, device='cuda:0')
episode: 526 training return: tensor(322.7802, device='cuda:0')
episode: 527 training return: tensor(262.0420, device='cuda:0')
epoch: 132 test_true_pfm: 5590.879442704223 sim_pfm: 482.4525865817753
episode: 528 training return: tensor(273.6634, device='cuda:0')
episode: 529 training return: tensor(312.3534, device='cuda:0')
episode: 530 training return: tensor(356.8463, device='cuda:0')
episode: 531 training return: tensor(290.3588, device='cuda:0')
epoch: 133 test_true_pfm: 5649.451482651697 sim_pfm: 491.4526350277786
episode: 532 training return: tensor(368.5461, device='cuda:0')
episode: 533 training return: tensor(260.7067, device='cuda:0')
episode: 534 training return: tensor(412.9552, device='cuda:0')
episode: 535 training return: tensor(356.8359, device='cuda:0')
epoch: 134 test_true_pfm: 5697.507852745254 sim_pfm: 452.6252768174745
episode: 536 training return: tensor(391.4414, device='cuda:0')
episode: 537 training return: tensor(393.4023, device='cuda:0')
episode: 538 training return: tensor(393.3169, device='cuda:0')
episode: 539 training return: tensor(380.1070, device='cuda:0')
epoch: 135 test_true_pfm: 5659.300620537609 sim_pfm: 393.899876448287
episode: 540 training return: tensor(305.6093, device='cuda:0')
episode: 541 training return: tensor(443.3028, device='cuda:0')
episode: 542 training return: tensor(398.8224, device='cuda:0')
episode: 543 training return: tensor(301.4470, device='cuda:0')
epoch: 136 test_true_pfm: 5651.753601049096 sim_pfm: 416.15940109807224
episode: 544 training return: tensor(383.6582, device='cuda:0')
episode: 545 training return: tensor(331.5972, device='cuda:0')
episode: 546 training return: tensor(355.5517, device='cuda:0')
episode: 547 training return: tensor(305.3709, device='cuda:0')
epoch: 137 test_true_pfm: 5707.44729556009 sim_pfm: 438.3934729917285
episode: 548 training return: tensor(376.9996, device='cuda:0')
episode: 549 training return: tensor(398.0205, device='cuda:0')
episode: 550 training return: tensor(321.5851, device='cuda:0')
episode: 551 training return: tensor(452.9538, device='cuda:0')
epoch: 138 test_true_pfm: 5674.907310852104 sim_pfm: 502.50063877700205
episode: 552 training return: tensor(340.4261, device='cuda:0')
episode: 553 training return: tensor(301.4118, device='cuda:0')
episode: 554 training return: tensor(467.5876, device='cuda:0')
episode: 555 training return: tensor(327.3991, device='cuda:0')
epoch: 139 test_true_pfm: 5639.318464617224 sim_pfm: 488.7642953346464
episode: 556 training return: tensor(306.7279, device='cuda:0')
episode: 557 training return: tensor(299.5625, device='cuda:0')
episode: 558 training return: tensor(434.5893, device='cuda:0')
episode: 559 training return: tensor(439.2759, device='cuda:0')
epoch: 140 test_true_pfm: 5660.667574827119 sim_pfm: 490.7162405089475
episode: 560 training return: tensor(363.9835, device='cuda:0')
episode: 561 training return: tensor(409.4661, device='cuda:0')
episode: 562 training return: tensor(319.6320, device='cuda:0')
episode: 563 training return: tensor(307.3646, device='cuda:0')
epoch: 141 test_true_pfm: 5712.820761615007 sim_pfm: 477.3844681729873
episode: 564 training return: tensor(391.0406, device='cuda:0')
episode: 565 training return: tensor(306.8118, device='cuda:0')
episode: 566 training return: tensor(471.7115, device='cuda:0')
episode: 567 training return: tensor(302.5959, device='cuda:0')
epoch: 142 test_true_pfm: 5615.643632194432 sim_pfm: 441.9497229559541
episode: 568 training return: tensor(370.6832, device='cuda:0')
episode: 569 training return: tensor(351.4606, device='cuda:0')
episode: 570 training return: tensor(380.4887, device='cuda:0')
episode: 571 training return: tensor(313.8938, device='cuda:0')
epoch: 143 test_true_pfm: 5748.813569759682 sim_pfm: 509.27358359115897
episode: 572 training return: tensor(387.8818, device='cuda:0')
episode: 573 training return: tensor(180.5177, device='cuda:0')
episode: 574 training return: tensor(355.2327, device='cuda:0')
episode: 575 training return: tensor(515.2586, device='cuda:0')
epoch: 144 test_true_pfm: 5643.84568906442 sim_pfm: 452.8833420580874
episode: 576 training return: tensor(347.3473, device='cuda:0')
episode: 577 training return: tensor(439.2938, device='cuda:0')
episode: 578 training return: tensor(330.3706, device='cuda:0')
episode: 579 training return: tensor(345.4371, device='cuda:0')
epoch: 145 test_true_pfm: 5656.354654892522 sim_pfm: 450.70349847872666
episode: 580 training return: tensor(306.6817, device='cuda:0')
episode: 581 training return: tensor(409.4427, device='cuda:0')
episode: 582 training return: tensor(381.4461, device='cuda:0')
episode: 583 training return: tensor(363.9334, device='cuda:0')
epoch: 146 test_true_pfm: 5709.539648828654 sim_pfm: 429.09265383662813
episode: 584 training return: tensor(290.0946, device='cuda:0')
episode: 585 training return: tensor(433.5584, device='cuda:0')
episode: 586 training return: tensor(358.3036, device='cuda:0')
episode: 587 training return: tensor(384.3113, device='cuda:0')
epoch: 147 test_true_pfm: 5450.710020583055 sim_pfm: 484.0388997310074
episode: 588 training return: tensor(350.3718, device='cuda:0')
episode: 589 training return: tensor(312.0220, device='cuda:0')
episode: 590 training return: tensor(363.0187, device='cuda:0')
episode: 591 training return: tensor(443.6261, device='cuda:0')
epoch: 148 test_true_pfm: 5655.392132745624 sim_pfm: 439.74957971911255
episode: 592 training return: tensor(335.7689, device='cuda:0')
episode: 593 training return: tensor(356.2618, device='cuda:0')
episode: 594 training return: tensor(284.0874, device='cuda:0')
episode: 595 training return: tensor(309.0813, device='cuda:0')
epoch: 149 test_true_pfm: 5741.603486245284 sim_pfm: 467.11629033994785
episode: 596 training return: tensor(472.1611, device='cuda:0')
episode: 597 training return: tensor(190.2803, device='cuda:0')
episode: 598 training return: tensor(373.7144, device='cuda:0')
episode: 599 training return: tensor(323.3853, device='cuda:0')
epoch: 150 test_true_pfm: 5773.376344161489 sim_pfm: 519.7967454801159
