['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'mixed', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.22034125685691833 test_loss: 0.16353558301925658
epoch: 1 training_loss 0.14576560124754906 test_loss: 0.13543654680252076
epoch: 2 training_loss 0.13503346029669047 test_loss: 0.11596047878265381
epoch: 3 training_loss 0.11968415267765523 test_loss: 0.10603218078613282
epoch: 4 training_loss 0.11100316133350134 test_loss: 0.09920142889022827
epoch: 5 training_loss 0.11780933927744627 test_loss: 0.12230738401412963
epoch: 6 training_loss 0.10937856692820787 test_loss: 0.11557097434997558
epoch: 7 training_loss 0.11057565715163946 test_loss: 0.10861016511917114
epoch: 8 training_loss 0.10369205459952355 test_loss: 0.10066218376159668
epoch: 9 training_loss 0.10329288979992271 test_loss: 0.10371129512786866
epoch: 10 training_loss 0.1104536870494485 test_loss: 0.10478124618530274
epoch: 11 training_loss 0.10094132266938687 test_loss: 0.10752830505371094
epoch: 12 training_loss 0.10421200390905141 test_loss: 0.10122765302658081
epoch: 13 training_loss 0.09949244748800994 test_loss: 0.096245938539505
epoch: 14 training_loss 0.09780271973460913 test_loss: 0.11102720499038696
epoch: 15 training_loss 0.09707464225590229 test_loss: 0.08833734393119812
epoch: 16 training_loss 0.09959809385240077 test_loss: 0.10162186622619629
epoch: 17 training_loss 0.09715480696409941 test_loss: 0.09568383693695068
epoch: 18 training_loss 0.09771311951801182 test_loss: 0.10655163526535034
epoch: 19 training_loss 0.09072166884317995 test_loss: 0.09686003923416138
epoch: 20 training_loss 0.09142255442216993 test_loss: 0.09085694551467896
epoch: 21 training_loss 0.0830560515075922 test_loss: 0.08189681768417359
epoch: 22 training_loss 0.08947552181780338 test_loss: 0.08331899642944336
epoch: 23 training_loss 0.0852240140736103 test_loss: 0.09151450395584107
epoch: 24 training_loss 0.08034871375188231 test_loss: 0.07516546845436096
epoch: 25 training_loss 0.08399098480120301 test_loss: 0.08905717134475707
epoch: 26 training_loss 0.08423787733539939 test_loss: 0.08427679538726807
epoch: 27 training_loss 0.07747491843998432 test_loss: 0.0932895839214325
epoch: 28 training_loss 0.07944073265418411 test_loss: 0.08379115462303162
epoch: 29 training_loss 0.0781281310133636 test_loss: 0.06880751848220826
epoch: 30 training_loss 0.07826996991410852 test_loss: 0.07373926043510437
epoch: 31 training_loss 0.06916741617023944 test_loss: 0.07192054390907288
epoch: 32 training_loss 0.07705179627053439 test_loss: 0.07892990112304688
epoch: 33 training_loss 0.07427752180024981 test_loss: 0.06925053596496582
epoch: 34 training_loss 0.07289290096610784 test_loss: 0.07327407002449035
epoch: 35 training_loss 0.0768629079312086 test_loss: 0.07939403653144836
epoch: 36 training_loss 0.07218242157250643 test_loss: 0.07332454919815064
epoch: 37 training_loss 0.07345679864287376 test_loss: 0.07858948707580567
epoch: 38 training_loss 0.07712589550763368 test_loss: 0.06807830929756165
epoch: 39 training_loss 0.07383227907121181 test_loss: 0.07807388305664062
epoch: 40 training_loss 0.07004892265424133 test_loss: 0.07403686046600341
epoch: 41 training_loss 0.07253387045115232 test_loss: 0.06771852374076844
epoch: 42 training_loss 0.07257403690367938 test_loss: 0.07886814475059509
epoch: 43 training_loss 0.06795853938907385 test_loss: 0.06684419512748718
epoch: 44 training_loss 0.08045386420562864 test_loss: 0.07452055811882019
epoch: 45 training_loss 0.07215889342129231 test_loss: 0.07148351073265076
epoch: 46 training_loss 0.07408488746732474 test_loss: 0.07023961544036865
epoch: 47 training_loss 0.07147243628278374 test_loss: 0.07048869729042054
epoch: 48 training_loss 0.07245538825169205 test_loss: 0.06072706580162048
epoch: 49 training_loss 0.07400884138420225 test_loss: 0.05839900374412536
epoch: 50 training_loss 0.07101424299180507 test_loss: 0.08324453830718995
epoch: 51 training_loss 0.0652244584262371 test_loss: 0.06324503421783448
epoch: 52 training_loss 0.06661306450143456 test_loss: 0.07312626242637635
epoch: 53 training_loss 0.07366205137223006 test_loss: 0.06411855220794678
epoch: 54 training_loss 0.07092956190928817 test_loss: 0.06795338392257691
epoch: 55 training_loss 0.0721773286163807 test_loss: 0.07160884141921997
epoch: 56 training_loss 0.07151467775925995 test_loss: 0.0670260727405548
epoch: 57 training_loss 0.06652239313349127 test_loss: 0.06606614589691162
epoch: 58 training_loss 0.07340266450308264 test_loss: 0.06422796845436096
epoch: 59 training_loss 0.06528412472456693 test_loss: 0.07430275678634643
epoch: 60 training_loss 0.06383024327456951 test_loss: 0.06882201433181763
epoch: 61 training_loss 0.06741399211809039 test_loss: 0.06878076791763306
epoch: 62 training_loss 0.071839640121907 test_loss: 0.07315115928649903
epoch: 63 training_loss 0.06873970806598663 test_loss: 0.06707803606986999
epoch: 64 training_loss 0.0698675636202097 test_loss: 0.07422963976860046
epoch: 65 training_loss 0.07615097146481276 test_loss: 0.070317542552948
epoch: 66 training_loss 0.06650754153728484 test_loss: 0.07470774054527282
epoch: 67 training_loss 0.06902797551825643 test_loss: 0.06619204878807068
epoch: 68 training_loss 0.06546334138140082 test_loss: 0.07225437760353089
epoch: 69 training_loss 0.06888213410973548 test_loss: 0.06055181622505188
epoch: 70 training_loss 0.06914681864902378 test_loss: 0.06304066777229309
epoch: 71 training_loss 0.06951080532744527 test_loss: 0.08012398481369018
epoch: 72 training_loss 0.0704308932647109 test_loss: 0.06637967824935913
epoch: 73 training_loss 0.06774524863809347 test_loss: 0.07636299133300781
epoch: 74 training_loss 0.061305327285081146 test_loss: 0.07791441679000854
epoch: 75 training_loss 0.06735944211483001 test_loss: 0.07244687080383301
epoch: 76 training_loss 0.067351238951087 test_loss: 0.07049253582954407
epoch: 77 training_loss 0.06627702854573726 test_loss: 0.07080522179603577
epoch: 78 training_loss 0.07184263745322823 test_loss: 0.06748977899551392
epoch: 79 training_loss 0.07282629767432809 test_loss: 0.07203992009162903
epoch: 80 training_loss 0.06843204497359694 test_loss: 0.07063734531402588
epoch: 81 training_loss 0.07066608129069209 test_loss: 0.06024770140647888
epoch: 82 training_loss 0.0682827440276742 test_loss: 0.058203589916229245
epoch: 83 training_loss 0.06975829536095261 test_loss: 0.06675156950950623
epoch: 84 training_loss 0.06842129878699779 test_loss: 0.08036439418792725
epoch: 85 training_loss 0.06790480798110365 test_loss: 0.06444779634475709
epoch: 86 training_loss 0.06656776141375303 test_loss: 0.0625774621963501
epoch: 87 training_loss 0.06453813630156219 test_loss: 0.05232568383216858
epoch: 88 training_loss 0.06939962908625602 test_loss: 0.060558277368545535
epoch: 89 training_loss 0.0686921425163746 test_loss: 0.06081627607345581
epoch: 90 training_loss 0.0649276511836797 test_loss: 0.0660541832447052
epoch: 91 training_loss 0.06781273921951651 test_loss: 0.06389147639274598
epoch: 92 training_loss 0.06916583688929677 test_loss: 0.067813640832901
epoch: 93 training_loss 0.07144815200939775 test_loss: 0.06790576577186584
epoch: 94 training_loss 0.06538521945476532 test_loss: 0.0628601610660553
epoch: 95 training_loss 0.07332939861342311 test_loss: 0.07370569109916687
epoch: 96 training_loss 0.06761949395760894 test_loss: 0.0657221496105194
epoch: 97 training_loss 0.06736685665324331 test_loss: 0.06334018111228942
epoch: 98 training_loss 0.06860360011458397 test_loss: 0.06911527514457702
epoch: 99 training_loss 0.06348585389554501 test_loss: 0.06182225346565247
epoch: 100 training_loss 0.06913240866735577 test_loss: 0.05532083511352539
epoch: 101 training_loss 0.07004025993868708 test_loss: 0.06208535432815552
epoch: 102 training_loss 0.061897536870092154 test_loss: 0.06797925233840943
epoch: 103 training_loss 0.070044727306813 test_loss: 0.0675097405910492
epoch: 104 training_loss 0.061666791355237366 test_loss: 0.06579756140708923
epoch: 105 training_loss 0.0685875311680138 test_loss: 0.0674048662185669
epoch: 106 training_loss 0.07360408637672662 test_loss: 0.06797789335250855
epoch: 107 training_loss 0.0628101428039372 test_loss: 0.05731712579727173
epoch: 108 training_loss 0.06482900956645608 test_loss: 0.07373659014701843
epoch: 109 training_loss 0.07337036462500691 test_loss: 0.0646479070186615
epoch: 110 training_loss 0.06519822672940791 test_loss: 0.060390764474868776
epoch: 111 training_loss 0.06487827820703387 test_loss: 0.06659396290779114
epoch: 112 training_loss 0.06586168083362282 test_loss: 0.06734393239021301
epoch: 113 training_loss 0.06444802600890398 test_loss: 0.06115486025810242
epoch: 114 training_loss 0.06620629472658038 test_loss: 0.07299690842628478
epoch: 115 training_loss 0.06142254956066608 test_loss: 0.08503458499908448
epoch: 116 training_loss 0.07033062781207264 test_loss: 0.07126324772834777
epoch: 117 training_loss 0.05961914299055934 test_loss: 0.06890515685081482
epoch: 118 training_loss 0.06646622354164719 test_loss: 0.05550849437713623
epoch: 119 training_loss 0.06599303861148656 test_loss: 0.05996666550636291
epoch: 120 training_loss 0.06769545134156943 test_loss: 0.069824481010437
epoch: 121 training_loss 0.06722943841479719 test_loss: 0.07012124061584472
epoch: 122 training_loss 0.06830809435807168 test_loss: 0.07584097981452942
epoch: 123 training_loss 0.06073589250445366 test_loss: 0.06666455864906311
epoch: 124 training_loss 0.06968613077886403 test_loss: 0.06637367606163025
epoch: 125 training_loss 0.06181129816919565 test_loss: 0.06931652426719666
epoch: 126 training_loss 0.06555098241195083 test_loss: 0.07738874554634094
epoch: 127 training_loss 0.06242834206670523 test_loss: 0.057112759351730345
epoch: 128 training_loss 0.06790129998698831 test_loss: 0.0610734760761261
epoch: 129 training_loss 0.060469968002289536 test_loss: 0.07095374464988709
epoch: 130 training_loss 0.06686226699501276 test_loss: 0.06450419425964356
epoch: 131 training_loss 0.061820801012218 test_loss: 0.06726303696632385
epoch: 132 training_loss 0.07104508690536022 test_loss: 0.0702294647693634
epoch: 133 training_loss 0.061283339466899636 test_loss: 0.05366730093955994
epoch: 134 training_loss 0.0641480103507638 test_loss: 0.06580273509025573
epoch: 135 training_loss 0.06952168058604002 test_loss: 0.07372297644615174
epoch: 136 training_loss 0.062403751080855725 test_loss: 0.07970890402793884
epoch: 137 training_loss 0.07082879992201924 test_loss: 0.07689104080200196
epoch: 138 training_loss 0.06793182857334613 test_loss: 0.05402228832244873
epoch: 139 training_loss 0.06731768107041716 test_loss: 0.05805227756500244
epoch: 140 training_loss 0.06258582879789173 test_loss: 0.06648508310317994
epoch: 141 training_loss 0.07070720070973038 test_loss: 0.06553896069526673
epoch: 142 training_loss 0.06690453438088298 test_loss: 0.060759943723678586
epoch: 143 training_loss 0.06393077567219735 test_loss: 0.06323844194412231
epoch: 144 training_loss 0.06689183702692389 test_loss: 0.06892014741897583
epoch: 145 training_loss 0.06434087170287967 test_loss: 0.06705531477928162
epoch: 146 training_loss 0.067348026111722 test_loss: 0.07253828048706054
epoch: 147 training_loss 0.06222541408613324 test_loss: 0.08771606683731079
epoch: 148 training_loss 0.062435578070580956 test_loss: 0.07369529604911804
epoch: 149 training_loss 0.07088860570453108 test_loss: 0.06293864846229554
epoch: 0 training_loss 56.38053375244141 test_loss: 34.233837890625
epoch: 1 training_loss 27.663629684448242 test_loss: 22.80579528808594
epoch: 2 training_loss 20.666176280975343 test_loss: 18.994609069824218
epoch: 3 training_loss 17.401554832458498 test_loss: 15.894142150878906
epoch: 4 training_loss 15.164822702407838 test_loss: 14.332481384277344
epoch: 5 training_loss 13.707146434783935 test_loss: 12.82474365234375
epoch: 6 training_loss 12.574403429031372 test_loss: 11.749765777587891
epoch: 7 training_loss 11.467712240219116 test_loss: 11.118438720703125
epoch: 8 training_loss 10.759580516815186 test_loss: 10.526803588867187
epoch: 9 training_loss 10.052000331878663 test_loss: 9.816252899169921
epoch: 10 training_loss 9.45132891178131 test_loss: 9.075479888916016
epoch: 11 training_loss 9.078975715637206 test_loss: 8.783280944824218
epoch: 12 training_loss 8.671059961318969 test_loss: 8.5155517578125
epoch: 13 training_loss 8.252507977485656 test_loss: 8.045132446289063
epoch: 14 training_loss 7.91444993019104 test_loss: 7.52936019897461
epoch: 15 training_loss 7.573328776359558 test_loss: 7.424653625488281
epoch: 16 training_loss 7.364479112625122 test_loss: 7.127483367919922
epoch: 17 training_loss 7.000235123634338 test_loss: 6.924861907958984
epoch: 18 training_loss 7.015475316047668 test_loss: 6.569658660888672
epoch: 19 training_loss 6.725156254768372 test_loss: 6.939170837402344
epoch: 20 training_loss 6.583061800003052 test_loss: 6.699633026123047
epoch: 21 training_loss 6.414853129386902 test_loss: 6.501651000976563
epoch: 22 training_loss 6.300500068664551 test_loss: 6.216886138916015
epoch: 23 training_loss 6.046053771972656 test_loss: 6.04033088684082
epoch: 24 training_loss 5.944120230674744 test_loss: 5.766338348388672
epoch: 25 training_loss 5.879377269744873 test_loss: 5.684434127807617
epoch: 26 training_loss 5.69899836063385 test_loss: 5.322952651977539
epoch: 27 training_loss 5.6450214672088626 test_loss: 5.885216522216797
epoch: 28 training_loss 5.607327299118042 test_loss: 5.369872665405273
epoch: 29 training_loss 5.482966156005859 test_loss: 5.451149749755859
epoch: 30 training_loss 5.41509521484375 test_loss: 5.6614330291748045
epoch: 31 training_loss 5.250569734573364 test_loss: 5.156260681152344
epoch: 32 training_loss 5.29804801940918 test_loss: 5.062273406982422
epoch: 33 training_loss 5.158457078933716 test_loss: 5.009162139892578
epoch: 34 training_loss 5.111685967445373 test_loss: 4.891055679321289
epoch: 35 training_loss 5.043066983222961 test_loss: 4.969706726074219
epoch: 36 training_loss 5.027785878181458 test_loss: 4.952177429199219
epoch: 37 training_loss 4.948117864131928 test_loss: 5.084077453613281
epoch: 38 training_loss 4.833149480819702 test_loss: 4.872596740722656
epoch: 39 training_loss 4.951732647418976 test_loss: 4.833840560913086
epoch: 40 training_loss 4.823667845726013 test_loss: 4.740359878540039
epoch: 41 training_loss 4.744932577610016 test_loss: 4.872062301635742
epoch: 42 training_loss 4.818351888656617 test_loss: 4.729257583618164
epoch: 43 training_loss 4.637781991958618 test_loss: 4.511356353759766
epoch: 44 training_loss 4.720650203227997 test_loss: 4.543890380859375
epoch: 45 training_loss 4.557460784912109 test_loss: 4.394830703735352
epoch: 46 training_loss 4.452763137817382 test_loss: 4.535899353027344
epoch: 47 training_loss 4.478592312335968 test_loss: 4.363293838500977
epoch: 48 training_loss 4.410728826522827 test_loss: 4.283376693725586
epoch: 49 training_loss 4.441129674911499 test_loss: 4.46331558227539
epoch: 50 training_loss 4.416884803771973 test_loss: 4.402060699462891
epoch: 51 training_loss 4.382943592071533 test_loss: 4.301045227050781
epoch: 52 training_loss 4.327709653377533 test_loss: 4.405910873413086
epoch: 53 training_loss 4.326389665603638 test_loss: 4.271771621704102
epoch: 54 training_loss 4.288652296066284 test_loss: 4.2823139190673825
epoch: 55 training_loss 4.221552004814148 test_loss: 4.301898574829101
epoch: 56 training_loss 4.067514922618866 test_loss: 4.014066696166992
epoch: 57 training_loss 4.164415910243988 test_loss: 4.137827682495117
epoch: 58 training_loss 4.113635032176972 test_loss: 4.1108043670654295
epoch: 59 training_loss 4.057308094501495 test_loss: 3.992330551147461
epoch: 60 training_loss 4.039190635681153 test_loss: 4.063001251220703
epoch: 61 training_loss 4.01902057647705 test_loss: 4.028262329101563
epoch: 62 training_loss 4.028008263111115 test_loss: 4.127831649780274
epoch: 63 training_loss 4.052295641899109 test_loss: 4.0386089324951175
epoch: 64 training_loss 4.031537446975708 test_loss: 3.915836715698242
epoch: 65 training_loss 3.9358160376548765 test_loss: 3.7504512786865236
epoch: 66 training_loss 3.9405438828468324 test_loss: 4.0354045867919925
epoch: 67 training_loss 3.967496223449707 test_loss: 3.9042015075683594
epoch: 68 training_loss 3.9383300948143005 test_loss: 3.784947967529297
epoch: 69 training_loss 3.964676284790039 test_loss: 3.8081932067871094
epoch: 70 training_loss 3.883782410621643 test_loss: 3.956736373901367
epoch: 71 training_loss 3.785942828655243 test_loss: 3.715828704833984
epoch: 72 training_loss 3.800900659561157 test_loss: 3.689794158935547
epoch: 73 training_loss 3.8599067974090575 test_loss: 3.761092758178711
epoch: 74 training_loss 3.728076195716858 test_loss: 3.734576416015625
epoch: 75 training_loss 3.790710995197296 test_loss: 3.835037612915039
epoch: 76 training_loss 3.7740552186965943 test_loss: 3.644165802001953
epoch: 77 training_loss 3.7061436939239503 test_loss: 3.6087512969970703
epoch: 78 training_loss 3.7189628863334656 test_loss: 3.769510269165039
epoch: 79 training_loss 3.6818070006370545 test_loss: 3.65845947265625
epoch: 80 training_loss 3.6473822832107543 test_loss: 3.7734970092773437
epoch: 81 training_loss 3.690946879386902 test_loss: 3.6885398864746093
epoch: 82 training_loss 3.6456129503250123 test_loss: 3.522467041015625
epoch: 83 training_loss 3.5874563431739808 test_loss: 3.600291061401367
epoch: 84 training_loss 3.670514979362488 test_loss: 3.5880157470703127
epoch: 85 training_loss 3.60197669506073 test_loss: 3.750484848022461
epoch: 86 training_loss 3.5446187233924866 test_loss: 3.5770919799804686
epoch: 87 training_loss 3.6094088101387025 test_loss: 3.444605255126953
epoch: 88 training_loss 3.6003907108306885 test_loss: 3.67052001953125
epoch: 89 training_loss 3.5573797726631167 test_loss: 3.373818588256836
epoch: 90 training_loss 3.5245534133911134 test_loss: 3.499378967285156
epoch: 91 training_loss 3.4637949514389037 test_loss: 3.4632949829101562
epoch: 92 training_loss 3.5231510972976685 test_loss: 3.5199420928955076
epoch: 93 training_loss 3.4945605254173278 test_loss: 3.2433544158935548
epoch: 94 training_loss 3.4706186413764955 test_loss: 3.6239070892333984
epoch: 95 training_loss 3.476006655693054 test_loss: 3.4458263397216795
epoch: 96 training_loss 3.5035169291496278 test_loss: 3.4876495361328126
epoch: 97 training_loss 3.4956271839141846 test_loss: 3.5182239532470705
epoch: 98 training_loss 3.3957420945167542 test_loss: 3.4729637145996093
epoch: 99 training_loss 3.4183835864067076 test_loss: 3.49548454284668
epoch: 100 training_loss 3.4314694929122926 test_loss: 3.307371139526367
epoch: 101 training_loss 3.433339159488678 test_loss: 3.3647186279296877
epoch: 102 training_loss 3.4099763107299803 test_loss: 3.4378807067871096
epoch: 103 training_loss 3.4193259358406065 test_loss: 3.3474769592285156
epoch: 104 training_loss 3.3286788296699523 test_loss: 3.3101436614990236
epoch: 105 training_loss 3.419483098983765 test_loss: 3.413427734375
epoch: 106 training_loss 3.3195210814476015 test_loss: 3.3710586547851564
epoch: 107 training_loss 3.298585977554321 test_loss: 3.200862503051758
epoch: 108 training_loss 3.2862155556678774 test_loss: 3.268636703491211
epoch: 109 training_loss 3.341524519920349 test_loss: 3.339626693725586
epoch: 110 training_loss 3.286221523284912 test_loss: 3.2989013671875
epoch: 111 training_loss 3.292559723854065 test_loss: 3.3186416625976562
epoch: 112 training_loss 3.3377412080764772 test_loss: 3.3472660064697264
epoch: 113 training_loss 3.279006280899048 test_loss: 3.1895383834838866
epoch: 114 training_loss 3.2883284878730774 test_loss: 3.164251136779785
epoch: 115 training_loss 3.2263507771492006 test_loss: 3.2534366607666017
epoch: 116 training_loss 3.1843406748771668 test_loss: 3.3274391174316404
epoch: 117 training_loss 3.240994074344635 test_loss: 3.1817609786987306
epoch: 118 training_loss 3.2563463282585143 test_loss: 3.360493850708008
epoch: 119 training_loss 3.2651060914993284 test_loss: 3.2645034790039062
epoch: 120 training_loss 3.211088228225708 test_loss: 3.2574691772460938
epoch: 121 training_loss 3.1414886140823364 test_loss: 3.2388660430908205
epoch: 122 training_loss 3.2382918190956116 test_loss: 3.1852161407470705
epoch: 123 training_loss 3.1405159544944765 test_loss: 3.1185962677001955
epoch: 124 training_loss 3.1347491431236265 test_loss: 3.001060676574707
epoch: 125 training_loss 3.191488656997681 test_loss: 3.241769027709961
epoch: 126 training_loss 3.0909317564964294 test_loss: 3.184044075012207
epoch: 127 training_loss 3.1969649744033815 test_loss: 3.223380279541016
epoch: 128 training_loss 3.09656907081604 test_loss: 3.2221324920654295
epoch: 129 training_loss 3.150689799785614 test_loss: 3.204450225830078
epoch: 130 training_loss 3.10959166765213 test_loss: 3.207097625732422
epoch: 131 training_loss 3.143592269420624 test_loss: 3.2111347198486326
epoch: 132 training_loss 3.1478906416893007 test_loss: 3.1915027618408205
epoch: 133 training_loss 3.145610337257385 test_loss: 2.992172050476074
epoch: 134 training_loss 3.108680772781372 test_loss: 3.139872360229492
epoch: 135 training_loss 3.0801728892326357 test_loss: 3.1368696212768556
epoch: 136 training_loss 3.0920055866241456 test_loss: 3.1376155853271483
epoch: 137 training_loss 3.0624339389801025 test_loss: 2.9935163497924804
epoch: 138 training_loss 3.0520549535751345 test_loss: 3.0951906204223634
epoch: 139 training_loss 3.039620590209961 test_loss: 3.036277961730957
epoch: 140 training_loss 3.1242514848709106 test_loss: 2.9408601760864257
epoch: 141 training_loss 2.996651086807251 test_loss: 3.0120965957641603
epoch: 142 training_loss 3.0880307126045228 test_loss: 3.0234676361083985
epoch: 143 training_loss 3.0245380544662477 test_loss: 3.016786575317383
epoch: 144 training_loss 3.0105402302742004 test_loss: 3.1089174270629885
epoch: 145 training_loss 2.990592906475067 test_loss: 3.0707780838012697
epoch: 146 training_loss 3.005357575416565 test_loss: 3.0463685989379883
epoch: 147 training_loss 2.9345839381217957 test_loss: 3.050658416748047
epoch: 148 training_loss 3.0334737396240232 test_loss: 2.8502254486083984
epoch: 149 training_loss 3.028741571903229 test_loss: 2.946356773376465
5104.733356724982
episode: 0 training return: tensor(-465.3329, device='cuda:0')
episode: 1 training return: tensor(-512.2949, device='cuda:0')
episode: 2 training return: tensor(-316.3682, device='cuda:0')
episode: 3 training return: tensor(-613.2471, device='cuda:0')
epoch: 1 test_true_pfm: 5512.515713432055 sim_pfm: -465.29167881120037
episode: 4 training return: tensor(-468.2606, device='cuda:0')
episode: 5 training return: tensor(-513.7327, device='cuda:0')
episode: 6 training return: tensor(-468.6607, device='cuda:0')
episode: 7 training return: tensor(-522.2807, device='cuda:0')
epoch: 2 test_true_pfm: 5047.533029952575 sim_pfm: -523.3806015863278
episode: 8 training return: tensor(-469.9788, device='cuda:0')
episode: 9 training return: tensor(-550.0649, device='cuda:0')
episode: 10 training return: tensor(-538.0314, device='cuda:0')
episode: 11 training return: tensor(-453.1681, device='cuda:0')
epoch: 3 test_true_pfm: 5269.2992083011295 sim_pfm: -463.92880618952523
episode: 12 training return: tensor(-453.9751, device='cuda:0')
episode: 13 training return: tensor(-543.2357, device='cuda:0')
episode: 14 training return: tensor(-612.1022, device='cuda:0')
episode: 15 training return: tensor(-559.1441, device='cuda:0')
epoch: 4 test_true_pfm: 5158.142973091231 sim_pfm: -488.02684356427443
episode: 16 training return: tensor(-483.6529, device='cuda:0')
episode: 17 training return: tensor(-594.6267, device='cuda:0')
episode: 18 training return: tensor(-465.3503, device='cuda:0')
episode: 19 training return: tensor(-532.6677, device='cuda:0')
epoch: 5 test_true_pfm: 5102.424668926985 sim_pfm: -457.2193538371551
episode: 20 training return: tensor(-527.4880, device='cuda:0')
episode: 21 training return: tensor(-544.6085, device='cuda:0')
episode: 22 training return: tensor(-448.0193, device='cuda:0')
episode: 23 training return: tensor(-464.7609, device='cuda:0')
epoch: 6 test_true_pfm: 5161.084678646209 sim_pfm: -463.80589137703646
episode: 24 training return: tensor(-498.2663, device='cuda:0')
episode: 25 training return: tensor(-502.9971, device='cuda:0')
episode: 26 training return: tensor(-462.8968, device='cuda:0')
episode: 27 training return: tensor(-505.4079, device='cuda:0')
epoch: 7 test_true_pfm: 5158.7307826819115 sim_pfm: -423.1649459506637
episode: 28 training return: tensor(-509.9128, device='cuda:0')
episode: 29 training return: tensor(-508.0691, device='cuda:0')
episode: 30 training return: tensor(-427.6925, device='cuda:0')
episode: 31 training return: tensor(-390.4797, device='cuda:0')
epoch: 8 test_true_pfm: 5087.77831349158 sim_pfm: -439.46186470019165
episode: 32 training return: tensor(-530.6224, device='cuda:0')
episode: 33 training return: tensor(-450.0652, device='cuda:0')
episode: 34 training return: tensor(-523.6335, device='cuda:0')
episode: 35 training return: tensor(-490.8785, device='cuda:0')
epoch: 9 test_true_pfm: 5147.438737208174 sim_pfm: -441.47287726870854
episode: 36 training return: tensor(-601.9683, device='cuda:0')
episode: 37 training return: tensor(-457.7823, device='cuda:0')
episode: 38 training return: tensor(-407.7468, device='cuda:0')
episode: 39 training return: tensor(-438.8781, device='cuda:0')
epoch: 10 test_true_pfm: 5246.248808339674 sim_pfm: -413.80319566023536
episode: 40 training return: tensor(-350.2135, device='cuda:0')
episode: 41 training return: tensor(-454.4930, device='cuda:0')
episode: 42 training return: tensor(-341.0872, device='cuda:0')
episode: 43 training return: tensor(-489.6851, device='cuda:0')
epoch: 11 test_true_pfm: 5202.0304608710985 sim_pfm: -450.65055906565004
episode: 44 training return: tensor(-494.6663, device='cuda:0')
episode: 45 training return: tensor(-490.3010, device='cuda:0')
episode: 46 training return: tensor(-527.9220, device='cuda:0')
episode: 47 training return: tensor(-577.9341, device='cuda:0')
epoch: 12 test_true_pfm: 5163.548232890447 sim_pfm: -392.30196134211536
episode: 48 training return: tensor(-491.2843, device='cuda:0')
episode: 49 training return: tensor(-522.5018, device='cuda:0')
episode: 50 training return: tensor(-475.2211, device='cuda:0')
episode: 51 training return: tensor(-429.7390, device='cuda:0')
epoch: 13 test_true_pfm: 5244.043466050151 sim_pfm: -452.5240403037751
episode: 52 training return: tensor(-471.5236, device='cuda:0')
episode: 53 training return: tensor(-510.1921, device='cuda:0')
episode: 54 training return: tensor(-481.4471, device='cuda:0')
episode: 55 training return: tensor(-430.2660, device='cuda:0')
epoch: 14 test_true_pfm: 5328.598748827943 sim_pfm: -341.550561499161
episode: 56 training return: tensor(-485.1768, device='cuda:0')
episode: 57 training return: tensor(-481.9640, device='cuda:0')
episode: 58 training return: tensor(-445.0451, device='cuda:0')
episode: 59 training return: tensor(-423.5905, device='cuda:0')
epoch: 15 test_true_pfm: 5251.974072084849 sim_pfm: -393.2658931562716
episode: 60 training return: tensor(-438.7376, device='cuda:0')
episode: 61 training return: tensor(-430.9400, device='cuda:0')
episode: 62 training return: tensor(-424.4651, device='cuda:0')
episode: 63 training return: tensor(-443.6818, device='cuda:0')
epoch: 16 test_true_pfm: 5168.06613337361 sim_pfm: -347.0562798406075
episode: 64 training return: tensor(-435.9894, device='cuda:0')
episode: 65 training return: tensor(-457.2738, device='cuda:0')
episode: 66 training return: tensor(-456.3770, device='cuda:0')
episode: 67 training return: tensor(-407.6832, device='cuda:0')
epoch: 17 test_true_pfm: 5124.630403999933 sim_pfm: -411.199368220948
episode: 68 training return: tensor(-509.7900, device='cuda:0')
episode: 69 training return: tensor(-448.9522, device='cuda:0')
episode: 70 training return: tensor(-595.9723, device='cuda:0')
episode: 71 training return: tensor(-512.0883, device='cuda:0')
epoch: 18 test_true_pfm: 5220.433650919407 sim_pfm: -390.4434831228864
episode: 72 training return: tensor(-441.0497, device='cuda:0')
episode: 73 training return: tensor(-410.2159, device='cuda:0')
episode: 74 training return: tensor(-433.6517, device='cuda:0')
episode: 75 training return: tensor(-516.8669, device='cuda:0')
epoch: 19 test_true_pfm: 5227.5346994196925 sim_pfm: -381.91275970230345
episode: 76 training return: tensor(-381.4213, device='cuda:0')
episode: 77 training return: tensor(-530.2634, device='cuda:0')
episode: 78 training return: tensor(-474.0291, device='cuda:0')
episode: 79 training return: tensor(-414.1915, device='cuda:0')
epoch: 20 test_true_pfm: 5377.797371801172 sim_pfm: -359.7112539317459
episode: 80 training return: tensor(-405.7495, device='cuda:0')
episode: 81 training return: tensor(-410.2626, device='cuda:0')
episode: 82 training return: tensor(-459.8892, device='cuda:0')
episode: 83 training return: tensor(-420.3452, device='cuda:0')
epoch: 21 test_true_pfm: 5282.4355780979095 sim_pfm: -353.41516171443317
episode: 84 training return: tensor(-495.6317, device='cuda:0')
episode: 85 training return: tensor(-469.7244, device='cuda:0')
episode: 86 training return: tensor(-412.1726, device='cuda:0')
episode: 87 training return: tensor(-514.1299, device='cuda:0')
epoch: 22 test_true_pfm: 5330.230804773695 sim_pfm: -379.6343545020306
episode: 88 training return: tensor(-492.3622, device='cuda:0')
episode: 89 training return: tensor(-410.3218, device='cuda:0')
episode: 90 training return: tensor(-449.3440, device='cuda:0')
episode: 91 training return: tensor(-384.3526, device='cuda:0')
epoch: 23 test_true_pfm: 5311.152192426681 sim_pfm: -380.355960413183
episode: 92 training return: tensor(-476.2693, device='cuda:0')
episode: 93 training return: tensor(-467.7619, device='cuda:0')
episode: 94 training return: tensor(-474.4862, device='cuda:0')
episode: 95 training return: tensor(-457.2083, device='cuda:0')
epoch: 24 test_true_pfm: 5317.151968501333 sim_pfm: -403.3245629488374
episode: 96 training return: tensor(-446.0649, device='cuda:0')
episode: 97 training return: tensor(-504.7998, device='cuda:0')
episode: 98 training return: tensor(-438.9402, device='cuda:0')
episode: 99 training return: tensor(-443.6127, device='cuda:0')
epoch: 25 test_true_pfm: 5218.688256006157 sim_pfm: -388.6255735544643
episode: 100 training return: tensor(-401.9270, device='cuda:0')
episode: 101 training return: tensor(-385.9065, device='cuda:0')
episode: 102 training return: tensor(-459.6161, device='cuda:0')
episode: 103 training return: tensor(-490.7425, device='cuda:0')
epoch: 26 test_true_pfm: 3848.278432507876 sim_pfm: -432.53118226118386
episode: 104 training return: tensor(-740.0403, device='cuda:0')
episode: 105 training return: tensor(-437.7769, device='cuda:0')
episode: 106 training return: tensor(-517.6719, device='cuda:0')
episode: 107 training return: tensor(-442.0050, device='cuda:0')
epoch: 27 test_true_pfm: 4753.1345499824565 sim_pfm: -347.5375754737567
episode: 108 training return: tensor(-520.3975, device='cuda:0')
episode: 109 training return: tensor(-547.2333, device='cuda:0')
episode: 110 training return: tensor(-433.7276, device='cuda:0')
episode: 111 training return: tensor(-514.8634, device='cuda:0')
epoch: 28 test_true_pfm: 5392.746003066989 sim_pfm: -430.07944919525954
episode: 112 training return: tensor(-388.6016, device='cuda:0')
episode: 113 training return: tensor(-400.6184, device='cuda:0')
episode: 114 training return: tensor(-416.1592, device='cuda:0')
episode: 115 training return: tensor(-448.5347, device='cuda:0')
epoch: 29 test_true_pfm: 5347.857533790192 sim_pfm: -389.5023132638792
episode: 116 training return: tensor(-469.7910, device='cuda:0')
episode: 117 training return: tensor(-428.6413, device='cuda:0')
episode: 118 training return: tensor(-433.2627, device='cuda:0')
episode: 119 training return: tensor(-392.9984, device='cuda:0')
epoch: 30 test_true_pfm: 5335.876533805291 sim_pfm: -392.5626779604063
episode: 120 training return: tensor(-403.6011, device='cuda:0')
episode: 121 training return: tensor(-488.4005, device='cuda:0')
episode: 122 training return: tensor(-427.3167, device='cuda:0')
episode: 123 training return: tensor(-402.5532, device='cuda:0')
epoch: 31 test_true_pfm: 5290.877000955197 sim_pfm: -279.6987748593286
episode: 124 training return: tensor(-500.1623, device='cuda:0')
episode: 125 training return: tensor(-444.3982, device='cuda:0')
episode: 126 training return: tensor(-384.5175, device='cuda:0')
episode: 127 training return: tensor(-483.2448, device='cuda:0')
epoch: 32 test_true_pfm: 5386.266542977188 sim_pfm: -384.72534463575965
episode: 128 training return: tensor(-460.8932, device='cuda:0')
episode: 129 training return: tensor(-437.3855, device='cuda:0')
episode: 130 training return: tensor(-353.2350, device='cuda:0')
episode: 131 training return: tensor(-418.8544, device='cuda:0')
epoch: 33 test_true_pfm: 5289.685555036348 sim_pfm: -353.4379590249737
episode: 132 training return: tensor(-523.2162, device='cuda:0')
episode: 133 training return: tensor(-495.6736, device='cuda:0')
episode: 134 training return: tensor(-388.4819, device='cuda:0')
episode: 135 training return: tensor(-448.5061, device='cuda:0')
epoch: 34 test_true_pfm: 5353.978522879799 sim_pfm: -311.3138246933813
episode: 136 training return: tensor(-446.8769, device='cuda:0')
episode: 137 training return: tensor(-498.3614, device='cuda:0')
episode: 138 training return: tensor(-354.4665, device='cuda:0')
episode: 139 training return: tensor(-489.2501, device='cuda:0')
epoch: 35 test_true_pfm: 5274.369250417469 sim_pfm: -316.02060424608254
episode: 140 training return: tensor(-466.7878, device='cuda:0')
episode: 141 training return: tensor(-434.9547, device='cuda:0')
episode: 142 training return: tensor(-384.7247, device='cuda:0')
episode: 143 training return: tensor(-413.8484, device='cuda:0')
epoch: 36 test_true_pfm: 6135.583243556539 sim_pfm: -325.74612785678863
episode: 144 training return: tensor(-392.8400, device='cuda:0')
episode: 145 training return: tensor(-420.0354, device='cuda:0')
episode: 146 training return: tensor(-399.9327, device='cuda:0')
episode: 147 training return: tensor(-462.1845, device='cuda:0')
epoch: 37 test_true_pfm: 5223.913227501312 sim_pfm: -359.6743665745016
episode: 148 training return: tensor(-439.1801, device='cuda:0')
episode: 149 training return: tensor(-470.6433, device='cuda:0')
episode: 150 training return: tensor(-494.0765, device='cuda:0')
episode: 151 training return: tensor(-435.1956, device='cuda:0')
epoch: 38 test_true_pfm: 5256.745048070107 sim_pfm: -352.2739618301275
episode: 152 training return: tensor(-406.1920, device='cuda:0')
episode: 153 training return: tensor(-422.6431, device='cuda:0')
episode: 154 training return: tensor(-439.4444, device='cuda:0')
episode: 155 training return: tensor(-387.4037, device='cuda:0')
epoch: 39 test_true_pfm: 5253.389635880355 sim_pfm: -356.26328443025704
episode: 156 training return: tensor(-371.5841, device='cuda:0')
episode: 157 training return: tensor(-458.1488, device='cuda:0')
episode: 158 training return: tensor(-428.9766, device='cuda:0')
episode: 159 training return: tensor(-321.3477, device='cuda:0')
epoch: 40 test_true_pfm: 5915.409035068107 sim_pfm: -340.18703417219996
episode: 160 training return: tensor(-318.6530, device='cuda:0')
episode: 161 training return: tensor(-414.5128, device='cuda:0')
episode: 162 training return: tensor(-399.7416, device='cuda:0')
episode: 163 training return: tensor(-443.7681, device='cuda:0')
epoch: 41 test_true_pfm: 5580.1662125974035 sim_pfm: -326.86175931902835
episode: 164 training return: tensor(-469.1386, device='cuda:0')
episode: 165 training return: tensor(-350.7864, device='cuda:0')
episode: 166 training return: tensor(-398.7037, device='cuda:0')
episode: 167 training return: tensor(-463.9312, device='cuda:0')
epoch: 42 test_true_pfm: 5129.232865272609 sim_pfm: -337.71653559363523
episode: 168 training return: tensor(-472.6436, device='cuda:0')
episode: 169 training return: tensor(-544.5150, device='cuda:0')
episode: 170 training return: tensor(-438.9399, device='cuda:0')
episode: 171 training return: tensor(-428.5634, device='cuda:0')
epoch: 43 test_true_pfm: 5384.142753438283 sim_pfm: -352.8542994413583
episode: 172 training return: tensor(-322.0209, device='cuda:0')
episode: 173 training return: tensor(-456.3120, device='cuda:0')
episode: 174 training return: tensor(-395.9787, device='cuda:0')
episode: 175 training return: tensor(-367.5892, device='cuda:0')
epoch: 44 test_true_pfm: 5270.245034701299 sim_pfm: -307.3493238581189
episode: 176 training return: tensor(-247.2900, device='cuda:0')
episode: 177 training return: tensor(-479.4098, device='cuda:0')
episode: 178 training return: tensor(-416.0703, device='cuda:0')
episode: 179 training return: tensor(-397.1114, device='cuda:0')
epoch: 45 test_true_pfm: 5406.649996787485 sim_pfm: -347.62945758155547
episode: 180 training return: tensor(-390.8164, device='cuda:0')
episode: 181 training return: tensor(-418.3067, device='cuda:0')
episode: 182 training return: tensor(-436.6740, device='cuda:0')
episode: 183 training return: tensor(-436.3778, device='cuda:0')
epoch: 46 test_true_pfm: 5426.6920956129325 sim_pfm: -337.60775238434627
episode: 184 training return: tensor(-409.8525, device='cuda:0')
episode: 185 training return: tensor(-430.9725, device='cuda:0')
episode: 186 training return: tensor(-387.5983, device='cuda:0')
episode: 187 training return: tensor(-468.0621, device='cuda:0')
epoch: 47 test_true_pfm: 5328.770015555531 sim_pfm: -362.39837486547185
episode: 188 training return: tensor(-419.3675, device='cuda:0')
episode: 189 training return: tensor(-331.7454, device='cuda:0')
episode: 190 training return: tensor(-390.3967, device='cuda:0')
episode: 191 training return: tensor(-384.6372, device='cuda:0')
epoch: 48 test_true_pfm: 5530.87639009121 sim_pfm: -329.30258253377787
episode: 192 training return: tensor(-371.5639, device='cuda:0')
episode: 193 training return: tensor(-357.0418, device='cuda:0')
episode: 194 training return: tensor(-411.3658, device='cuda:0')
episode: 195 training return: tensor(-425.4561, device='cuda:0')
epoch: 49 test_true_pfm: 5327.159023720615 sim_pfm: -308.7297152230749
episode: 196 training return: tensor(-379.4880, device='cuda:0')
episode: 197 training return: tensor(-413.0188, device='cuda:0')
episode: 198 training return: tensor(-447.7026, device='cuda:0')
episode: 199 training return: tensor(-523.5363, device='cuda:0')
epoch: 50 test_true_pfm: 5470.358638282942 sim_pfm: -319.77808366930304
episode: 200 training return: tensor(-396.5194, device='cuda:0')
episode: 201 training return: tensor(-417.5027, device='cuda:0')
episode: 202 training return: tensor(-472.9558, device='cuda:0')
episode: 203 training return: tensor(-409.8453, device='cuda:0')
epoch: 51 test_true_pfm: 5367.805198281414 sim_pfm: -317.9877769792899
episode: 204 training return: tensor(-431.6722, device='cuda:0')
episode: 205 training return: tensor(-341.2252, device='cuda:0')
episode: 206 training return: tensor(-282.4044, device='cuda:0')
episode: 207 training return: tensor(-367.4648, device='cuda:0')
epoch: 52 test_true_pfm: 5532.587575288881 sim_pfm: -451.3290934151155
episode: 208 training return: tensor(-400.7669, device='cuda:0')
episode: 209 training return: tensor(-350.1834, device='cuda:0')
episode: 210 training return: tensor(-483.0069, device='cuda:0')
episode: 211 training return: tensor(-415.5127, device='cuda:0')
epoch: 53 test_true_pfm: 5786.558933102241 sim_pfm: -322.4476550235898
episode: 212 training return: tensor(-385.8618, device='cuda:0')
episode: 213 training return: tensor(-463.9889, device='cuda:0')
episode: 214 training return: tensor(-349.6703, device='cuda:0')
episode: 215 training return: tensor(-433.8977, device='cuda:0')
epoch: 54 test_true_pfm: 5425.246827706633 sim_pfm: -353.06690099367796
episode: 216 training return: tensor(-441.6632, device='cuda:0')
episode: 217 training return: tensor(-432.0056, device='cuda:0')
episode: 218 training return: tensor(-368.2063, device='cuda:0')
episode: 219 training return: tensor(-423.6038, device='cuda:0')
epoch: 55 test_true_pfm: 5400.033798447527 sim_pfm: -290.1637298418015
episode: 220 training return: tensor(-395.8308, device='cuda:0')
episode: 221 training return: tensor(-444.5638, device='cuda:0')
episode: 222 training return: tensor(-398.7538, device='cuda:0')
episode: 223 training return: tensor(-364.7999, device='cuda:0')
epoch: 56 test_true_pfm: 5417.992926656521 sim_pfm: -352.85298418843496
episode: 224 training return: tensor(-493.6385, device='cuda:0')
episode: 225 training return: tensor(-411.8611, device='cuda:0')
episode: 226 training return: tensor(-401.4504, device='cuda:0')
episode: 227 training return: tensor(-454.8964, device='cuda:0')
epoch: 57 test_true_pfm: 5357.448006549738 sim_pfm: -303.5134941622964
episode: 228 training return: tensor(-403.6943, device='cuda:0')
episode: 229 training return: tensor(-436.6038, device='cuda:0')
episode: 230 training return: tensor(-426.0068, device='cuda:0')
episode: 231 training return: tensor(-468.7769, device='cuda:0')
epoch: 58 test_true_pfm: 5478.896656853906 sim_pfm: -342.6629521998596
episode: 232 training return: tensor(-343.2737, device='cuda:0')
episode: 233 training return: tensor(-386.9217, device='cuda:0')
episode: 234 training return: tensor(-440.0082, device='cuda:0')
episode: 235 training return: tensor(-439.8925, device='cuda:0')
epoch: 59 test_true_pfm: 5419.022138244171 sim_pfm: -324.5789356186676
episode: 236 training return: tensor(-421.7022, device='cuda:0')
episode: 237 training return: tensor(-379.2970, device='cuda:0')
episode: 238 training return: tensor(-381.8331, device='cuda:0')
episode: 239 training return: tensor(-397.6322, device='cuda:0')
epoch: 60 test_true_pfm: 5459.689073081068 sim_pfm: -353.8233571665284
episode: 240 training return: tensor(-367.4099, device='cuda:0')
episode: 241 training return: tensor(-394.8314, device='cuda:0')
episode: 242 training return: tensor(-373.3613, device='cuda:0')
episode: 243 training return: tensor(-427.7710, device='cuda:0')
epoch: 61 test_true_pfm: 5328.915578840284 sim_pfm: -335.5740438234449
episode: 244 training return: tensor(-437.6266, device='cuda:0')
episode: 245 training return: tensor(-390.2670, device='cuda:0')
episode: 246 training return: tensor(-451.9560, device='cuda:0')
episode: 247 training return: tensor(-340.2093, device='cuda:0')
epoch: 62 test_true_pfm: 5345.108761342542 sim_pfm: -254.88971343826657
episode: 248 training return: tensor(-398.5536, device='cuda:0')
episode: 249 training return: tensor(-336.4263, device='cuda:0')
episode: 250 training return: tensor(-388.3696, device='cuda:0')
episode: 251 training return: tensor(-405.7287, device='cuda:0')
epoch: 63 test_true_pfm: 5398.336295556442 sim_pfm: -317.7350957232799
episode: 252 training return: tensor(-418.9868, device='cuda:0')
episode: 253 training return: tensor(-374.2150, device='cuda:0')
episode: 254 training return: tensor(-393.2230, device='cuda:0')
episode: 255 training return: tensor(-447.1264, device='cuda:0')
epoch: 64 test_true_pfm: 5858.35084577839 sim_pfm: -333.7583700944767
episode: 256 training return: tensor(-380.5892, device='cuda:0')
episode: 257 training return: tensor(-430.6460, device='cuda:0')
episode: 258 training return: tensor(-405.8205, device='cuda:0')
episode: 259 training return: tensor(-278.8486, device='cuda:0')
epoch: 65 test_true_pfm: 5474.7722131253495 sim_pfm: -273.4103152504734
episode: 260 training return: tensor(-396.3263, device='cuda:0')
episode: 261 training return: tensor(-393.8917, device='cuda:0')
episode: 262 training return: tensor(-408.3231, device='cuda:0')
episode: 263 training return: tensor(-433.1396, device='cuda:0')
epoch: 66 test_true_pfm: 5846.7095293710045 sim_pfm: -354.0414992371807
episode: 264 training return: tensor(-345.5631, device='cuda:0')
episode: 265 training return: tensor(-482.3007, device='cuda:0')
episode: 266 training return: tensor(-319.3129, device='cuda:0')
episode: 267 training return: tensor(-392.6152, device='cuda:0')
epoch: 67 test_true_pfm: 5713.060974260029 sim_pfm: -313.49378416503896
episode: 268 training return: tensor(-338.3982, device='cuda:0')
episode: 269 training return: tensor(-367.3956, device='cuda:0')
episode: 270 training return: tensor(-412.2460, device='cuda:0')
episode: 271 training return: tensor(-452.7109, device='cuda:0')
epoch: 68 test_true_pfm: 5457.814937277828 sim_pfm: -310.9317700407798
episode: 272 training return: tensor(-409.9477, device='cuda:0')
episode: 273 training return: tensor(-450.0515, device='cuda:0')
episode: 274 training return: tensor(-521.4925, device='cuda:0')
episode: 275 training return: tensor(-392.6779, device='cuda:0')
epoch: 69 test_true_pfm: 5489.145304946792 sim_pfm: -289.8621732197644
episode: 276 training return: tensor(-347.8421, device='cuda:0')
episode: 277 training return: tensor(-421.0336, device='cuda:0')
episode: 278 training return: tensor(-450.5640, device='cuda:0')
episode: 279 training return: tensor(-428.9520, device='cuda:0')
epoch: 70 test_true_pfm: 5356.253251298126 sim_pfm: -323.04860205122776
episode: 280 training return: tensor(-422.8182, device='cuda:0')
episode: 281 training return: tensor(-353.2787, device='cuda:0')
episode: 282 training return: tensor(-357.5399, device='cuda:0')
episode: 283 training return: tensor(-335.7087, device='cuda:0')
epoch: 71 test_true_pfm: 5416.947553993307 sim_pfm: -337.4245524988025
episode: 284 training return: tensor(-349.5043, device='cuda:0')
episode: 285 training return: tensor(-297.2640, device='cuda:0')
episode: 286 training return: tensor(-341.1786, device='cuda:0')
episode: 287 training return: tensor(-439.5090, device='cuda:0')
epoch: 72 test_true_pfm: 5436.4537121707635 sim_pfm: -339.44420805874205
episode: 288 training return: tensor(-376.0176, device='cuda:0')
episode: 289 training return: tensor(-425.2804, device='cuda:0')
episode: 290 training return: tensor(-351.1021, device='cuda:0')
episode: 291 training return: tensor(-427.9810, device='cuda:0')
epoch: 73 test_true_pfm: 5455.826358486331 sim_pfm: -302.00284353548585
episode: 292 training return: tensor(-281.1297, device='cuda:0')
episode: 293 training return: tensor(-377.5049, device='cuda:0')
episode: 294 training return: tensor(-465.0258, device='cuda:0')
episode: 295 training return: tensor(-389.1624, device='cuda:0')
epoch: 74 test_true_pfm: 5373.876550630453 sim_pfm: -316.2760453272106
episode: 296 training return: tensor(-438.4735, device='cuda:0')
episode: 297 training return: tensor(-342.7456, device='cuda:0')
episode: 298 training return: tensor(-363.5400, device='cuda:0')
episode: 299 training return: tensor(-443.2704, device='cuda:0')
epoch: 75 test_true_pfm: 5436.47788829787 sim_pfm: -284.09155322943116
episode: 300 training return: tensor(-415.2186, device='cuda:0')
episode: 301 training return: tensor(-378.2932, device='cuda:0')
episode: 302 training return: tensor(-264.4165, device='cuda:0')
episode: 303 training return: tensor(-373.2376, device='cuda:0')
epoch: 76 test_true_pfm: 5468.359071943552 sim_pfm: -269.31519780962844
episode: 304 training return: tensor(-367.3492, device='cuda:0')
episode: 305 training return: tensor(-335.1427, device='cuda:0')
episode: 306 training return: tensor(-363.3759, device='cuda:0')
episode: 307 training return: tensor(-342.1709, device='cuda:0')
epoch: 77 test_true_pfm: 5423.385130624846 sim_pfm: -267.2903594302479
episode: 308 training return: tensor(-312.3634, device='cuda:0')
episode: 309 training return: tensor(-324.9374, device='cuda:0')
episode: 310 training return: tensor(-425.6266, device='cuda:0')
episode: 311 training return: tensor(-302.5332, device='cuda:0')
epoch: 78 test_true_pfm: 5586.882703820344 sim_pfm: -314.04373928984086
episode: 312 training return: tensor(-394.2132, device='cuda:0')
episode: 313 training return: tensor(-426.9505, device='cuda:0')
episode: 314 training return: tensor(-424.2122, device='cuda:0')
episode: 315 training return: tensor(-383.8165, device='cuda:0')
epoch: 79 test_true_pfm: 5480.932859056275 sim_pfm: -311.3860292780931
episode: 316 training return: tensor(-335.5173, device='cuda:0')
episode: 317 training return: tensor(-454.4633, device='cuda:0')
episode: 318 training return: tensor(-425.5786, device='cuda:0')
episode: 319 training return: tensor(-372.8905, device='cuda:0')
epoch: 80 test_true_pfm: 5476.942174821849 sim_pfm: -320.1892235674507
episode: 320 training return: tensor(-255.6468, device='cuda:0')
episode: 321 training return: tensor(-417.1071, device='cuda:0')
episode: 322 training return: tensor(-322.7922, device='cuda:0')
episode: 323 training return: tensor(-311.4038, device='cuda:0')
epoch: 81 test_true_pfm: 5426.487665431803 sim_pfm: -311.51280663567985
episode: 324 training return: tensor(-337.0317, device='cuda:0')
episode: 325 training return: tensor(-523.3406, device='cuda:0')
episode: 326 training return: tensor(-367.5878, device='cuda:0')
episode: 327 training return: tensor(-368.5199, device='cuda:0')
epoch: 82 test_true_pfm: 5390.36429081464 sim_pfm: -317.69842323468765
episode: 328 training return: tensor(-392.6289, device='cuda:0')
episode: 329 training return: tensor(-432.9058, device='cuda:0')
episode: 330 training return: tensor(-360.4190, device='cuda:0')
episode: 331 training return: tensor(-477.5027, device='cuda:0')
epoch: 83 test_true_pfm: 5537.042902612468 sim_pfm: -234.9859656098512
episode: 332 training return: tensor(-391.0231, device='cuda:0')
episode: 333 training return: tensor(-343.6426, device='cuda:0')
episode: 334 training return: tensor(-300.4561, device='cuda:0')
episode: 335 training return: tensor(-392.3705, device='cuda:0')
epoch: 84 test_true_pfm: 5493.177724550259 sim_pfm: -256.2989138806297
episode: 336 training return: tensor(-359.4099, device='cuda:0')
episode: 337 training return: tensor(-365.7487, device='cuda:0')
episode: 338 training return: tensor(-348.4910, device='cuda:0')
episode: 339 training return: tensor(-383.2810, device='cuda:0')
epoch: 85 test_true_pfm: 5517.9650373853365 sim_pfm: -299.5298014168026
episode: 340 training return: tensor(-366.1756, device='cuda:0')
episode: 341 training return: tensor(-787.3526, device='cuda:0')
episode: 342 training return: tensor(-382.8826, device='cuda:0')
episode: 343 training return: tensor(-304.0166, device='cuda:0')
epoch: 86 test_true_pfm: 5746.1607887207965 sim_pfm: -271.49785492621595
episode: 344 training return: tensor(-480.6872, device='cuda:0')
episode: 345 training return: tensor(-357.2632, device='cuda:0')
episode: 346 training return: tensor(-410.5002, device='cuda:0')
episode: 347 training return: tensor(-372.1934, device='cuda:0')
epoch: 87 test_true_pfm: 5426.959544459144 sim_pfm: -271.51554080768256
episode: 348 training return: tensor(-439.6273, device='cuda:0')
episode: 349 training return: tensor(-350.4756, device='cuda:0')
episode: 350 training return: tensor(-384.8700, device='cuda:0')
episode: 351 training return: tensor(-380.0728, device='cuda:0')
epoch: 88 test_true_pfm: 5309.971003583784 sim_pfm: -290.3477902709662
episode: 352 training return: tensor(-293.1854, device='cuda:0')
episode: 353 training return: tensor(-342.8994, device='cuda:0')
episode: 354 training return: tensor(-311.2602, device='cuda:0')
episode: 355 training return: tensor(-363.4036, device='cuda:0')
epoch: 89 test_true_pfm: 5458.738318590972 sim_pfm: -261.8084420604088
episode: 356 training return: tensor(-200.7266, device='cuda:0')
episode: 357 training return: tensor(-368.7191, device='cuda:0')
episode: 358 training return: tensor(-357.7471, device='cuda:0')
episode: 359 training return: tensor(-366.0984, device='cuda:0')
epoch: 90 test_true_pfm: 5282.989626592048 sim_pfm: -302.357411378917
episode: 360 training return: tensor(-313.7895, device='cuda:0')
episode: 361 training return: tensor(-359.4376, device='cuda:0')
episode: 362 training return: tensor(-333.7282, device='cuda:0')
episode: 363 training return: tensor(-353.4577, device='cuda:0')
epoch: 91 test_true_pfm: 5524.8242945944885 sim_pfm: -295.4546284422492
episode: 364 training return: tensor(-319.7427, device='cuda:0')
episode: 365 training return: tensor(-378.8909, device='cuda:0')
episode: 366 training return: tensor(-261.3586, device='cuda:0')
episode: 367 training return: tensor(-384.3602, device='cuda:0')
epoch: 92 test_true_pfm: 5575.939696253637 sim_pfm: -270.08024760809104
episode: 368 training return: tensor(-364.5419, device='cuda:0')
episode: 369 training return: tensor(-446.5028, device='cuda:0')
episode: 370 training return: tensor(-363.4340, device='cuda:0')
episode: 371 training return: tensor(-293.6508, device='cuda:0')
epoch: 93 test_true_pfm: 5510.69613297279 sim_pfm: -322.17409237006603
episode: 372 training return: tensor(-395.9637, device='cuda:0')
episode: 373 training return: tensor(-412.0800, device='cuda:0')
episode: 374 training return: tensor(-443.7124, device='cuda:0')
episode: 375 training return: tensor(-393.6183, device='cuda:0')
epoch: 94 test_true_pfm: 5064.485556876408 sim_pfm: -306.0976860537776
episode: 376 training return: tensor(-442.4133, device='cuda:0')
episode: 377 training return: tensor(-389.5977, device='cuda:0')
episode: 378 training return: tensor(-278.0142, device='cuda:0')
episode: 379 training return: tensor(-302.5495, device='cuda:0')
epoch: 95 test_true_pfm: 5423.615691090861 sim_pfm: -300.70809539752855
episode: 380 training return: tensor(-357.5491, device='cuda:0')
episode: 381 training return: tensor(-281.2806, device='cuda:0')
episode: 382 training return: tensor(-360.1290, device='cuda:0')
episode: 383 training return: tensor(-359.2102, device='cuda:0')
epoch: 96 test_true_pfm: 5645.510167598976 sim_pfm: -282.2588555623176
episode: 384 training return: tensor(-352.9708, device='cuda:0')
episode: 385 training return: tensor(-365.5804, device='cuda:0')
episode: 386 training return: tensor(-343.6139, device='cuda:0')
episode: 387 training return: tensor(-365.3055, device='cuda:0')
epoch: 97 test_true_pfm: 5612.3603543869785 sim_pfm: -251.77788878045976
episode: 388 training return: tensor(-375.7385, device='cuda:0')
episode: 389 training return: tensor(-453.5518, device='cuda:0')
episode: 390 training return: tensor(-372.6607, device='cuda:0')
episode: 391 training return: tensor(-301.5055, device='cuda:0')
epoch: 98 test_true_pfm: 5424.765668675082 sim_pfm: -284.74140931169194
episode: 392 training return: tensor(-341.8197, device='cuda:0')
episode: 393 training return: tensor(-324.5652, device='cuda:0')
episode: 394 training return: tensor(-393.3862, device='cuda:0')
episode: 395 training return: tensor(-377.9784, device='cuda:0')
epoch: 99 test_true_pfm: 5677.515760470182 sim_pfm: -252.3041212282163
episode: 396 training return: tensor(-305.0309, device='cuda:0')
episode: 397 training return: tensor(-410.1661, device='cuda:0')
episode: 398 training return: tensor(-355.3896, device='cuda:0')
episode: 399 training return: tensor(-396.3185, device='cuda:0')
epoch: 100 test_true_pfm: 5483.370210070864 sim_pfm: -295.52592648738454
episode: 400 training return: tensor(-449.1665, device='cuda:0')
episode: 401 training return: tensor(-440.2932, device='cuda:0')
episode: 402 training return: tensor(-428.6141, device='cuda:0')
episode: 403 training return: tensor(-381.5150, device='cuda:0')
epoch: 101 test_true_pfm: 5505.721633710342 sim_pfm: -214.73508037797487
episode: 404 training return: tensor(-354.0083, device='cuda:0')
episode: 405 training return: tensor(-248.7845, device='cuda:0')
episode: 406 training return: tensor(-435.8938, device='cuda:0')
episode: 407 training return: tensor(-333.5864, device='cuda:0')
epoch: 102 test_true_pfm: 5482.557564019914 sim_pfm: -257.417632161271
episode: 408 training return: tensor(-361.0817, device='cuda:0')
episode: 409 training return: tensor(-382.9823, device='cuda:0')
episode: 410 training return: tensor(-391.6203, device='cuda:0')
episode: 411 training return: tensor(-400.3326, device='cuda:0')
epoch: 103 test_true_pfm: 5411.581018475062 sim_pfm: -288.0511978076247
episode: 412 training return: tensor(-375.5851, device='cuda:0')
episode: 413 training return: tensor(-320.0461, device='cuda:0')
episode: 414 training return: tensor(-369.5193, device='cuda:0')
episode: 415 training return: tensor(-352.6068, device='cuda:0')
epoch: 104 test_true_pfm: 5638.692567288342 sim_pfm: -274.6328766772931
episode: 416 training return: tensor(-322.3584, device='cuda:0')
episode: 417 training return: tensor(-388.7498, device='cuda:0')
episode: 418 training return: tensor(-411.0621, device='cuda:0')
episode: 419 training return: tensor(-443.1122, device='cuda:0')
epoch: 105 test_true_pfm: 5380.047732666662 sim_pfm: -253.66769133201646
episode: 420 training return: tensor(-294.0057, device='cuda:0')
episode: 421 training return: tensor(-337.3160, device='cuda:0')
episode: 422 training return: tensor(-402.4596, device='cuda:0')
episode: 423 training return: tensor(-370.2614, device='cuda:0')
epoch: 106 test_true_pfm: 5399.08771077128 sim_pfm: -239.39455476607932
episode: 424 training return: tensor(-416.4435, device='cuda:0')
episode: 425 training return: tensor(-374.5232, device='cuda:0')
episode: 426 training return: tensor(-289.5590, device='cuda:0')
episode: 427 training return: tensor(-370.3214, device='cuda:0')
epoch: 107 test_true_pfm: 5614.2989339596315 sim_pfm: -271.5752810308186
episode: 428 training return: tensor(-396.7576, device='cuda:0')
episode: 429 training return: tensor(-385.4152, device='cuda:0')
episode: 430 training return: tensor(-420.8426, device='cuda:0')
episode: 431 training return: tensor(-360.2964, device='cuda:0')
epoch: 108 test_true_pfm: 5565.142523001003 sim_pfm: -298.85235914657824
episode: 432 training return: tensor(-329.5202, device='cuda:0')
episode: 433 training return: tensor(-277.9120, device='cuda:0')
episode: 434 training return: tensor(-340.0179, device='cuda:0')
episode: 435 training return: tensor(-352.3954, device='cuda:0')
epoch: 109 test_true_pfm: 5462.425418827023 sim_pfm: -259.6519844973421
episode: 436 training return: tensor(-306.4352, device='cuda:0')
episode: 437 training return: tensor(-380.0292, device='cuda:0')
episode: 438 training return: tensor(-233.1312, device='cuda:0')
episode: 439 training return: tensor(-365.7200, device='cuda:0')
epoch: 110 test_true_pfm: 5451.835822081811 sim_pfm: -213.48774223487513
episode: 440 training return: tensor(-400.7217, device='cuda:0')
episode: 441 training return: tensor(-482.0991, device='cuda:0')
episode: 442 training return: tensor(-315.9183, device='cuda:0')
episode: 443 training return: tensor(-382.4754, device='cuda:0')
epoch: 111 test_true_pfm: 5878.863837582191 sim_pfm: -248.6957843711231
episode: 444 training return: tensor(-364.7144, device='cuda:0')
episode: 445 training return: tensor(-376.4465, device='cuda:0')
episode: 446 training return: tensor(-346.2122, device='cuda:0')
episode: 447 training return: tensor(-382.4388, device='cuda:0')
epoch: 112 test_true_pfm: 5470.465572861555 sim_pfm: -249.74520357335373
episode: 448 training return: tensor(-355.7684, device='cuda:0')
episode: 449 training return: tensor(-353.4579, device='cuda:0')
episode: 450 training return: tensor(-335.3216, device='cuda:0')
episode: 451 training return: tensor(-409.0766, device='cuda:0')
epoch: 113 test_true_pfm: 5639.408377994415 sim_pfm: -276.8979599700445
episode: 452 training return: tensor(-392.3858, device='cuda:0')
episode: 453 training return: tensor(-472.9023, device='cuda:0')
episode: 454 training return: tensor(-284.3697, device='cuda:0')
episode: 455 training return: tensor(-306.8621, device='cuda:0')
epoch: 114 test_true_pfm: 5499.553297082642 sim_pfm: -272.24778631319833
episode: 456 training return: tensor(-347.4317, device='cuda:0')
episode: 457 training return: tensor(-277.4691, device='cuda:0')
episode: 458 training return: tensor(-444.4589, device='cuda:0')
episode: 459 training return: tensor(-361.4353, device='cuda:0')
epoch: 115 test_true_pfm: 5763.821424478982 sim_pfm: -279.2672877900186
episode: 460 training return: tensor(-255.3354, device='cuda:0')
episode: 461 training return: tensor(-414.6464, device='cuda:0')
episode: 462 training return: tensor(-398.3584, device='cuda:0')
episode: 463 training return: tensor(-391.3013, device='cuda:0')
epoch: 116 test_true_pfm: 5400.713238555824 sim_pfm: -244.4950420417978
episode: 464 training return: tensor(-438.3634, device='cuda:0')
episode: 465 training return: tensor(-380.2575, device='cuda:0')
episode: 466 training return: tensor(-362.2815, device='cuda:0')
episode: 467 training return: tensor(-284.6653, device='cuda:0')
epoch: 117 test_true_pfm: 5480.765288923202 sim_pfm: -247.14845634483694
episode: 468 training return: tensor(-358.5051, device='cuda:0')
episode: 469 training return: tensor(-342.9085, device='cuda:0')
episode: 470 training return: tensor(-306.4862, device='cuda:0')
episode: 471 training return: tensor(-412.3990, device='cuda:0')
epoch: 118 test_true_pfm: 5568.397909340955 sim_pfm: -205.39567409422793
episode: 472 training return: tensor(-356.3580, device='cuda:0')
episode: 473 training return: tensor(-301.1968, device='cuda:0')
episode: 474 training return: tensor(-380.5003, device='cuda:0')
episode: 475 training return: tensor(-329.3103, device='cuda:0')
epoch: 119 test_true_pfm: 5518.449530239817 sim_pfm: -288.4671710375308
episode: 476 training return: tensor(-344.7495, device='cuda:0')
episode: 477 training return: tensor(-352.9095, device='cuda:0')
episode: 478 training return: tensor(-272.8213, device='cuda:0')
episode: 479 training return: tensor(-362.0048, device='cuda:0')
epoch: 120 test_true_pfm: 5580.722949049952 sim_pfm: -267.1764817090977
episode: 480 training return: tensor(-376.6378, device='cuda:0')
episode: 481 training return: tensor(-355.8032, device='cuda:0')
episode: 482 training return: tensor(-369.1078, device='cuda:0')
episode: 483 training return: tensor(-399.1490, device='cuda:0')
epoch: 121 test_true_pfm: 5252.621105912708 sim_pfm: -281.2616203191865
episode: 484 training return: tensor(-412.1371, device='cuda:0')
episode: 485 training return: tensor(-345.1902, device='cuda:0')
episode: 486 training return: tensor(-444.1269, device='cuda:0')
episode: 487 training return: tensor(-379.8973, device='cuda:0')
epoch: 122 test_true_pfm: 5447.734514319162 sim_pfm: -291.8649172487203
episode: 488 training return: tensor(-467.0436, device='cuda:0')
episode: 489 training return: tensor(-455.5834, device='cuda:0')
episode: 490 training return: tensor(-419.8192, device='cuda:0')
episode: 491 training return: tensor(-318.2947, device='cuda:0')
epoch: 123 test_true_pfm: 5464.431471451594 sim_pfm: -267.9410164938211
episode: 492 training return: tensor(-398.8049, device='cuda:0')
episode: 493 training return: tensor(-336.9524, device='cuda:0')
episode: 494 training return: tensor(-372.5681, device='cuda:0')
episode: 495 training return: tensor(-375.3520, device='cuda:0')
epoch: 124 test_true_pfm: 5587.373623739207 sim_pfm: -241.81588764062812
episode: 496 training return: tensor(-410.0824, device='cuda:0')
episode: 497 training return: tensor(-421.9120, device='cuda:0')
episode: 498 training return: tensor(-455.3422, device='cuda:0')
episode: 499 training return: tensor(-243.5745, device='cuda:0')
epoch: 125 test_true_pfm: 5422.088081617476 sim_pfm: -253.47325116437665
episode: 500 training return: tensor(-420.7803, device='cuda:0')
episode: 501 training return: tensor(-329.2810, device='cuda:0')
episode: 502 training return: tensor(-270.8773, device='cuda:0')
episode: 503 training return: tensor(-347.5224, device='cuda:0')
epoch: 126 test_true_pfm: 5521.5619621881315 sim_pfm: -301.26950337333255
episode: 504 training return: tensor(-324.4376, device='cuda:0')
episode: 505 training return: tensor(-358.1204, device='cuda:0')
episode: 506 training return: tensor(-306.3354, device='cuda:0')
episode: 507 training return: tensor(-321.8637, device='cuda:0')
epoch: 127 test_true_pfm: 5476.858787603173 sim_pfm: -275.54335608001566
episode: 508 training return: tensor(-424.2846, device='cuda:0')
episode: 509 training return: tensor(-434.5474, device='cuda:0')
episode: 510 training return: tensor(-378.1852, device='cuda:0')
episode: 511 training return: tensor(-344.5746, device='cuda:0')
epoch: 128 test_true_pfm: 5475.166555732074 sim_pfm: -297.90670130832586
episode: 512 training return: tensor(-367.4860, device='cuda:0')
episode: 513 training return: tensor(-297.0162, device='cuda:0')
episode: 514 training return: tensor(-429.4698, device='cuda:0')
episode: 515 training return: tensor(-411.5710, device='cuda:0')
epoch: 129 test_true_pfm: 5531.856624537817 sim_pfm: -290.7907647445293
episode: 516 training return: tensor(-327.0459, device='cuda:0')
episode: 517 training return: tensor(-270.0532, device='cuda:0')
episode: 518 training return: tensor(-417.1932, device='cuda:0')
episode: 519 training return: tensor(-301.4201, device='cuda:0')
epoch: 130 test_true_pfm: 5439.367266100952 sim_pfm: -282.42735985196003
episode: 520 training return: tensor(-395.4111, device='cuda:0')
episode: 521 training return: tensor(-331.9254, device='cuda:0')
episode: 522 training return: tensor(-341.4872, device='cuda:0')
episode: 523 training return: tensor(-276.7687, device='cuda:0')
epoch: 131 test_true_pfm: 4613.770184959188 sim_pfm: -232.58111416207007
episode: 524 training return: tensor(-392.8541, device='cuda:0')
episode: 525 training return: tensor(-349.4078, device='cuda:0')
episode: 526 training return: tensor(-287.4793, device='cuda:0')
episode: 527 training return: tensor(-286.2815, device='cuda:0')
epoch: 132 test_true_pfm: 5508.384266304373 sim_pfm: -271.3990373466416
episode: 528 training return: tensor(-365.4668, device='cuda:0')
episode: 529 training return: tensor(-388.2821, device='cuda:0')
episode: 530 training return: tensor(-320.9564, device='cuda:0')
episode: 531 training return: tensor(-417.0181, device='cuda:0')
epoch: 133 test_true_pfm: 5586.7592712622645 sim_pfm: -249.31560572163048
episode: 532 training return: tensor(-385.6721, device='cuda:0')
episode: 533 training return: tensor(-272.8940, device='cuda:0')
episode: 534 training return: tensor(-344.3871, device='cuda:0')
episode: 535 training return: tensor(-403.2340, device='cuda:0')
epoch: 134 test_true_pfm: 5537.650832873959 sim_pfm: -266.9516372679791
episode: 536 training return: tensor(-290.6743, device='cuda:0')
episode: 537 training return: tensor(-504.5938, device='cuda:0')
episode: 538 training return: tensor(-307.8201, device='cuda:0')
episode: 539 training return: tensor(-398.6896, device='cuda:0')
epoch: 135 test_true_pfm: 5670.400057614432 sim_pfm: -284.7975871961098
episode: 540 training return: tensor(-335.9709, device='cuda:0')
episode: 541 training return: tensor(-363.4458, device='cuda:0')
episode: 542 training return: tensor(-338.8214, device='cuda:0')
episode: 543 training return: tensor(-419.4602, device='cuda:0')
epoch: 136 test_true_pfm: 5497.182122355046 sim_pfm: -278.76809374288615
episode: 544 training return: tensor(-295.0382, device='cuda:0')
episode: 545 training return: tensor(-437.4001, device='cuda:0')
episode: 546 training return: tensor(-378.7990, device='cuda:0')
episode: 547 training return: tensor(-312.3857, device='cuda:0')
epoch: 137 test_true_pfm: 6202.033945209853 sim_pfm: -247.07471305147433
episode: 548 training return: tensor(-375.1685, device='cuda:0')
episode: 549 training return: tensor(-430.3251, device='cuda:0')
episode: 550 training return: tensor(-311.5089, device='cuda:0')
episode: 551 training return: tensor(-297.6119, device='cuda:0')
epoch: 138 test_true_pfm: 5457.4388047620905 sim_pfm: -239.68888255768493
episode: 552 training return: tensor(-341.0384, device='cuda:0')
episode: 553 training return: tensor(-251.7027, device='cuda:0')
episode: 554 training return: tensor(-172.9519, device='cuda:0')
episode: 555 training return: tensor(-353.3179, device='cuda:0')
epoch: 139 test_true_pfm: 5572.146862477679 sim_pfm: -335.7696206468293
episode: 556 training return: tensor(-456.5903, device='cuda:0')
episode: 557 training return: tensor(-366.3056, device='cuda:0')
episode: 558 training return: tensor(-391.9036, device='cuda:0')
episode: 559 training return: tensor(-377.4745, device='cuda:0')
epoch: 140 test_true_pfm: 5920.908447126145 sim_pfm: -236.36770774417286
episode: 560 training return: tensor(-325.1414, device='cuda:0')
episode: 561 training return: tensor(-438.6956, device='cuda:0')
episode: 562 training return: tensor(-282.3991, device='cuda:0')
episode: 563 training return: tensor(-292.6239, device='cuda:0')
epoch: 141 test_true_pfm: 5578.250478126164 sim_pfm: -262.15589437343687
episode: 564 training return: tensor(-377.8619, device='cuda:0')
episode: 565 training return: tensor(-393.8066, device='cuda:0')
episode: 566 training return: tensor(-390.3357, device='cuda:0')
episode: 567 training return: tensor(-387.1324, device='cuda:0')
epoch: 142 test_true_pfm: 5884.831060913158 sim_pfm: -211.91421261476353
episode: 568 training return: tensor(-342.5410, device='cuda:0')
episode: 569 training return: tensor(-403.8902, device='cuda:0')
episode: 570 training return: tensor(-402.1199, device='cuda:0')
episode: 571 training return: tensor(-320.3865, device='cuda:0')
epoch: 143 test_true_pfm: 5408.53577905556 sim_pfm: -249.19764190148757
episode: 572 training return: tensor(-401.6551, device='cuda:0')
episode: 573 training return: tensor(-346.2550, device='cuda:0')
episode: 574 training return: tensor(-300.9703, device='cuda:0')
episode: 575 training return: tensor(-399.4393, device='cuda:0')
epoch: 144 test_true_pfm: 5418.079477110718 sim_pfm: -254.54037740869293
episode: 576 training return: tensor(-349.8780, device='cuda:0')
episode: 577 training return: tensor(-293.1287, device='cuda:0')
episode: 578 training return: tensor(-317.8363, device='cuda:0')
episode: 579 training return: tensor(-402.7404, device='cuda:0')
epoch: 145 test_true_pfm: 5550.77398070105 sim_pfm: -277.5682529316982
episode: 580 training return: tensor(-362.6498, device='cuda:0')
episode: 581 training return: tensor(-402.9689, device='cuda:0')
episode: 582 training return: tensor(-450.4289, device='cuda:0')
episode: 583 training return: tensor(-399.7004, device='cuda:0')
epoch: 146 test_true_pfm: 5867.109095042576 sim_pfm: -288.116659325208
episode: 584 training return: tensor(-349.6694, device='cuda:0')
episode: 585 training return: tensor(-369.0227, device='cuda:0')
episode: 586 training return: tensor(-361.0675, device='cuda:0')
episode: 587 training return: tensor(-323.7098, device='cuda:0')
epoch: 147 test_true_pfm: 5531.638912272337 sim_pfm: -274.00560828854213
episode: 588 training return: tensor(-311.2418, device='cuda:0')
episode: 589 training return: tensor(-367.9085, device='cuda:0')
episode: 590 training return: tensor(-352.8555, device='cuda:0')
episode: 591 training return: tensor(-333.1107, device='cuda:0')
epoch: 148 test_true_pfm: 5484.414359524834 sim_pfm: -227.93630517622418
episode: 592 training return: tensor(-324.2443, device='cuda:0')
episode: 593 training return: tensor(-313.4442, device='cuda:0')
episode: 594 training return: tensor(-436.3985, device='cuda:0')
episode: 595 training return: tensor(-368.7926, device='cuda:0')
epoch: 149 test_true_pfm: 5501.579694101639 sim_pfm: -249.49116393846148
episode: 596 training return: tensor(-317.2656, device='cuda:0')
episode: 597 training return: tensor(-341.4927, device='cuda:0')
episode: 598 training return: tensor(-328.3697, device='cuda:0')
episode: 599 training return: tensor(-296.9123, device='cuda:0')
epoch: 150 test_true_pfm: 5785.812186844745 sim_pfm: -236.46123857346052
