['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '3']
epoch: 0 training_loss 0.31609324008226397 test_loss: 0.21973295211791993
epoch: 1 training_loss 0.1755328107625246 test_loss: 0.19944754838943482
epoch: 2 training_loss 0.15789143566042185 test_loss: 0.16182440519332886
epoch: 3 training_loss 0.13005524314939976 test_loss: 0.13285324573516846
epoch: 4 training_loss 0.13769189562648534 test_loss: 0.14285449981689452
epoch: 5 training_loss 0.12348290752619505 test_loss: 0.11878113746643067
epoch: 6 training_loss 0.12383343603461981 test_loss: 0.12932459115982056
epoch: 7 training_loss 0.11496396580711007 test_loss: 0.1128819465637207
epoch: 8 training_loss 0.11538602311164141 test_loss: 0.1201087236404419
epoch: 9 training_loss 0.12129341784864664 test_loss: 0.11329710483551025
epoch: 10 training_loss 0.11253748094663024 test_loss: 0.12153918743133545
epoch: 11 training_loss 0.11455460058525205 test_loss: 0.11507985591888428
epoch: 12 training_loss 0.11476479433476924 test_loss: 0.12211395502090454
epoch: 13 training_loss 0.11547964468598365 test_loss: 0.12097984552383423
epoch: 14 training_loss 0.10786777604371309 test_loss: 0.1219088077545166
epoch: 15 training_loss 0.10967775020748377 test_loss: 0.1137159824371338
epoch: 16 training_loss 0.11596161123365163 test_loss: 0.11421694755554199
epoch: 17 training_loss 0.11085327204316854 test_loss: 0.11387546062469482
epoch: 18 training_loss 0.11303774589672684 test_loss: 0.10701727867126465
epoch: 19 training_loss 0.11182133011519908 test_loss: 0.10741983652114868
epoch: 20 training_loss 0.10628037340939045 test_loss: 0.10609312057495117
epoch: 21 training_loss 0.10499763913452625 test_loss: 0.10035481452941894
epoch: 22 training_loss 0.11406238365918397 test_loss: 0.1367538809776306
epoch: 23 training_loss 0.10933591663837433 test_loss: 0.1109521746635437
epoch: 24 training_loss 0.11969586351886391 test_loss: 0.11346107721328735
epoch: 25 training_loss 0.10546681575477124 test_loss: 0.10002697706222534
epoch: 26 training_loss 0.10876095920801163 test_loss: 0.11965103149414062
epoch: 27 training_loss 0.10712079782038927 test_loss: 0.098035329580307
epoch: 28 training_loss 0.1084440970979631 test_loss: 0.12779144048690796
epoch: 29 training_loss 0.11125313736498356 test_loss: 0.09766188859939576
epoch: 30 training_loss 0.11178731251507998 test_loss: 0.11003675460815429
epoch: 31 training_loss 0.1162631668522954 test_loss: 0.11986736059188843
epoch: 32 training_loss 0.10483534824103118 test_loss: 0.113880455493927
epoch: 33 training_loss 0.10649273527786135 test_loss: 0.09555898308753967
epoch: 34 training_loss 0.11176573932170868 test_loss: 0.11042746305465698
epoch: 35 training_loss 0.10361578125506639 test_loss: 0.12285046577453614
epoch: 36 training_loss 0.106802011243999 test_loss: 0.11503124237060547
epoch: 37 training_loss 0.10913141705095768 test_loss: 0.11015528440475464
epoch: 38 training_loss 0.10528803426772355 test_loss: 0.11373205184936523
epoch: 39 training_loss 0.11081329833716154 test_loss: 0.11761974096298218
epoch: 40 training_loss 0.10489096645265818 test_loss: 0.10828996896743774
epoch: 41 training_loss 0.10701133305206895 test_loss: 0.12634376287460328
epoch: 42 training_loss 0.106205068025738 test_loss: 0.1180687427520752
epoch: 43 training_loss 0.10808268178254365 test_loss: 0.12340919971466065
epoch: 44 training_loss 0.10790864728391171 test_loss: 0.1058726191520691
epoch: 45 training_loss 0.10701000399887561 test_loss: 0.12646176815032958
epoch: 46 training_loss 0.10461980246007442 test_loss: 0.1113247275352478
epoch: 47 training_loss 0.10677945610135793 test_loss: 0.12566096782684327
epoch: 48 training_loss 0.10584862180054187 test_loss: 0.1140708565711975
epoch: 49 training_loss 0.10094179216772318 test_loss: 0.10311063528060913
epoch: 50 training_loss 0.10528559938073158 test_loss: 0.10364046096801757
epoch: 51 training_loss 0.10663283858448266 test_loss: 0.10806370973587036
epoch: 52 training_loss 0.10710085570812225 test_loss: 0.11364805698394775
epoch: 53 training_loss 0.10017395600676536 test_loss: 0.11840474605560303
epoch: 54 training_loss 0.10043442515656352 test_loss: 0.10420305728912353
epoch: 55 training_loss 0.10106892086565494 test_loss: 0.1258711338043213
epoch: 56 training_loss 0.10275890230201185 test_loss: 0.11629072427749634
epoch: 57 training_loss 0.10793129548430443 test_loss: 0.1232805609703064
epoch: 58 training_loss 0.1006354634463787 test_loss: 0.12559641599655152
epoch: 59 training_loss 0.10951030358672142 test_loss: 0.12495814561843872
epoch: 60 training_loss 0.10332737274467946 test_loss: 0.11340198516845704
epoch: 61 training_loss 0.10832243029028177 test_loss: 0.09677848219871521
epoch: 62 training_loss 0.10080147009342909 test_loss: 0.10513358116149903
epoch: 63 training_loss 0.10235435724258422 test_loss: 0.09399355053901673
epoch: 64 training_loss 0.10513500045984983 test_loss: 0.12365715503692627
epoch: 65 training_loss 0.11028480861335993 test_loss: 0.12460912466049194
epoch: 66 training_loss 0.10276547625660896 test_loss: 0.1244060754776001
epoch: 67 training_loss 0.11663935799151659 test_loss: 0.10589693784713745
epoch: 68 training_loss 0.10467760574072599 test_loss: 0.1135301947593689
epoch: 69 training_loss 0.095171436406672 test_loss: 0.11186417341232299
epoch: 70 training_loss 0.10368249986320734 test_loss: 0.11524499654769897
epoch: 71 training_loss 0.10651546690613031 test_loss: 0.1143541693687439
epoch: 72 training_loss 0.1037450798228383 test_loss: 0.1166757583618164
epoch: 73 training_loss 0.1051870073005557 test_loss: 0.1112654447555542
epoch: 74 training_loss 0.09893409367650748 test_loss: 0.12700494527816772
epoch: 75 training_loss 0.10109615758061409 test_loss: 0.1114002823829651
epoch: 76 training_loss 0.09881877269595861 test_loss: 0.09778590202331543
epoch: 77 training_loss 0.10436794571578503 test_loss: 0.13830639123916627
epoch: 78 training_loss 0.10665001571178437 test_loss: 0.10205527544021606
epoch: 79 training_loss 0.10707813188433647 test_loss: 0.11138225793838501
epoch: 80 training_loss 0.10594564843922853 test_loss: 0.10457512140274047
epoch: 81 training_loss 0.1064229086600244 test_loss: 0.11022725105285644
epoch: 82 training_loss 0.10395524276420474 test_loss: 0.12645351886749268
epoch: 83 training_loss 0.10080174895003438 test_loss: 0.10404734611511231
epoch: 84 training_loss 0.10626082986593247 test_loss: 0.12012314796447754
epoch: 85 training_loss 0.10547829620540142 test_loss: 0.10743457078933716
epoch: 86 training_loss 0.09517034545540809 test_loss: 0.09968806505203247
epoch: 87 training_loss 0.1040096565335989 test_loss: 0.09905723929405212
epoch: 88 training_loss 0.1101995668746531 test_loss: 0.11093279123306274
epoch: 89 training_loss 0.10686503212898969 test_loss: 0.1018712878227234
epoch: 90 training_loss 0.1033074539899826 test_loss: 0.11078813076019287
epoch: 91 training_loss 0.09742780076339841 test_loss: 0.11284785270690918
epoch: 92 training_loss 0.10539719074964524 test_loss: 0.12481486797332764
epoch: 93 training_loss 0.11261976029723883 test_loss: 0.10443086624145508
epoch: 94 training_loss 0.11045405827462673 test_loss: 0.11227514743804931
epoch: 95 training_loss 0.10507456183433533 test_loss: 0.1158592939376831
epoch: 96 training_loss 0.10946817345917224 test_loss: 0.11357326507568359
epoch: 97 training_loss 0.10407152865082026 test_loss: 0.11758273839950562
epoch: 98 training_loss 0.10914451148360968 test_loss: 0.11166526079177856
epoch: 99 training_loss 0.1004620186984539 test_loss: 0.09095266461372375
epoch: 100 training_loss 0.1054555549286306 test_loss: 0.1029464840888977
epoch: 101 training_loss 0.10550411421805621 test_loss: 0.10843654870986938
epoch: 102 training_loss 0.10287094615399837 test_loss: 0.09895591139793396
epoch: 103 training_loss 0.10419229397550225 test_loss: 0.10790578126907349
epoch: 104 training_loss 0.11163934633135796 test_loss: 0.11497509479522705
epoch: 105 training_loss 0.10512145165354013 test_loss: 0.09539265036582947
epoch: 106 training_loss 0.10808419624343514 test_loss: 0.11370183229446411
epoch: 107 training_loss 0.10971431817859412 test_loss: 0.11664173603057862
epoch: 108 training_loss 0.10228127190843224 test_loss: 0.11135703325271606
epoch: 109 training_loss 0.09585379698313773 test_loss: 0.10592856407165527
epoch: 110 training_loss 0.0984437170252204 test_loss: 0.10859599113464355
epoch: 111 training_loss 0.1110996238514781 test_loss: 0.12749018669128417
epoch: 112 training_loss 0.10415597032755614 test_loss: 0.12244313955307007
epoch: 113 training_loss 0.10155451234430074 test_loss: 0.12260581254959106
epoch: 114 training_loss 0.10913795325905085 test_loss: 0.09555675983428955
epoch: 115 training_loss 0.10737260889261961 test_loss: 0.10894380807876587
epoch: 116 training_loss 0.10997490670531988 test_loss: 0.10320321321487427
epoch: 117 training_loss 0.09747476402670145 test_loss: 0.10824673175811768
epoch: 118 training_loss 0.09583573658019304 test_loss: 0.10509939193725586
epoch: 119 training_loss 0.10629848297685385 test_loss: 0.09699054956436157
epoch: 120 training_loss 0.10288116479292513 test_loss: 0.12680212259292603
epoch: 121 training_loss 0.10089131742715836 test_loss: 0.10988943576812744
epoch: 122 training_loss 0.10590582529082894 test_loss: 0.10390764474868774
epoch: 123 training_loss 0.10015328258275985 test_loss: 0.10898523330688477
epoch: 124 training_loss 0.10597681604325772 test_loss: 0.11120761632919311
epoch: 125 training_loss 0.10897956948727369 test_loss: 0.09926162362098694
epoch: 126 training_loss 0.10994043604470789 test_loss: 0.11724507808685303
epoch: 127 training_loss 0.10022909872233868 test_loss: 0.11120328903198243
epoch: 128 training_loss 0.10626498306170105 test_loss: 0.11866632699966431
epoch: 129 training_loss 0.10314482908695936 test_loss: 0.1128059983253479
epoch: 130 training_loss 0.10151100337505341 test_loss: 0.08792614340782165
epoch: 131 training_loss 0.10082434181123973 test_loss: 0.11067391633987426
epoch: 132 training_loss 0.10068737057968974 test_loss: 0.10138539075851441
epoch: 133 training_loss 0.10633140679448844 test_loss: 0.11607073545455933
epoch: 134 training_loss 0.09766243997961283 test_loss: 0.0928402602672577
epoch: 135 training_loss 0.10172630434855819 test_loss: 0.1140473484992981
epoch: 136 training_loss 0.10067642614245415 test_loss: 0.1147884726524353
epoch: 137 training_loss 0.10889562677592039 test_loss: 0.1134947657585144
epoch: 138 training_loss 0.10154716188088059 test_loss: 0.10525184869766235
epoch: 139 training_loss 0.10386751156300306 test_loss: 0.11592305898666382
epoch: 140 training_loss 0.10276403615251184 test_loss: 0.10803097486495972
epoch: 141 training_loss 0.09800830271095037 test_loss: 0.10310484170913696
epoch: 142 training_loss 0.10345493914559484 test_loss: 0.09931899309158325
epoch: 143 training_loss 0.1022486450523138 test_loss: 0.10530258417129516
epoch: 144 training_loss 0.10781707096844911 test_loss: 0.10383663177490235
epoch: 145 training_loss 0.11268955029547215 test_loss: 0.09941009879112243
epoch: 146 training_loss 0.10406603895127774 test_loss: 0.11135011911392212
epoch: 147 training_loss 0.10330769181251526 test_loss: 0.11983428001403809
epoch: 148 training_loss 0.10888889458030462 test_loss: 0.10818620920181274
epoch: 149 training_loss 0.10008047934621572 test_loss: 0.1129196047782898
epoch: 0 training_loss 53.65118444442749 test_loss: 25.800674438476562
epoch: 1 training_loss 19.58901803970337 test_loss: 15.939393615722656
epoch: 2 training_loss 13.885833015441895 test_loss: 11.888162994384766
epoch: 3 training_loss 11.004941530227661 test_loss: 10.050373840332032
epoch: 4 training_loss 9.216738376617432 test_loss: 8.383025360107421
epoch: 5 training_loss 7.945341591835022 test_loss: 7.2102195739746096
epoch: 6 training_loss 6.788586115837097 test_loss: 6.4398353576660154
epoch: 7 training_loss 6.075786366462707 test_loss: 5.971397781372071
epoch: 8 training_loss 5.567786335945129 test_loss: 5.23425064086914
epoch: 9 training_loss 5.038600258827209 test_loss: 4.894466781616211
epoch: 10 training_loss 4.837288961410523 test_loss: 4.750145721435547
epoch: 11 training_loss 4.457794888019562 test_loss: 4.3635307312011715
epoch: 12 training_loss 4.353408160209656 test_loss: 4.1782073974609375
epoch: 13 training_loss 4.010313441753388 test_loss: 3.908184051513672
epoch: 14 training_loss 3.801245038509369 test_loss: 3.8294750213623048
epoch: 15 training_loss 3.672254123687744 test_loss: 3.6511154174804688
epoch: 16 training_loss 3.552083230018616 test_loss: 3.676702880859375
epoch: 17 training_loss 3.4251525354385377 test_loss: 3.3766380310058595
epoch: 18 training_loss 3.3400742506980894 test_loss: 3.3693073272705076
epoch: 19 training_loss 3.181802053451538 test_loss: 3.158473587036133
epoch: 20 training_loss 3.119514377117157 test_loss: 3.185007095336914
epoch: 21 training_loss 3.0285534644126892 test_loss: 3.216950607299805
epoch: 22 training_loss 3.0568297410011294 test_loss: 2.9675689697265626
epoch: 23 training_loss 2.996449284553528 test_loss: 2.969290351867676
epoch: 24 training_loss 2.8335003542900083 test_loss: 2.834521675109863
epoch: 25 training_loss 2.8027132153511047 test_loss: 2.701607513427734
epoch: 26 training_loss 2.712931525707245 test_loss: 2.7404497146606444
epoch: 27 training_loss 2.742619309425354 test_loss: 2.819980239868164
epoch: 28 training_loss 2.6771689653396606 test_loss: 2.5566780090332033
epoch: 29 training_loss 2.630603334903717 test_loss: 2.6733394622802735
epoch: 30 training_loss 2.5723282647132875 test_loss: 2.463188362121582
epoch: 31 training_loss 2.526007001399994 test_loss: 2.4712898254394533
epoch: 32 training_loss 2.4624331426620483 test_loss: 2.3554555892944338
epoch: 33 training_loss 2.531761074066162 test_loss: 2.4335086822509764
epoch: 34 training_loss 2.4728322315216062 test_loss: 2.3518798828125
epoch: 35 training_loss 2.3975785505771636 test_loss: 2.330866050720215
epoch: 36 training_loss 2.3625083816051484 test_loss: 2.273628807067871
epoch: 37 training_loss 2.341964980363846 test_loss: 2.336907386779785
epoch: 38 training_loss 2.284748604297638 test_loss: 2.361298370361328
epoch: 39 training_loss 2.2136161279678346 test_loss: 2.3090816497802735
epoch: 40 training_loss 2.221459360122681 test_loss: 2.209373664855957
epoch: 41 training_loss 2.249049115180969 test_loss: 2.1377166748046874
epoch: 42 training_loss 2.2147330236434937 test_loss: 2.197309684753418
epoch: 43 training_loss 2.2026916682720183 test_loss: 2.1926403045654297
epoch: 44 training_loss 2.147526276111603 test_loss: 2.1349313735961912
epoch: 45 training_loss 2.1182153177261354 test_loss: 2.110270309448242
epoch: 46 training_loss 2.1216149914264677 test_loss: 2.1379606246948244
epoch: 47 training_loss 2.1287246739864347 test_loss: 2.0968360900878906
epoch: 48 training_loss 2.103605017662048 test_loss: 2.017530822753906
epoch: 49 training_loss 2.1080142140388487 test_loss: 2.1185415267944334
epoch: 50 training_loss 2.029434109926224 test_loss: 1.9463560104370117
epoch: 51 training_loss 2.0515308141708375 test_loss: 1.988832664489746
epoch: 52 training_loss 2.029191370010376 test_loss: 1.9413091659545898
epoch: 53 training_loss 1.9593176233768463 test_loss: 1.9074493408203126
epoch: 54 training_loss 2.0014922726154327 test_loss: 1.9636659622192383
epoch: 55 training_loss 1.9466946578025819 test_loss: 1.9241193771362304
epoch: 56 training_loss 1.9089433681964874 test_loss: 1.9809904098510742
epoch: 57 training_loss 1.9495634424686432 test_loss: 1.9564929962158204
epoch: 58 training_loss 1.9153606748580934 test_loss: 1.9959421157836914
epoch: 59 training_loss 1.877557932138443 test_loss: 1.9069290161132812
epoch: 60 training_loss 1.8645391964912414 test_loss: 1.8960880279541015
epoch: 61 training_loss 1.8741059923171997 test_loss: 1.8378324508666992
epoch: 62 training_loss 1.8561163699626924 test_loss: 1.880010986328125
epoch: 63 training_loss 1.867540191411972 test_loss: 1.91220703125
epoch: 64 training_loss 1.8636434888839721 test_loss: 1.8595386505126954
epoch: 65 training_loss 1.8596570909023284 test_loss: 1.8222469329833983
epoch: 66 training_loss 1.8259349417686463 test_loss: 1.8260364532470703
epoch: 67 training_loss 1.808851671218872 test_loss: 1.818960189819336
epoch: 68 training_loss 1.8180099880695344 test_loss: 1.9136543273925781
epoch: 69 training_loss 1.7886450278759003 test_loss: 1.9331777572631836
epoch: 70 training_loss 1.805536046028137 test_loss: 1.8223112106323243
epoch: 71 training_loss 1.7638980746269226 test_loss: 1.7429380416870117
epoch: 72 training_loss 1.7744828736782075 test_loss: 1.7784185409545898
epoch: 73 training_loss 1.7810618948936463 test_loss: 1.7055818557739257
epoch: 74 training_loss 1.7955750823020935 test_loss: 1.7844673156738282
epoch: 75 training_loss 1.7365230309963227 test_loss: 1.7996572494506835
epoch: 76 training_loss 1.70513192653656 test_loss: 1.7348039627075196
epoch: 77 training_loss 1.7572232735157014 test_loss: 1.713031005859375
epoch: 78 training_loss 1.7209433972835542 test_loss: 1.758148193359375
epoch: 79 training_loss 1.704130562543869 test_loss: 1.7076412200927735
epoch: 80 training_loss 1.6846497690677642 test_loss: 1.7227426528930665
epoch: 81 training_loss 1.702801855802536 test_loss: 1.6532808303833009
epoch: 82 training_loss 1.683177934885025 test_loss: 1.7211969375610352
epoch: 83 training_loss 1.6951354885101317 test_loss: 1.7104385375976563
epoch: 84 training_loss 1.6789085495471954 test_loss: 1.6963203430175782
epoch: 85 training_loss 1.6902708637714385 test_loss: 1.6666797637939452
epoch: 86 training_loss 1.6967636835575104 test_loss: 1.7051847457885743
epoch: 87 training_loss 1.6871303689479829 test_loss: 1.6790023803710938
epoch: 88 training_loss 1.6881176567077636 test_loss: 1.7109081268310546
epoch: 89 training_loss 1.6369967782497405 test_loss: 1.6539209365844727
epoch: 90 training_loss 1.6553384923934937 test_loss: 1.6330322265625
epoch: 91 training_loss 1.6736054050922393 test_loss: 1.620263671875
epoch: 92 training_loss 1.6297374200820922 test_loss: 1.6104230880737305
epoch: 93 training_loss 1.6440752911567689 test_loss: 1.5855979919433594
epoch: 94 training_loss 1.6261487007141113 test_loss: 1.626578140258789
epoch: 95 training_loss 1.6402052772045135 test_loss: 1.686417579650879
epoch: 96 training_loss 1.6096890008449554 test_loss: 1.5798685073852539
epoch: 97 training_loss 1.6258557963371276 test_loss: 1.5378535270690918
epoch: 98 training_loss 1.584100581407547 test_loss: 1.67291259765625
epoch: 99 training_loss 1.6286409533023833 test_loss: 1.6710935592651368
epoch: 100 training_loss 1.5935917747020723 test_loss: 1.6191635131835938
epoch: 101 training_loss 1.618178162574768 test_loss: 1.6212850570678712
epoch: 102 training_loss 1.5867920744419097 test_loss: 1.6136306762695312
epoch: 103 training_loss 1.5917529726028443 test_loss: 1.5757376670837402
epoch: 104 training_loss 1.5891126894950867 test_loss: 1.6272470474243164
epoch: 105 training_loss 1.577337040901184 test_loss: 1.601845932006836
epoch: 106 training_loss 1.5839574599266053 test_loss: 1.5738334655761719
epoch: 107 training_loss 1.5535122954845428 test_loss: 1.595222282409668
epoch: 108 training_loss 1.5645835745334624 test_loss: 1.594538688659668
epoch: 109 training_loss 1.562076598405838 test_loss: 1.5489441871643066
epoch: 110 training_loss 1.5535569286346436 test_loss: 1.5729910850524902
epoch: 111 training_loss 1.55904687166214 test_loss: 1.6330440521240235
epoch: 112 training_loss 1.5629129111766815 test_loss: 1.5262166023254395
epoch: 113 training_loss 1.5346350598335265 test_loss: 1.534830093383789
epoch: 114 training_loss 1.5549540305137635 test_loss: 1.5434488296508788
epoch: 115 training_loss 1.554293622970581 test_loss: 1.5593979835510254
epoch: 116 training_loss 1.540046787261963 test_loss: 1.600564956665039
epoch: 117 training_loss 1.5604226851463319 test_loss: 1.5330777168273926
epoch: 118 training_loss 1.510136387348175 test_loss: 1.5291656494140624
epoch: 119 training_loss 1.539716168642044 test_loss: 1.5189127922058105
epoch: 120 training_loss 1.5271505475044251 test_loss: 1.5668371200561524
epoch: 121 training_loss 1.5229877638816833 test_loss: 1.5504889488220215
epoch: 122 training_loss 1.5008650851249694 test_loss: 1.5954629898071289
epoch: 123 training_loss 1.5268970441818237 test_loss: 1.5349193572998048
epoch: 124 training_loss 1.5366440391540528 test_loss: 1.495189380645752
epoch: 125 training_loss 1.510728464126587 test_loss: 1.4916543006896972
epoch: 126 training_loss 1.51417182803154 test_loss: 1.4924667358398438
epoch: 127 training_loss 1.5065509104728698 test_loss: 1.4849456787109374
epoch: 128 training_loss 1.5122993755340577 test_loss: 1.569267177581787
epoch: 129 training_loss 1.5071866559982299 test_loss: 1.4991153717041015
epoch: 130 training_loss 1.4802564918994903 test_loss: 1.4537569046020509
epoch: 131 training_loss 1.4974856984615326 test_loss: 1.5245279312133788
epoch: 132 training_loss 1.5007226848602295 test_loss: 1.4885634422302245
epoch: 133 training_loss 1.4914756691455842 test_loss: 1.4764908790588378
epoch: 134 training_loss 1.4986483824253083 test_loss: 1.503082275390625
epoch: 135 training_loss 1.4978644275665283 test_loss: 1.5217251777648926
epoch: 136 training_loss 1.491600205898285 test_loss: 1.4910558700561523
epoch: 137 training_loss 1.4786454021930695 test_loss: 1.4873332023620605
epoch: 138 training_loss 1.4960949170589446 test_loss: 1.4919553756713868
epoch: 139 training_loss 1.475148457288742 test_loss: 1.4362987518310546
epoch: 140 training_loss 1.4883541977405548 test_loss: 1.4736238479614259
epoch: 141 training_loss 1.4952247369289398 test_loss: 1.4878628730773926
epoch: 142 training_loss 1.4760048222541808 test_loss: 1.4539527893066406
epoch: 143 training_loss 1.4595578289031983 test_loss: 1.4925613403320312
epoch: 144 training_loss 1.4722070324420928 test_loss: 1.454012107849121
epoch: 145 training_loss 1.4697681164741516 test_loss: 1.4472307205200194
epoch: 146 training_loss 1.4527037823200226 test_loss: 1.4932687759399415
epoch: 147 training_loss 1.4659318339824676 test_loss: 1.4930703163146972
epoch: 148 training_loss 1.4551685667037964 test_loss: 1.4750035285949707
epoch: 149 training_loss 1.4655694115161895 test_loss: 1.440890407562256
5073.431923617903
episode: 0 training return: tensor(-12.8745, device='cuda:0')
episode: 1 training return: tensor(-107.4339, device='cuda:0')
episode: 2 training return: tensor(-37.4535, device='cuda:0')
episode: 3 training return: tensor(-4.6922, device='cuda:0')
epoch: 1 test_true_pfm: 5055.5042060871765 sim_pfm: -26.929614455691382
episode: 4 training return: tensor(-152.5265, device='cuda:0')
episode: 5 training return: tensor(-145.7722, device='cuda:0')
episode: 6 training return: tensor(-172.2258, device='cuda:0')
episode: 7 training return: tensor(-170.8200, device='cuda:0')
epoch: 2 test_true_pfm: 5092.81241280276 sim_pfm: -1.1371333013715532
episode: 8 training return: tensor(-257.9168, device='cuda:0')
episode: 9 training return: tensor(4.9208, device='cuda:0')
episode: 10 training return: tensor(56.0512, device='cuda:0')
episode: 11 training return: tensor(-142.0815, device='cuda:0')
epoch: 3 test_true_pfm: 5143.696085627263 sim_pfm: 21.218566235387698
episode: 12 training return: tensor(30.0883, device='cuda:0')
episode: 13 training return: tensor(-176.2061, device='cuda:0')
episode: 14 training return: tensor(-101.5687, device='cuda:0')
episode: 15 training return: tensor(27.4782, device='cuda:0')
epoch: 4 test_true_pfm: 5059.851323461074 sim_pfm: -25.701671495926956
episode: 16 training return: tensor(-126.3642, device='cuda:0')
episode: 17 training return: tensor(-133.0284, device='cuda:0')
episode: 18 training return: tensor(-40.4864, device='cuda:0')
episode: 19 training return: tensor(-99.4026, device='cuda:0')
epoch: 5 test_true_pfm: 4999.357801717448 sim_pfm: -32.881673203131264
episode: 20 training return: tensor(-150.7807, device='cuda:0')
episode: 21 training return: tensor(-62.5632, device='cuda:0')
episode: 22 training return: tensor(-126.5637, device='cuda:0')
episode: 23 training return: tensor(45.1617, device='cuda:0')
epoch: 6 test_true_pfm: 5040.70636892034 sim_pfm: 39.56385725728857
episode: 24 training return: tensor(-46.4092, device='cuda:0')
episode: 25 training return: tensor(-62.0417, device='cuda:0')
episode: 26 training return: tensor(4.5524, device='cuda:0')
episode: 27 training return: tensor(-127.1149, device='cuda:0')
epoch: 7 test_true_pfm: 5074.2982697365505 sim_pfm: -10.621965087756204
episode: 28 training return: tensor(-40.2579, device='cuda:0')
episode: 29 training return: tensor(-25.7059, device='cuda:0')
episode: 30 training return: tensor(-266.4517, device='cuda:0')
episode: 31 training return: tensor(-264.8990, device='cuda:0')
epoch: 8 test_true_pfm: 5101.2951647617365 sim_pfm: -96.13027784364142
episode: 32 training return: tensor(-85.1047, device='cuda:0')
episode: 33 training return: tensor(-158.0251, device='cuda:0')
episode: 34 training return: tensor(-28.6590, device='cuda:0')
episode: 35 training return: tensor(-82.5868, device='cuda:0')
epoch: 9 test_true_pfm: 5155.092589711406 sim_pfm: -33.37559098599013
episode: 36 training return: tensor(-6.3617, device='cuda:0')
episode: 37 training return: tensor(-40.0850, device='cuda:0')
episode: 38 training return: tensor(-112.9511, device='cuda:0')
episode: 39 training return: tensor(8.8364, device='cuda:0')
epoch: 10 test_true_pfm: 5221.797142636259 sim_pfm: 46.183886289276415
episode: 40 training return: tensor(-150.4676, device='cuda:0')
episode: 41 training return: tensor(4.1740, device='cuda:0')
episode: 42 training return: tensor(-73.3040, device='cuda:0')
episode: 43 training return: tensor(-105.5084, device='cuda:0')
epoch: 11 test_true_pfm: 5084.630396152185 sim_pfm: 71.71238057629671
episode: 44 training return: tensor(97.6550, device='cuda:0')
episode: 45 training return: tensor(-191.2463, device='cuda:0')
episode: 46 training return: tensor(-46.1088, device='cuda:0')
episode: 47 training return: tensor(72.2215, device='cuda:0')
epoch: 12 test_true_pfm: 5144.080480462287 sim_pfm: -7.199033349393479
episode: 48 training return: tensor(-80.0983, device='cuda:0')
episode: 49 training return: tensor(-15.5269, device='cuda:0')
episode: 50 training return: tensor(46.7848, device='cuda:0')
episode: 51 training return: tensor(23.8290, device='cuda:0')
epoch: 13 test_true_pfm: 5134.658930606768 sim_pfm: 106.19576812563658
episode: 52 training return: tensor(58.2054, device='cuda:0')
episode: 53 training return: tensor(47.9807, device='cuda:0')
episode: 54 training return: tensor(69.4862, device='cuda:0')
episode: 55 training return: tensor(149.3699, device='cuda:0')
epoch: 14 test_true_pfm: 5277.6784749994995 sim_pfm: 114.59658101439709
episode: 56 training return: tensor(6.9252, device='cuda:0')
episode: 57 training return: tensor(-47.5072, device='cuda:0')
episode: 58 training return: tensor(9.8740, device='cuda:0')
episode: 59 training return: tensor(27.3125, device='cuda:0')
epoch: 15 test_true_pfm: 5218.205044774307 sim_pfm: 115.20876538492546
episode: 60 training return: tensor(69.9300, device='cuda:0')
episode: 61 training return: tensor(-29.2734, device='cuda:0')
episode: 62 training return: tensor(6.7135, device='cuda:0')
episode: 63 training return: tensor(-50.1487, device='cuda:0')
epoch: 16 test_true_pfm: 5251.202954200912 sim_pfm: 42.602897732324585
episode: 64 training return: tensor(-163.4286, device='cuda:0')
episode: 65 training return: tensor(47.8943, device='cuda:0')
episode: 66 training return: tensor(46.2508, device='cuda:0')
episode: 67 training return: tensor(-43.5709, device='cuda:0')
epoch: 17 test_true_pfm: 5260.368953312692 sim_pfm: 162.97701260988833
episode: 68 training return: tensor(-148.4884, device='cuda:0')
episode: 69 training return: tensor(89.2933, device='cuda:0')
episode: 70 training return: tensor(-41.9650, device='cuda:0')
episode: 71 training return: tensor(-13.0259, device='cuda:0')
epoch: 18 test_true_pfm: 5317.760851931694 sim_pfm: 160.3138951382813
episode: 72 training return: tensor(57.9889, device='cuda:0')
episode: 73 training return: tensor(-44.9420, device='cuda:0')
episode: 74 training return: tensor(126.9455, device='cuda:0')
episode: 75 training return: tensor(-5.8837, device='cuda:0')
epoch: 19 test_true_pfm: 5237.27299750869 sim_pfm: 96.5589134375914
episode: 76 training return: tensor(57.0332, device='cuda:0')
episode: 77 training return: tensor(87.0390, device='cuda:0')
episode: 78 training return: tensor(97.2093, device='cuda:0')
episode: 79 training return: tensor(-40.4544, device='cuda:0')
epoch: 20 test_true_pfm: 5236.979310041758 sim_pfm: 44.59133994953785
episode: 80 training return: tensor(12.2092, device='cuda:0')
episode: 81 training return: tensor(85.0112, device='cuda:0')
episode: 82 training return: tensor(54.1596, device='cuda:0')
episode: 83 training return: tensor(-32.9911, device='cuda:0')
epoch: 21 test_true_pfm: 5283.449555867516 sim_pfm: 98.38843897232437
episode: 84 training return: tensor(-96.8602, device='cuda:0')
episode: 85 training return: tensor(50.1458, device='cuda:0')
episode: 86 training return: tensor(54.6990, device='cuda:0')
episode: 87 training return: tensor(-21.8301, device='cuda:0')
epoch: 22 test_true_pfm: 5318.888434264479 sim_pfm: 128.10366909535756
episode: 88 training return: tensor(-53.3147, device='cuda:0')
episode: 89 training return: tensor(-38.8663, device='cuda:0')
episode: 90 training return: tensor(102.1238, device='cuda:0')
episode: 91 training return: tensor(113.7454, device='cuda:0')
epoch: 23 test_true_pfm: 5215.361940908074 sim_pfm: 190.3847129730857
episode: 92 training return: tensor(88.7255, device='cuda:0')
episode: 93 training return: tensor(153.8257, device='cuda:0')
episode: 94 training return: tensor(-100.5184, device='cuda:0')
episode: 95 training return: tensor(-21.8704, device='cuda:0')
epoch: 24 test_true_pfm: 5336.703927468978 sim_pfm: 126.30597824021243
episode: 96 training return: tensor(147.0541, device='cuda:0')
episode: 97 training return: tensor(-25.4623, device='cuda:0')
episode: 98 training return: tensor(50.6694, device='cuda:0')
episode: 99 training return: tensor(-0.8730, device='cuda:0')
epoch: 25 test_true_pfm: 5319.029272978441 sim_pfm: 210.23176566325128
episode: 100 training return: tensor(53.4052, device='cuda:0')
episode: 101 training return: tensor(30.5665, device='cuda:0')
episode: 102 training return: tensor(97.8872, device='cuda:0')
episode: 103 training return: tensor(63.0480, device='cuda:0')
epoch: 26 test_true_pfm: 5333.456913544188 sim_pfm: 234.3796292680005
episode: 104 training return: tensor(-37.2587, device='cuda:0')
episode: 105 training return: tensor(105.4394, device='cuda:0')
episode: 106 training return: tensor(96.5993, device='cuda:0')
episode: 107 training return: tensor(12.2700, device='cuda:0')
epoch: 27 test_true_pfm: 5368.676835850261 sim_pfm: 135.65258852636907
episode: 108 training return: tensor(220.8651, device='cuda:0')
episode: 109 training return: tensor(232.1765, device='cuda:0')
episode: 110 training return: tensor(-33.5245, device='cuda:0')
episode: 111 training return: tensor(310.5981, device='cuda:0')
epoch: 28 test_true_pfm: 5439.7979580862275 sim_pfm: 126.72721029918951
episode: 112 training return: tensor(28.4946, device='cuda:0')
episode: 113 training return: tensor(7.4023, device='cuda:0')
episode: 114 training return: tensor(183.0814, device='cuda:0')
episode: 115 training return: tensor(113.6614, device='cuda:0')
epoch: 29 test_true_pfm: 5454.700358455039 sim_pfm: 212.67002016305923
episode: 116 training return: tensor(50.0025, device='cuda:0')
episode: 117 training return: tensor(167.4407, device='cuda:0')
episode: 118 training return: tensor(132.3259, device='cuda:0')
episode: 119 training return: tensor(172.5244, device='cuda:0')
epoch: 30 test_true_pfm: 5338.905527956497 sim_pfm: 224.57825926751443
episode: 120 training return: tensor(23.1048, device='cuda:0')
episode: 121 training return: tensor(54.6222, device='cuda:0')
episode: 122 training return: tensor(105.4936, device='cuda:0')
episode: 123 training return: tensor(254.4400, device='cuda:0')
epoch: 31 test_true_pfm: 5433.361408361742 sim_pfm: 178.33157568417178
episode: 124 training return: tensor(173.4662, device='cuda:0')
episode: 125 training return: tensor(144.4258, device='cuda:0')
episode: 126 training return: tensor(160.6681, device='cuda:0')
episode: 127 training return: tensor(25.0788, device='cuda:0')
epoch: 32 test_true_pfm: 5431.487621226448 sim_pfm: 203.78216349103604
episode: 128 training return: tensor(233.6183, device='cuda:0')
episode: 129 training return: tensor(156.2521, device='cuda:0')
episode: 130 training return: tensor(153.6455, device='cuda:0')
episode: 131 training return: tensor(196.1231, device='cuda:0')
epoch: 33 test_true_pfm: 5433.223110361261 sim_pfm: 126.72330095304642
episode: 132 training return: tensor(75.2740, device='cuda:0')
episode: 133 training return: tensor(25.3597, device='cuda:0')
episode: 134 training return: tensor(109.1126, device='cuda:0')
episode: 135 training return: tensor(206.1467, device='cuda:0')
epoch: 34 test_true_pfm: 5462.984561757646 sim_pfm: 239.0076506638628
episode: 136 training return: tensor(272.7345, device='cuda:0')
episode: 137 training return: tensor(150.2547, device='cuda:0')
episode: 138 training return: tensor(25.6278, device='cuda:0')
episode: 139 training return: tensor(184.2893, device='cuda:0')
epoch: 35 test_true_pfm: 5480.324596031889 sim_pfm: 172.3451039848927
episode: 140 training return: tensor(67.1037, device='cuda:0')
episode: 141 training return: tensor(38.6727, device='cuda:0')
episode: 142 training return: tensor(99.7751, device='cuda:0')
episode: 143 training return: tensor(18.9851, device='cuda:0')
epoch: 36 test_true_pfm: 5346.812196537507 sim_pfm: 151.13823945813542
episode: 144 training return: tensor(162.1911, device='cuda:0')
episode: 145 training return: tensor(116.1760, device='cuda:0')
episode: 146 training return: tensor(66.4374, device='cuda:0')
episode: 147 training return: tensor(-54.9749, device='cuda:0')
epoch: 37 test_true_pfm: 5371.4121925354675 sim_pfm: 227.23697047903747
episode: 148 training return: tensor(258.0128, device='cuda:0')
episode: 149 training return: tensor(188.7247, device='cuda:0')
episode: 150 training return: tensor(149.1676, device='cuda:0')
episode: 151 training return: tensor(64.5962, device='cuda:0')
epoch: 38 test_true_pfm: 5375.163505744779 sim_pfm: 108.27153291396098
episode: 152 training return: tensor(69.2936, device='cuda:0')
episode: 153 training return: tensor(-14.9316, device='cuda:0')
episode: 154 training return: tensor(41.1894, device='cuda:0')
episode: 155 training return: tensor(115.3676, device='cuda:0')
epoch: 39 test_true_pfm: 5458.781169900355 sim_pfm: 199.1755200739523
episode: 156 training return: tensor(242.5744, device='cuda:0')
episode: 157 training return: tensor(118.1388, device='cuda:0')
episode: 158 training return: tensor(173.4123, device='cuda:0')
episode: 159 training return: tensor(129.9195, device='cuda:0')
epoch: 40 test_true_pfm: 5326.1027647823485 sim_pfm: 224.5909580924296
episode: 160 training return: tensor(126.4693, device='cuda:0')
episode: 161 training return: tensor(240.7381, device='cuda:0')
episode: 162 training return: tensor(133.0274, device='cuda:0')
episode: 163 training return: tensor(285.8220, device='cuda:0')
epoch: 41 test_true_pfm: 5397.457425801682 sim_pfm: 221.30317766481312
episode: 164 training return: tensor(196.3371, device='cuda:0')
episode: 165 training return: tensor(73.7514, device='cuda:0')
episode: 166 training return: tensor(98.7405, device='cuda:0')
episode: 167 training return: tensor(155.2922, device='cuda:0')
epoch: 42 test_true_pfm: 5386.4756542359655 sim_pfm: 248.46918580370644
episode: 168 training return: tensor(148.3831, device='cuda:0')
episode: 169 training return: tensor(147.4373, device='cuda:0')
episode: 170 training return: tensor(39.2912, device='cuda:0')
episode: 171 training return: tensor(208.2575, device='cuda:0')
epoch: 43 test_true_pfm: 5502.815186311617 sim_pfm: 180.2986957950343
episode: 172 training return: tensor(72.9714, device='cuda:0')
episode: 173 training return: tensor(12.5605, device='cuda:0')
episode: 174 training return: tensor(139.4939, device='cuda:0')
episode: 175 training return: tensor(47.6901, device='cuda:0')
epoch: 44 test_true_pfm: 5411.946605232814 sim_pfm: 229.82034571873373
episode: 176 training return: tensor(68.5087, device='cuda:0')
episode: 177 training return: tensor(248.7286, device='cuda:0')
episode: 178 training return: tensor(188.4775, device='cuda:0')
episode: 179 training return: tensor(221.2602, device='cuda:0')
epoch: 45 test_true_pfm: 5374.591515065668 sim_pfm: 280.2628270784335
episode: 180 training return: tensor(148.6484, device='cuda:0')
episode: 181 training return: tensor(115.2746, device='cuda:0')
episode: 182 training return: tensor(21.5482, device='cuda:0')
episode: 183 training return: tensor(70.0242, device='cuda:0')
epoch: 46 test_true_pfm: 5502.3747148276025 sim_pfm: 315.6548554174369
episode: 184 training return: tensor(101.8414, device='cuda:0')
episode: 185 training return: tensor(197.5403, device='cuda:0')
episode: 186 training return: tensor(40.1095, device='cuda:0')
episode: 187 training return: tensor(185.6987, device='cuda:0')
epoch: 47 test_true_pfm: 5479.120770144888 sim_pfm: 292.09666122395237
episode: 188 training return: tensor(157.7737, device='cuda:0')
episode: 189 training return: tensor(186.6637, device='cuda:0')
episode: 190 training return: tensor(185.6660, device='cuda:0')
episode: 191 training return: tensor(152.4772, device='cuda:0')
epoch: 48 test_true_pfm: 5435.105253711371 sim_pfm: 205.90496283072085
episode: 192 training return: tensor(137.1710, device='cuda:0')
episode: 193 training return: tensor(208.3622, device='cuda:0')
episode: 194 training return: tensor(111.8754, device='cuda:0')
episode: 195 training return: tensor(195.4203, device='cuda:0')
epoch: 49 test_true_pfm: 5449.160309281216 sim_pfm: 254.09304238923747
episode: 196 training return: tensor(-1.8470, device='cuda:0')
episode: 197 training return: tensor(146.6379, device='cuda:0')
episode: 198 training return: tensor(135.2011, device='cuda:0')
episode: 199 training return: tensor(205.7964, device='cuda:0')
epoch: 50 test_true_pfm: 5411.6661342949565 sim_pfm: 307.2705480219253
episode: 200 training return: tensor(58.4209, device='cuda:0')
episode: 201 training return: tensor(138.7596, device='cuda:0')
episode: 202 training return: tensor(106.5562, device='cuda:0')
episode: 203 training return: tensor(200.3931, device='cuda:0')
epoch: 51 test_true_pfm: 5600.432135924205 sim_pfm: 272.5705821167212
episode: 204 training return: tensor(176.3944, device='cuda:0')
episode: 205 training return: tensor(128.2306, device='cuda:0')
episode: 206 training return: tensor(95.6148, device='cuda:0')
episode: 207 training return: tensor(24.4498, device='cuda:0')
epoch: 52 test_true_pfm: 5460.176962878173 sim_pfm: 205.36903018481098
episode: 208 training return: tensor(140.3688, device='cuda:0')
episode: 209 training return: tensor(131.5599, device='cuda:0')
episode: 210 training return: tensor(143.3812, device='cuda:0')
episode: 211 training return: tensor(253.0078, device='cuda:0')
epoch: 53 test_true_pfm: 5511.389047564017 sim_pfm: 289.57268250271835
episode: 212 training return: tensor(172.5960, device='cuda:0')
episode: 213 training return: tensor(205.6324, device='cuda:0')
episode: 214 training return: tensor(225.6033, device='cuda:0')
episode: 215 training return: tensor(148.7702, device='cuda:0')
epoch: 54 test_true_pfm: 5526.329616371647 sim_pfm: 221.13345968678672
episode: 216 training return: tensor(82.3239, device='cuda:0')
episode: 217 training return: tensor(128.4844, device='cuda:0')
episode: 218 training return: tensor(173.4307, device='cuda:0')
episode: 219 training return: tensor(175.1229, device='cuda:0')
epoch: 55 test_true_pfm: 5536.130612732719 sim_pfm: 260.7587123788544
episode: 220 training return: tensor(208.7083, device='cuda:0')
episode: 221 training return: tensor(121.2715, device='cuda:0')
episode: 222 training return: tensor(227.6430, device='cuda:0')
episode: 223 training return: tensor(176.8753, device='cuda:0')
epoch: 56 test_true_pfm: 5494.587067040138 sim_pfm: 216.71810583401626
episode: 224 training return: tensor(161.3141, device='cuda:0')
episode: 225 training return: tensor(189.2707, device='cuda:0')
episode: 226 training return: tensor(122.7041, device='cuda:0')
episode: 227 training return: tensor(277.5040, device='cuda:0')
epoch: 57 test_true_pfm: 5581.503368571614 sim_pfm: 294.17789173298905
episode: 228 training return: tensor(73.5803, device='cuda:0')
episode: 229 training return: tensor(172.9617, device='cuda:0')
episode: 230 training return: tensor(226.4593, device='cuda:0')
episode: 231 training return: tensor(236.2897, device='cuda:0')
epoch: 58 test_true_pfm: 5534.65158129202 sim_pfm: 268.4497156880777
episode: 232 training return: tensor(239.9225, device='cuda:0')
episode: 233 training return: tensor(120.1931, device='cuda:0')
episode: 234 training return: tensor(23.5969, device='cuda:0')
episode: 235 training return: tensor(223.9561, device='cuda:0')
epoch: 59 test_true_pfm: 5462.963271581865 sim_pfm: 336.63271034434246
episode: 236 training return: tensor(197.2706, device='cuda:0')
episode: 237 training return: tensor(134.4102, device='cuda:0')
episode: 238 training return: tensor(167.5888, device='cuda:0')
episode: 239 training return: tensor(98.4560, device='cuda:0')
epoch: 60 test_true_pfm: 5452.564484697311 sim_pfm: 278.7347811118428
episode: 240 training return: tensor(239.2295, device='cuda:0')
episode: 241 training return: tensor(170.2739, device='cuda:0')
episode: 242 training return: tensor(292.4446, device='cuda:0')
episode: 243 training return: tensor(137.4583, device='cuda:0')
epoch: 61 test_true_pfm: 5627.641922377775 sim_pfm: 291.5594840653551
episode: 244 training return: tensor(143.6507, device='cuda:0')
episode: 245 training return: tensor(234.9797, device='cuda:0')
episode: 246 training return: tensor(256.9450, device='cuda:0')
episode: 247 training return: tensor(198.9565, device='cuda:0')
epoch: 62 test_true_pfm: 5585.865145866138 sim_pfm: 297.62152119219536
episode: 248 training return: tensor(146.1431, device='cuda:0')
episode: 249 training return: tensor(152.8651, device='cuda:0')
episode: 250 training return: tensor(226.2104, device='cuda:0')
episode: 251 training return: tensor(117.1796, device='cuda:0')
epoch: 63 test_true_pfm: 5560.717822678685 sim_pfm: 299.0546477711371
episode: 252 training return: tensor(240.1856, device='cuda:0')
episode: 253 training return: tensor(52.9182, device='cuda:0')
episode: 254 training return: tensor(148.0455, device='cuda:0')
episode: 255 training return: tensor(152.4056, device='cuda:0')
epoch: 64 test_true_pfm: 5534.165869264961 sim_pfm: 318.4219424347393
episode: 256 training return: tensor(258.3701, device='cuda:0')
episode: 257 training return: tensor(138.2311, device='cuda:0')
episode: 258 training return: tensor(172.6606, device='cuda:0')
episode: 259 training return: tensor(189.8330, device='cuda:0')
epoch: 65 test_true_pfm: 5529.334488703052 sim_pfm: 316.653315171774
episode: 260 training return: tensor(303.1893, device='cuda:0')
episode: 261 training return: tensor(251.1552, device='cuda:0')
episode: 262 training return: tensor(211.2644, device='cuda:0')
episode: 263 training return: tensor(162.6492, device='cuda:0')
epoch: 66 test_true_pfm: 5560.257785374753 sim_pfm: 333.605316261887
episode: 264 training return: tensor(255.7770, device='cuda:0')
episode: 265 training return: tensor(321.3540, device='cuda:0')
episode: 266 training return: tensor(211.4899, device='cuda:0')
episode: 267 training return: tensor(94.6056, device='cuda:0')
epoch: 67 test_true_pfm: 5504.6981180886 sim_pfm: 288.4721098187729
episode: 268 training return: tensor(271.2926, device='cuda:0')
episode: 269 training return: tensor(181.4777, device='cuda:0')
episode: 270 training return: tensor(352.5619, device='cuda:0')
episode: 271 training return: tensor(273.7185, device='cuda:0')
epoch: 68 test_true_pfm: 5528.711596749065 sim_pfm: 327.8438991710427
episode: 272 training return: tensor(270.3896, device='cuda:0')
episode: 273 training return: tensor(161.1125, device='cuda:0')
episode: 274 training return: tensor(191.4950, device='cuda:0')
episode: 275 training return: tensor(267.6318, device='cuda:0')
epoch: 69 test_true_pfm: 5552.197532825063 sim_pfm: 355.6816493629788
episode: 276 training return: tensor(62.5113, device='cuda:0')
episode: 277 training return: tensor(229.8995, device='cuda:0')
episode: 278 training return: tensor(233.8998, device='cuda:0')
episode: 279 training return: tensor(195.7232, device='cuda:0')
epoch: 70 test_true_pfm: 5616.209372238962 sim_pfm: 289.4430139681402
episode: 280 training return: tensor(191.3010, device='cuda:0')
episode: 281 training return: tensor(184.3214, device='cuda:0')
episode: 282 training return: tensor(291.1644, device='cuda:0')
episode: 283 training return: tensor(192.2552, device='cuda:0')
epoch: 71 test_true_pfm: 5465.433337847979 sim_pfm: 363.6055314539117
episode: 284 training return: tensor(135.2689, device='cuda:0')
episode: 285 training return: tensor(220.1932, device='cuda:0')
episode: 286 training return: tensor(214.6948, device='cuda:0')
episode: 287 training return: tensor(215.8036, device='cuda:0')
epoch: 72 test_true_pfm: 5528.132705610985 sim_pfm: 301.9527449773159
episode: 288 training return: tensor(270.4605, device='cuda:0')
episode: 289 training return: tensor(170.9218, device='cuda:0')
episode: 290 training return: tensor(330.4735, device='cuda:0')
episode: 291 training return: tensor(233.1963, device='cuda:0')
epoch: 73 test_true_pfm: 5425.082922749935 sim_pfm: 243.1711063531693
episode: 292 training return: tensor(207.0297, device='cuda:0')
episode: 293 training return: tensor(217.8217, device='cuda:0')
episode: 294 training return: tensor(171.2907, device='cuda:0')
episode: 295 training return: tensor(194.0337, device='cuda:0')
epoch: 74 test_true_pfm: 5497.296428607112 sim_pfm: 304.2374975346805
episode: 296 training return: tensor(195.6125, device='cuda:0')
episode: 297 training return: tensor(161.3558, device='cuda:0')
episode: 298 training return: tensor(237.6277, device='cuda:0')
episode: 299 training return: tensor(217.8634, device='cuda:0')
epoch: 75 test_true_pfm: 5554.009690954379 sim_pfm: 202.9676981680192
episode: 300 training return: tensor(201.7433, device='cuda:0')
episode: 301 training return: tensor(153.6523, device='cuda:0')
episode: 302 training return: tensor(262.0403, device='cuda:0')
episode: 303 training return: tensor(197.6039, device='cuda:0')
epoch: 76 test_true_pfm: 5604.827828566241 sim_pfm: 301.86115003504284
episode: 304 training return: tensor(178.9079, device='cuda:0')
episode: 305 training return: tensor(292.7549, device='cuda:0')
episode: 306 training return: tensor(243.5585, device='cuda:0')
episode: 307 training return: tensor(214.1235, device='cuda:0')
epoch: 77 test_true_pfm: 5574.191582144132 sim_pfm: 264.66941863502143
episode: 308 training return: tensor(332.0706, device='cuda:0')
episode: 309 training return: tensor(129.3290, device='cuda:0')
episode: 310 training return: tensor(249.5073, device='cuda:0')
episode: 311 training return: tensor(227.2464, device='cuda:0')
epoch: 78 test_true_pfm: 5615.871766011561 sim_pfm: 312.96914144518087
episode: 312 training return: tensor(240.3539, device='cuda:0')
episode: 313 training return: tensor(203.4723, device='cuda:0')
episode: 314 training return: tensor(296.0843, device='cuda:0')
episode: 315 training return: tensor(164.5834, device='cuda:0')
epoch: 79 test_true_pfm: 5535.681238354581 sim_pfm: 396.83806282722315
episode: 316 training return: tensor(240.9224, device='cuda:0')
episode: 317 training return: tensor(196.3092, device='cuda:0')
episode: 318 training return: tensor(274.1406, device='cuda:0')
episode: 319 training return: tensor(-541.2648, device='cuda:0')
epoch: 80 test_true_pfm: 5609.971438596989 sim_pfm: 297.9901460194572
episode: 320 training return: tensor(318.2706, device='cuda:0')
episode: 321 training return: tensor(178.5752, device='cuda:0')
episode: 322 training return: tensor(144.8492, device='cuda:0')
episode: 323 training return: tensor(175.9592, device='cuda:0')
epoch: 81 test_true_pfm: 5617.067221566628 sim_pfm: 324.93810260877945
episode: 324 training return: tensor(155.2466, device='cuda:0')
episode: 325 training return: tensor(236.9356, device='cuda:0')
episode: 326 training return: tensor(229.1108, device='cuda:0')
episode: 327 training return: tensor(204.3459, device='cuda:0')
epoch: 82 test_true_pfm: 5664.654820154122 sim_pfm: 368.7576480390562
episode: 328 training return: tensor(216.0100, device='cuda:0')
episode: 329 training return: tensor(235.1732, device='cuda:0')
episode: 330 training return: tensor(208.7225, device='cuda:0')
episode: 331 training return: tensor(172.6908, device='cuda:0')
epoch: 83 test_true_pfm: 5493.933869804884 sim_pfm: 328.74970990384463
episode: 332 training return: tensor(281.6158, device='cuda:0')
episode: 333 training return: tensor(174.9568, device='cuda:0')
episode: 334 training return: tensor(321.4527, device='cuda:0')
episode: 335 training return: tensor(215.5661, device='cuda:0')
epoch: 84 test_true_pfm: 5446.0032104603015 sim_pfm: 314.75897750089644
episode: 336 training return: tensor(243.0724, device='cuda:0')
episode: 337 training return: tensor(178.9027, device='cuda:0')
episode: 338 training return: tensor(129.8818, device='cuda:0')
episode: 339 training return: tensor(230.8970, device='cuda:0')
epoch: 85 test_true_pfm: 5575.909223235215 sim_pfm: 391.6343151801557
episode: 340 training return: tensor(231.8171, device='cuda:0')
episode: 341 training return: tensor(258.5180, device='cuda:0')
episode: 342 training return: tensor(152.7205, device='cuda:0')
episode: 343 training return: tensor(305.4184, device='cuda:0')
epoch: 86 test_true_pfm: 5688.562684961192 sim_pfm: 320.0980984895141
episode: 344 training return: tensor(233.1916, device='cuda:0')
episode: 345 training return: tensor(169.6115, device='cuda:0')
episode: 346 training return: tensor(297.4843, device='cuda:0')
episode: 347 training return: tensor(233.4585, device='cuda:0')
epoch: 87 test_true_pfm: 5563.242548133253 sim_pfm: 305.5909689255447
episode: 348 training return: tensor(355.1860, device='cuda:0')
episode: 349 training return: tensor(163.1028, device='cuda:0')
episode: 350 training return: tensor(128.4005, device='cuda:0')
episode: 351 training return: tensor(203.0051, device='cuda:0')
epoch: 88 test_true_pfm: 5598.261402677749 sim_pfm: 342.1166862990164
episode: 352 training return: tensor(305.8498, device='cuda:0')
episode: 353 training return: tensor(135.5557, device='cuda:0')
episode: 354 training return: tensor(147.7660, device='cuda:0')
episode: 355 training return: tensor(376.0352, device='cuda:0')
epoch: 89 test_true_pfm: 5636.644554565362 sim_pfm: 304.7308007859198
episode: 356 training return: tensor(324.0425, device='cuda:0')
episode: 357 training return: tensor(252.9739, device='cuda:0')
episode: 358 training return: tensor(222.4631, device='cuda:0')
episode: 359 training return: tensor(245.2821, device='cuda:0')
epoch: 90 test_true_pfm: 5586.15701882057 sim_pfm: 273.4804257875173
episode: 360 training return: tensor(218.3292, device='cuda:0')
episode: 361 training return: tensor(273.7785, device='cuda:0')
episode: 362 training return: tensor(252.3076, device='cuda:0')
episode: 363 training return: tensor(263.6151, device='cuda:0')
epoch: 91 test_true_pfm: 5514.941244294357 sim_pfm: 363.30517969226156
episode: 364 training return: tensor(216.1856, device='cuda:0')
episode: 365 training return: tensor(234.3860, device='cuda:0')
episode: 366 training return: tensor(226.3849, device='cuda:0')
episode: 367 training return: tensor(86.1686, device='cuda:0')
epoch: 92 test_true_pfm: 5617.200819312214 sim_pfm: 343.5969392012339
episode: 368 training return: tensor(206.2625, device='cuda:0')
episode: 369 training return: tensor(248.9312, device='cuda:0')
episode: 370 training return: tensor(239.0651, device='cuda:0')
episode: 371 training return: tensor(226.7876, device='cuda:0')
epoch: 93 test_true_pfm: 5619.707228721039 sim_pfm: 370.66426741090254
episode: 372 training return: tensor(170.9520, device='cuda:0')
episode: 373 training return: tensor(206.3132, device='cuda:0')
episode: 374 training return: tensor(208.0167, device='cuda:0')
episode: 375 training return: tensor(244.7499, device='cuda:0')
epoch: 94 test_true_pfm: 5593.683754068731 sim_pfm: 328.8749994133832
episode: 376 training return: tensor(266.7886, device='cuda:0')
episode: 377 training return: tensor(191.4257, device='cuda:0')
episode: 378 training return: tensor(189.1464, device='cuda:0')
episode: 379 training return: tensor(242.0504, device='cuda:0')
epoch: 95 test_true_pfm: 5509.167408598819 sim_pfm: 374.81392793551396
episode: 380 training return: tensor(381.0658, device='cuda:0')
episode: 381 training return: tensor(154.1995, device='cuda:0')
episode: 382 training return: tensor(278.5668, device='cuda:0')
episode: 383 training return: tensor(214.4356, device='cuda:0')
epoch: 96 test_true_pfm: 5641.178858944678 sim_pfm: 274.42217088633333
episode: 384 training return: tensor(271.9456, device='cuda:0')
episode: 385 training return: tensor(374.0844, device='cuda:0')
episode: 386 training return: tensor(144.7419, device='cuda:0')
episode: 387 training return: tensor(301.5991, device='cuda:0')
epoch: 97 test_true_pfm: 5617.904008255198 sim_pfm: 322.955688296468
episode: 388 training return: tensor(186.1449, device='cuda:0')
episode: 389 training return: tensor(259.4120, device='cuda:0')
episode: 390 training return: tensor(337.9150, device='cuda:0')
episode: 391 training return: tensor(183.3570, device='cuda:0')
epoch: 98 test_true_pfm: 5679.003121163169 sim_pfm: 403.1226785745045
episode: 392 training return: tensor(208.1466, device='cuda:0')
episode: 393 training return: tensor(409.0558, device='cuda:0')
episode: 394 training return: tensor(266.0665, device='cuda:0')
episode: 395 training return: tensor(276.4470, device='cuda:0')
epoch: 99 test_true_pfm: 5647.051611323871 sim_pfm: 361.64640564029105
episode: 396 training return: tensor(186.7710, device='cuda:0')
episode: 397 training return: tensor(299.8038, device='cuda:0')
episode: 398 training return: tensor(199.4101, device='cuda:0')
episode: 399 training return: tensor(315.1471, device='cuda:0')
epoch: 100 test_true_pfm: 5611.919610717153 sim_pfm: 324.44562520463177
episode: 400 training return: tensor(204.6636, device='cuda:0')
episode: 401 training return: tensor(327.9634, device='cuda:0')
episode: 402 training return: tensor(333.1914, device='cuda:0')
episode: 403 training return: tensor(168.9869, device='cuda:0')
epoch: 101 test_true_pfm: 5653.30009549177 sim_pfm: 407.6933781534511
episode: 404 training return: tensor(285.5371, device='cuda:0')
episode: 405 training return: tensor(311.5131, device='cuda:0')
episode: 406 training return: tensor(245.6829, device='cuda:0')
episode: 407 training return: tensor(199.5425, device='cuda:0')
epoch: 102 test_true_pfm: 5715.672690476055 sim_pfm: 306.11715949351975
episode: 408 training return: tensor(212.8706, device='cuda:0')
episode: 409 training return: tensor(250.5902, device='cuda:0')
episode: 410 training return: tensor(246.0550, device='cuda:0')
episode: 411 training return: tensor(275.0825, device='cuda:0')
epoch: 103 test_true_pfm: 5529.049750785206 sim_pfm: 367.81562718968297
episode: 412 training return: tensor(221.5014, device='cuda:0')
episode: 413 training return: tensor(73.0278, device='cuda:0')
episode: 414 training return: tensor(235.7089, device='cuda:0')
episode: 415 training return: tensor(276.4661, device='cuda:0')
epoch: 104 test_true_pfm: 5636.134801824174 sim_pfm: 404.990183941108
episode: 416 training return: tensor(205.6089, device='cuda:0')
episode: 417 training return: tensor(240.4732, device='cuda:0')
episode: 418 training return: tensor(269.9570, device='cuda:0')
episode: 419 training return: tensor(240.2605, device='cuda:0')
epoch: 105 test_true_pfm: 5560.8706548435 sim_pfm: 399.20141227240674
episode: 420 training return: tensor(301.9791, device='cuda:0')
episode: 421 training return: tensor(132.5937, device='cuda:0')
episode: 422 training return: tensor(259.7639, device='cuda:0')
episode: 423 training return: tensor(141.7611, device='cuda:0')
epoch: 106 test_true_pfm: 5593.332850833639 sim_pfm: 376.7327641856197
episode: 424 training return: tensor(313.7980, device='cuda:0')
episode: 425 training return: tensor(214.4169, device='cuda:0')
episode: 426 training return: tensor(267.1096, device='cuda:0')
episode: 427 training return: tensor(275.6592, device='cuda:0')
epoch: 107 test_true_pfm: 5630.159153194244 sim_pfm: 366.38028190410114
episode: 428 training return: tensor(218.6702, device='cuda:0')
episode: 429 training return: tensor(280.3784, device='cuda:0')
episode: 430 training return: tensor(192.7495, device='cuda:0')
episode: 431 training return: tensor(342.8130, device='cuda:0')
epoch: 108 test_true_pfm: 5674.38001333905 sim_pfm: 338.03674733669806
episode: 432 training return: tensor(176.6699, device='cuda:0')
episode: 433 training return: tensor(208.8125, device='cuda:0')
episode: 434 training return: tensor(137.4424, device='cuda:0')
episode: 435 training return: tensor(331.9365, device='cuda:0')
epoch: 109 test_true_pfm: 5743.930415475362 sim_pfm: 367.61916269435704
episode: 436 training return: tensor(295.4090, device='cuda:0')
episode: 437 training return: tensor(181.4115, device='cuda:0')
episode: 438 training return: tensor(261.6339, device='cuda:0')
episode: 439 training return: tensor(317.9301, device='cuda:0')
epoch: 110 test_true_pfm: 5698.234214798217 sim_pfm: 360.10001953419606
episode: 440 training return: tensor(163.8249, device='cuda:0')
episode: 441 training return: tensor(212.3354, device='cuda:0')
episode: 442 training return: tensor(208.4419, device='cuda:0')
episode: 443 training return: tensor(333.4240, device='cuda:0')
epoch: 111 test_true_pfm: 5585.379301389604 sim_pfm: 346.8265261876998
episode: 444 training return: tensor(123.5902, device='cuda:0')
episode: 445 training return: tensor(230.7840, device='cuda:0')
episode: 446 training return: tensor(284.9650, device='cuda:0')
episode: 447 training return: tensor(377.7361, device='cuda:0')
epoch: 112 test_true_pfm: 5581.092601310491 sim_pfm: 349.39511751781293
episode: 448 training return: tensor(270.3371, device='cuda:0')
episode: 449 training return: tensor(133.0152, device='cuda:0')
episode: 450 training return: tensor(384.9866, device='cuda:0')
episode: 451 training return: tensor(199.2247, device='cuda:0')
epoch: 113 test_true_pfm: 5622.023259409292 sim_pfm: 348.38856623482815
episode: 452 training return: tensor(71.9828, device='cuda:0')
episode: 453 training return: tensor(292.6084, device='cuda:0')
episode: 454 training return: tensor(268.1205, device='cuda:0')
episode: 455 training return: tensor(179.1346, device='cuda:0')
epoch: 114 test_true_pfm: 5610.015130088934 sim_pfm: 303.8314544930569
episode: 456 training return: tensor(249.5532, device='cuda:0')
episode: 457 training return: tensor(108.9698, device='cuda:0')
episode: 458 training return: tensor(322.1299, device='cuda:0')
episode: 459 training return: tensor(234.6484, device='cuda:0')
epoch: 115 test_true_pfm: 5571.73931677513 sim_pfm: 373.2264369429322
episode: 460 training return: tensor(242.3450, device='cuda:0')
episode: 461 training return: tensor(167.4168, device='cuda:0')
episode: 462 training return: tensor(225.0810, device='cuda:0')
episode: 463 training return: tensor(193.5021, device='cuda:0')
epoch: 116 test_true_pfm: 5607.072922786313 sim_pfm: 363.6165298452058
episode: 464 training return: tensor(324.1421, device='cuda:0')
episode: 465 training return: tensor(186.1365, device='cuda:0')
episode: 466 training return: tensor(211.4631, device='cuda:0')
episode: 467 training return: tensor(251.7238, device='cuda:0')
epoch: 117 test_true_pfm: 5608.086269642304 sim_pfm: 310.39212768744136
episode: 468 training return: tensor(301.7369, device='cuda:0')
episode: 469 training return: tensor(156.3563, device='cuda:0')
episode: 470 training return: tensor(246.7147, device='cuda:0')
episode: 471 training return: tensor(173.8157, device='cuda:0')
epoch: 118 test_true_pfm: 5600.231370875138 sim_pfm: 378.57470826113905
episode: 472 training return: tensor(327.1891, device='cuda:0')
episode: 473 training return: tensor(169.6144, device='cuda:0')
episode: 474 training return: tensor(333.0132, device='cuda:0')
episode: 475 training return: tensor(296.1470, device='cuda:0')
epoch: 119 test_true_pfm: 5678.912986986387 sim_pfm: 387.88621527930565
episode: 476 training return: tensor(154.4908, device='cuda:0')
episode: 477 training return: tensor(230.5119, device='cuda:0')
episode: 478 training return: tensor(295.2301, device='cuda:0')
episode: 479 training return: tensor(268.9229, device='cuda:0')
epoch: 120 test_true_pfm: 5661.623093032797 sim_pfm: 394.19340538722463
episode: 480 training return: tensor(243.8218, device='cuda:0')
episode: 481 training return: tensor(250.2503, device='cuda:0')
episode: 482 training return: tensor(128.9769, device='cuda:0')
episode: 483 training return: tensor(276.9171, device='cuda:0')
epoch: 121 test_true_pfm: 5570.979379639549 sim_pfm: 300.9532968090304
episode: 484 training return: tensor(213.9244, device='cuda:0')
episode: 485 training return: tensor(320.8714, device='cuda:0')
episode: 486 training return: tensor(240.1481, device='cuda:0')
episode: 487 training return: tensor(193.7121, device='cuda:0')
epoch: 122 test_true_pfm: 5707.539455043519 sim_pfm: 332.7086154743059
episode: 488 training return: tensor(302.9594, device='cuda:0')
episode: 489 training return: tensor(296.1234, device='cuda:0')
episode: 490 training return: tensor(291.8552, device='cuda:0')
episode: 491 training return: tensor(274.1826, device='cuda:0')
epoch: 123 test_true_pfm: 5652.050240017557 sim_pfm: 409.3392188158662
episode: 492 training return: tensor(319.2876, device='cuda:0')
episode: 493 training return: tensor(219.6716, device='cuda:0')
episode: 494 training return: tensor(242.0860, device='cuda:0')
episode: 495 training return: tensor(310.6682, device='cuda:0')
epoch: 124 test_true_pfm: 5665.508333192447 sim_pfm: 411.7858086974981
episode: 496 training return: tensor(167.8370, device='cuda:0')
episode: 497 training return: tensor(239.7739, device='cuda:0')
episode: 498 training return: tensor(268.4962, device='cuda:0')
episode: 499 training return: tensor(242.5304, device='cuda:0')
epoch: 125 test_true_pfm: 5728.4728756546865 sim_pfm: 412.2915002697264
episode: 500 training return: tensor(239.6966, device='cuda:0')
episode: 501 training return: tensor(191.4183, device='cuda:0')
episode: 502 training return: tensor(307.6633, device='cuda:0')
episode: 503 training return: tensor(228.2255, device='cuda:0')
epoch: 126 test_true_pfm: 5611.699602583559 sim_pfm: 315.18947936959256
episode: 504 training return: tensor(312.0451, device='cuda:0')
episode: 505 training return: tensor(243.3181, device='cuda:0')
episode: 506 training return: tensor(266.7124, device='cuda:0')
episode: 507 training return: tensor(274.5814, device='cuda:0')
epoch: 127 test_true_pfm: 5611.463204943934 sim_pfm: 373.7929958393797
episode: 508 training return: tensor(295.8821, device='cuda:0')
episode: 509 training return: tensor(166.8738, device='cuda:0')
episode: 510 training return: tensor(197.4594, device='cuda:0')
episode: 511 training return: tensor(369.8804, device='cuda:0')
epoch: 128 test_true_pfm: 5626.619832948996 sim_pfm: 365.73900434550404
episode: 512 training return: tensor(273.1124, device='cuda:0')
episode: 513 training return: tensor(306.7540, device='cuda:0')
episode: 514 training return: tensor(417.0457, device='cuda:0')
episode: 515 training return: tensor(184.6041, device='cuda:0')
epoch: 129 test_true_pfm: 5683.845546424357 sim_pfm: 400.0198188777625
episode: 516 training return: tensor(202.7989, device='cuda:0')
episode: 517 training return: tensor(221.5248, device='cuda:0')
episode: 518 training return: tensor(173.5740, device='cuda:0')
episode: 519 training return: tensor(251.6255, device='cuda:0')
epoch: 130 test_true_pfm: 5707.230695935444 sim_pfm: 351.40982689454296
episode: 520 training return: tensor(228.5648, device='cuda:0')
episode: 521 training return: tensor(154.8219, device='cuda:0')
episode: 522 training return: tensor(217.7077, device='cuda:0')
episode: 523 training return: tensor(281.8699, device='cuda:0')
epoch: 131 test_true_pfm: 5611.3432826553835 sim_pfm: 364.1342325648681
episode: 524 training return: tensor(236.9221, device='cuda:0')
episode: 525 training return: tensor(276.1332, device='cuda:0')
episode: 526 training return: tensor(203.0384, device='cuda:0')
episode: 527 training return: tensor(299.0143, device='cuda:0')
epoch: 132 test_true_pfm: 5692.651251899723 sim_pfm: 365.75465475406963
episode: 528 training return: tensor(332.7301, device='cuda:0')
episode: 529 training return: tensor(232.2365, device='cuda:0')
episode: 530 training return: tensor(211.1362, device='cuda:0')
episode: 531 training return: tensor(399.1323, device='cuda:0')
epoch: 133 test_true_pfm: 5702.604003844532 sim_pfm: 349.29078687882674
episode: 532 training return: tensor(267.3925, device='cuda:0')
episode: 533 training return: tensor(285.7685, device='cuda:0')
episode: 534 training return: tensor(158.3802, device='cuda:0')
episode: 535 training return: tensor(251.8112, device='cuda:0')
epoch: 134 test_true_pfm: 5566.360164829166 sim_pfm: 392.055342807939
episode: 536 training return: tensor(233.1758, device='cuda:0')
episode: 537 training return: tensor(213.2042, device='cuda:0')
episode: 538 training return: tensor(310.8911, device='cuda:0')
episode: 539 training return: tensor(276.8290, device='cuda:0')
epoch: 135 test_true_pfm: 5607.3936433206445 sim_pfm: 358.8711249608702
episode: 540 training return: tensor(206.9005, device='cuda:0')
episode: 541 training return: tensor(247.9326, device='cuda:0')
episode: 542 training return: tensor(305.1976, device='cuda:0')
episode: 543 training return: tensor(201.3651, device='cuda:0')
epoch: 136 test_true_pfm: 5618.76262815543 sim_pfm: 358.0540265523984
episode: 544 training return: tensor(308.9931, device='cuda:0')
episode: 545 training return: tensor(247.8032, device='cuda:0')
episode: 546 training return: tensor(214.4231, device='cuda:0')
episode: 547 training return: tensor(185.7060, device='cuda:0')
epoch: 137 test_true_pfm: 5656.030607328521 sim_pfm: 376.7959362013692
episode: 548 training return: tensor(176.4030, device='cuda:0')
episode: 549 training return: tensor(249.4713, device='cuda:0')
episode: 550 training return: tensor(298.3585, device='cuda:0')
episode: 551 training return: tensor(303.8330, device='cuda:0')
epoch: 138 test_true_pfm: 5635.2150422753875 sim_pfm: 381.2677751938657
episode: 552 training return: tensor(177.2874, device='cuda:0')
episode: 553 training return: tensor(260.2902, device='cuda:0')
episode: 554 training return: tensor(179.3978, device='cuda:0')
episode: 555 training return: tensor(173.9342, device='cuda:0')
epoch: 139 test_true_pfm: 5626.547487999211 sim_pfm: 372.9562380775266
episode: 556 training return: tensor(228.0106, device='cuda:0')
episode: 557 training return: tensor(340.3243, device='cuda:0')
episode: 558 training return: tensor(333.4722, device='cuda:0')
episode: 559 training return: tensor(195.3818, device='cuda:0')
epoch: 140 test_true_pfm: 5638.1840747815795 sim_pfm: 393.9367247394791
episode: 560 training return: tensor(242.8036, device='cuda:0')
episode: 561 training return: tensor(137.6412, device='cuda:0')
episode: 562 training return: tensor(219.3528, device='cuda:0')
episode: 563 training return: tensor(314.8689, device='cuda:0')
epoch: 141 test_true_pfm: 5708.540844224019 sim_pfm: 384.2641883432322
episode: 564 training return: tensor(264.5957, device='cuda:0')
episode: 565 training return: tensor(368.8040, device='cuda:0')
episode: 566 training return: tensor(308.3103, device='cuda:0')
episode: 567 training return: tensor(256.0334, device='cuda:0')
epoch: 142 test_true_pfm: 5667.692237877498 sim_pfm: 431.47497037433396
episode: 568 training return: tensor(244.6176, device='cuda:0')
episode: 569 training return: tensor(-327.6130, device='cuda:0')
episode: 570 training return: tensor(258.5423, device='cuda:0')
episode: 571 training return: tensor(226.9721, device='cuda:0')
epoch: 143 test_true_pfm: 5616.143544961879 sim_pfm: 396.51194354700664
episode: 572 training return: tensor(324.9480, device='cuda:0')
episode: 573 training return: tensor(315.4095, device='cuda:0')
episode: 574 training return: tensor(343.1644, device='cuda:0')
episode: 575 training return: tensor(250.3866, device='cuda:0')
epoch: 144 test_true_pfm: 5659.565283538694 sim_pfm: 366.271284962558
episode: 576 training return: tensor(265.2574, device='cuda:0')
episode: 577 training return: tensor(288.3127, device='cuda:0')
episode: 578 training return: tensor(304.1007, device='cuda:0')
episode: 579 training return: tensor(254.6460, device='cuda:0')
epoch: 145 test_true_pfm: 5677.1733105376725 sim_pfm: 365.40451279091457
episode: 580 training return: tensor(122.8992, device='cuda:0')
episode: 581 training return: tensor(242.3787, device='cuda:0')
episode: 582 training return: tensor(272.8419, device='cuda:0')
episode: 583 training return: tensor(251.6657, device='cuda:0')
epoch: 146 test_true_pfm: 5655.611008454499 sim_pfm: 306.09046273304074
episode: 584 training return: tensor(267.3570, device='cuda:0')
episode: 585 training return: tensor(239.8156, device='cuda:0')
episode: 586 training return: tensor(167.3145, device='cuda:0')
episode: 587 training return: tensor(177.6396, device='cuda:0')
epoch: 147 test_true_pfm: 5502.75974206745 sim_pfm: 361.1387307040471
episode: 588 training return: tensor(257.8734, device='cuda:0')
episode: 589 training return: tensor(165.6047, device='cuda:0')
episode: 590 training return: tensor(265.2542, device='cuda:0')
episode: 591 training return: tensor(225.5555, device='cuda:0')
epoch: 148 test_true_pfm: 5653.632572491791 sim_pfm: 406.8121103024071
episode: 592 training return: tensor(181.9661, device='cuda:0')
episode: 593 training return: tensor(233.7021, device='cuda:0')
episode: 594 training return: tensor(297.6741, device='cuda:0')
episode: 595 training return: tensor(265.4579, device='cuda:0')
epoch: 149 test_true_pfm: 5652.99880577197 sim_pfm: 446.1367143761211
episode: 596 training return: tensor(244.2775, device='cuda:0')
episode: 597 training return: tensor(307.0273, device='cuda:0')
episode: 598 training return: tensor(210.6359, device='cuda:0')
episode: 599 training return: tensor(260.7941, device='cuda:0')
epoch: 150 test_true_pfm: 5548.525619359048 sim_pfm: 325.2670343190354
