['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '10000', '--sub']
epoch: 0 training_loss 0.28298139438033104 test_loss: 0.1902701139450073
epoch: 1 training_loss 0.17410958524793385 test_loss: 0.1517471194267273
epoch: 2 training_loss 0.14019929707050324 test_loss: 0.1253930449485779
epoch: 3 training_loss 0.12344910603016615 test_loss: 0.13268475532531737
epoch: 4 training_loss 0.12336735494434833 test_loss: 0.1218113660812378
epoch: 5 training_loss 0.12066177381202578 test_loss: 0.13879958391189576
epoch: 6 training_loss 0.11588461454957724 test_loss: 0.12272392511367798
epoch: 7 training_loss 0.10583598114550113 test_loss: 0.12635637521743776
epoch: 8 training_loss 0.11386657128110528 test_loss: 0.13047434091567994
epoch: 9 training_loss 0.10691834054887295 test_loss: 0.11073380708694458
epoch: 10 training_loss 0.10149800900369882 test_loss: 0.11755526065826416
epoch: 11 training_loss 0.10346570197492838 test_loss: 0.12874938249588014
epoch: 12 training_loss 0.10871030781418085 test_loss: 0.12827486991882325
epoch: 13 training_loss 0.09732885867357254 test_loss: 0.10993850231170654
epoch: 14 training_loss 0.10172350350767374 test_loss: 0.11104463338851929
epoch: 15 training_loss 0.10462112706154585 test_loss: 0.11114050149917602
epoch: 16 training_loss 0.09174325410276651 test_loss: 0.11086180210113525
epoch: 17 training_loss 0.10251873299479485 test_loss: 0.11445974111557007
epoch: 18 training_loss 0.10835847508162261 test_loss: 0.12342045307159424
epoch: 19 training_loss 0.10492056058719754 test_loss: 0.11652404069900513
epoch: 20 training_loss 0.09296849753707648 test_loss: 0.10561333894729615
epoch: 21 training_loss 0.09817931860685349 test_loss: 0.10563290119171143
epoch: 22 training_loss 0.09262032203376293 test_loss: 0.1112277626991272
epoch: 23 training_loss 0.09707683939486741 test_loss: 0.12029106616973877
epoch: 24 training_loss 0.09675641097128392 test_loss: 0.10534930229187012
epoch: 25 training_loss 0.08923558305948973 test_loss: 0.11532804965972901
epoch: 26 training_loss 0.09889522489160299 test_loss: 0.11054738759994506
epoch: 27 training_loss 0.09206119922921062 test_loss: 0.11993911266326904
epoch: 28 training_loss 0.08509325876832008 test_loss: 0.11566060781478882
epoch: 29 training_loss 0.09599457420408726 test_loss: 0.10378588438034057
epoch: 30 training_loss 0.0902172239869833 test_loss: 0.09654499888420105
epoch: 31 training_loss 0.09245248919352889 test_loss: 0.09935939311981201
epoch: 32 training_loss 0.08419466082006694 test_loss: 0.11976522207260132
epoch: 33 training_loss 0.09773881630972028 test_loss: 0.11168285608291625
epoch: 34 training_loss 0.09987244438380002 test_loss: 0.1097119927406311
epoch: 35 training_loss 0.09358617968857288 test_loss: 0.09312348365783692
epoch: 36 training_loss 0.08809359114617109 test_loss: 0.10389187335968017
epoch: 37 training_loss 0.0867418098077178 test_loss: 0.1101598858833313
epoch: 38 training_loss 0.088605892136693 test_loss: 0.09682068228721619
epoch: 39 training_loss 0.08903950719162822 test_loss: 0.09975308775901795
epoch: 40 training_loss 0.09396277153864503 test_loss: 0.10968027114868165
epoch: 41 training_loss 0.08758201705291867 test_loss: 0.10810370445251465
epoch: 42 training_loss 0.0855403278581798 test_loss: 0.10977853536605835
epoch: 43 training_loss 0.0906806568801403 test_loss: 0.10631382465362549
epoch: 44 training_loss 0.09046091627329587 test_loss: 0.1110978364944458
epoch: 45 training_loss 0.08656678410246968 test_loss: 0.10235896110534667
epoch: 46 training_loss 0.08088353278115391 test_loss: 0.12047629356384278
epoch: 47 training_loss 0.08596006920561194 test_loss: 0.1053263783454895
epoch: 48 training_loss 0.08455550629645586 test_loss: 0.10799940824508666
epoch: 49 training_loss 0.08715673562139273 test_loss: 0.10124155282974243
epoch: 50 training_loss 0.07758596854284405 test_loss: 0.11197147369384766
epoch: 51 training_loss 0.07802384737879038 test_loss: 0.11415692567825317
epoch: 52 training_loss 0.08352490182965994 test_loss: 0.10829979181289673
epoch: 53 training_loss 0.0845931327342987 test_loss: 0.10713448524475097
epoch: 54 training_loss 0.08561572903767228 test_loss: 0.100864839553833
epoch: 55 training_loss 0.07495895916596056 test_loss: 0.11120018959045411
epoch: 56 training_loss 0.08510474476963281 test_loss: 0.1252833604812622
epoch: 57 training_loss 0.08332727991044521 test_loss: 0.10621000528335571
epoch: 58 training_loss 0.08644245248287916 test_loss: 0.10685135126113891
epoch: 59 training_loss 0.0778042871877551 test_loss: 0.11657322645187378
epoch: 60 training_loss 0.08631436482071876 test_loss: 0.11478949785232544
epoch: 61 training_loss 0.08097268261015415 test_loss: 0.11818500757217407
epoch: 62 training_loss 0.08015770275145769 test_loss: 0.11739544868469239
epoch: 63 training_loss 0.07732188979163766 test_loss: 0.10469253063201904
epoch: 64 training_loss 0.07989798782393337 test_loss: 0.11395219564437867
epoch: 65 training_loss 0.08732922187075019 test_loss: 0.10617657899856567
epoch: 66 training_loss 0.0776730745844543 test_loss: 0.10842351913452149
epoch: 67 training_loss 0.07680899530649185 test_loss: 0.12688813209533692
epoch: 68 training_loss 0.07898582836613059 test_loss: 0.12099332809448242
epoch: 69 training_loss 0.07500308817252517 test_loss: 0.11529475450515747
epoch: 70 training_loss 0.0832478385232389 test_loss: 0.11673580408096314
epoch: 71 training_loss 0.07809019483625888 test_loss: 0.1197211503982544
epoch: 72 training_loss 0.07951050486415624 test_loss: 0.12161910533905029
epoch: 73 training_loss 0.07654275737702847 test_loss: 0.12303204536437988
epoch: 74 training_loss 0.07518046025186777 test_loss: 0.12229480743408203
epoch: 75 training_loss 0.07510642915964126 test_loss: 0.114384925365448
epoch: 76 training_loss 0.08116812065243721 test_loss: 0.12856885194778442
epoch: 77 training_loss 0.08304737837985159 test_loss: 0.13371673822402955
epoch: 78 training_loss 0.07717653427273036 test_loss: 0.11821106672286988
epoch: 79 training_loss 0.07468566989526153 test_loss: 0.11169848442077637
epoch: 80 training_loss 0.07482077341526747 test_loss: 0.13114848136901855
epoch: 81 training_loss 0.07094643153250217 test_loss: 0.12030934095382691
epoch: 82 training_loss 0.07372978958301246 test_loss: 0.12775155305862426
epoch: 83 training_loss 0.0750052135437727 test_loss: 0.10150614976882935
epoch: 84 training_loss 0.07507629076018929 test_loss: 0.1222381353378296
epoch: 85 training_loss 0.07430741094052791 test_loss: 0.12662023305892944
epoch: 86 training_loss 0.0779347749426961 test_loss: 0.1255558967590332
epoch: 87 training_loss 0.07085561733692884 test_loss: 0.10969648361206055
epoch: 88 training_loss 0.07852647992782295 test_loss: 0.12066396474838256
epoch: 89 training_loss 0.07448505420237779 test_loss: 0.12867486476898193
epoch: 90 training_loss 0.07759670736268162 test_loss: 0.12349635362625122
epoch: 91 training_loss 0.07603258300572634 test_loss: 0.1354719042778015
epoch: 92 training_loss 0.07307865004986525 test_loss: 0.10594849586486817
epoch: 93 training_loss 0.07370646056719124 test_loss: 0.11920936107635498
epoch: 94 training_loss 0.07336253157816827 test_loss: 0.12292144298553467
epoch: 95 training_loss 0.07435834765434265 test_loss: 0.13718477487564087
epoch: 96 training_loss 0.07261999692767858 test_loss: 0.13237884044647216
epoch: 97 training_loss 0.06852014178410172 test_loss: 0.12439872026443481
epoch: 98 training_loss 0.0675320190191269 test_loss: 0.15072388648986818
epoch: 99 training_loss 0.07264796944335103 test_loss: 0.11635359525680541
epoch: 100 training_loss 0.0745574645139277 test_loss: 0.14252856969833375
epoch: 101 training_loss 0.07355387149378657 test_loss: 0.10149532556533813
epoch: 102 training_loss 0.07848550396040083 test_loss: 0.12894209623336791
epoch: 103 training_loss 0.06990104641765356 test_loss: 0.12798928022384642
epoch: 104 training_loss 0.06771505395881831 test_loss: 0.1394771456718445
epoch: 105 training_loss 0.0777416840940714 test_loss: 0.12151600122451782
epoch: 106 training_loss 0.0690573418326676 test_loss: 0.1365996479988098
epoch: 107 training_loss 0.06482828978449107 test_loss: 0.14000799655914306
epoch: 108 training_loss 0.0692939386330545 test_loss: 0.1324404239654541
epoch: 109 training_loss 0.06988612718880177 test_loss: 0.1286725401878357
epoch: 110 training_loss 0.06624153249897063 test_loss: 0.13693563938140868
epoch: 111 training_loss 0.07408123104833067 test_loss: 0.12949122190475465
epoch: 112 training_loss 0.07248211860656738 test_loss: 0.12021034955978394
epoch: 113 training_loss 0.06782205624505877 test_loss: 0.133362877368927
epoch: 114 training_loss 0.0643382588494569 test_loss: 0.12182347774505616
epoch: 115 training_loss 0.066187090780586 test_loss: 0.13585288524627687
epoch: 116 training_loss 0.06889942344278097 test_loss: 0.11312543153762818
epoch: 117 training_loss 0.06593784077093005 test_loss: 0.11602276563644409
epoch: 118 training_loss 0.06314722249284387 test_loss: 0.14636526107788086
epoch: 119 training_loss 0.06412532478570938 test_loss: 0.1239740014076233
epoch: 120 training_loss 0.06640132823958993 test_loss: 0.12964088916778566
epoch: 121 training_loss 0.06751127498224378 test_loss: 0.14051129817962646
epoch: 122 training_loss 0.06322500895708799 test_loss: 0.15533895492553712
epoch: 123 training_loss 0.06871851962059736 test_loss: 0.13188340663909912
epoch: 124 training_loss 0.06530359024181961 test_loss: 0.13241761922836304
epoch: 125 training_loss 0.07357783503830433 test_loss: 0.13077521324157715
epoch: 126 training_loss 0.06706098846159875 test_loss: 0.1475623369216919
epoch: 127 training_loss 0.06582524260506034 test_loss: 0.14994045495986938
epoch: 128 training_loss 0.06512006636708975 test_loss: 0.13092529773712158
epoch: 129 training_loss 0.05649071640335023 test_loss: 0.12511072158813477
epoch: 130 training_loss 0.06482974335551261 test_loss: 0.14407482147216796
epoch: 131 training_loss 0.07105832951143384 test_loss: 0.1449694275856018
epoch: 132 training_loss 0.06460640854202211 test_loss: 0.14702355861663818
epoch: 133 training_loss 0.06908507980406284 test_loss: 0.1571468472480774
epoch: 134 training_loss 0.062074010157957674 test_loss: 0.12814202308654785
epoch: 135 training_loss 0.060169831402599815 test_loss: 0.1450181007385254
epoch: 136 training_loss 0.06501905330456793 test_loss: 0.15027258396148682
epoch: 137 training_loss 0.06289994289167225 test_loss: 0.15843346118927001
epoch: 138 training_loss 0.0694173192512244 test_loss: 0.13962055444717408
epoch: 139 training_loss 0.06838553343899548 test_loss: 0.144422447681427
epoch: 140 training_loss 0.0675758877210319 test_loss: 0.13647689819335937
epoch: 141 training_loss 0.06691344942897558 test_loss: 0.13702205419540406
epoch: 142 training_loss 0.06484272884204983 test_loss: 0.14891085624694825
epoch: 143 training_loss 0.057628508396446704 test_loss: 0.14654228687286378
epoch: 144 training_loss 0.06064921551384032 test_loss: 0.1367879629135132
epoch: 145 training_loss 0.06740292848087848 test_loss: 0.16535576581954955
epoch: 146 training_loss 0.05963786019012332 test_loss: 0.14295650720596315
epoch: 147 training_loss 0.0637125483341515 test_loss: 0.13990474939346315
epoch: 148 training_loss 0.05652003362774849 test_loss: 0.17329734563827515
epoch: 149 training_loss 0.0679490083642304 test_loss: 0.13554444313049316
epoch: 0 training_loss 39.205977783203124 test_loss: 20.383523559570314
epoch: 1 training_loss 16.446769189834594 test_loss: 13.552113342285157
epoch: 2 training_loss 12.346634016036987 test_loss: 11.077687835693359
epoch: 3 training_loss 10.531375980377197 test_loss: 9.726941680908203
epoch: 4 training_loss 9.06383071422577 test_loss: 8.742149353027344
epoch: 5 training_loss 8.337149071693421 test_loss: 7.98071060180664
epoch: 6 training_loss 7.562576622962951 test_loss: 7.264727783203125
epoch: 7 training_loss 7.294085845947266 test_loss: 6.851885223388672
epoch: 8 training_loss 6.65840184211731 test_loss: 6.660711669921875
epoch: 9 training_loss 6.357545604705811 test_loss: 6.1403148651123045
epoch: 10 training_loss 6.096725406646729 test_loss: 6.060890579223633
epoch: 11 training_loss 5.808657636642456 test_loss: 5.846274566650391
epoch: 12 training_loss 5.604298596382141 test_loss: 5.357547760009766
epoch: 13 training_loss 5.354544773101806 test_loss: 5.494469451904297
epoch: 14 training_loss 5.068064520359039 test_loss: 5.045000076293945
epoch: 15 training_loss 4.859298627376557 test_loss: 5.014299774169922
epoch: 16 training_loss 4.753356704711914 test_loss: 4.862553405761719
epoch: 17 training_loss 4.718950257301331 test_loss: 4.777052307128907
epoch: 18 training_loss 4.581758165359497 test_loss: 4.632598876953125
epoch: 19 training_loss 4.453078641891479 test_loss: 4.553516387939453
epoch: 20 training_loss 4.3028797030448915 test_loss: 4.4707283020019535
epoch: 21 training_loss 4.269853637218476 test_loss: 4.313732147216797
epoch: 22 training_loss 4.120122892856598 test_loss: 4.294851303100586
epoch: 23 training_loss 4.200677847862243 test_loss: 4.120091247558594
epoch: 24 training_loss 3.9247184443473815 test_loss: 4.120983505249024
epoch: 25 training_loss 3.901116659641266 test_loss: 4.06890869140625
epoch: 26 training_loss 3.806750979423523 test_loss: 4.006912612915039
epoch: 27 training_loss 3.8452263045310975 test_loss: 4.000529861450195
epoch: 28 training_loss 3.7298708033561705 test_loss: 3.8452690124511717
epoch: 29 training_loss 3.7105231785774233 test_loss: 3.8288768768310546
epoch: 30 training_loss 3.670209367275238 test_loss: 3.801932144165039
epoch: 31 training_loss 3.634554615020752 test_loss: 3.6302215576171877
epoch: 32 training_loss 3.540195758342743 test_loss: 3.5898799896240234
epoch: 33 training_loss 3.476393163204193 test_loss: 3.5250198364257814
epoch: 34 training_loss 3.3715247535705566 test_loss: 3.5321006774902344
epoch: 35 training_loss 3.3530854916572572 test_loss: 3.6122745513916015
epoch: 36 training_loss 3.3929990220069883 test_loss: 3.446327972412109
epoch: 37 training_loss 3.3505149269104004 test_loss: 3.454473876953125
epoch: 38 training_loss 3.361930661201477 test_loss: 3.3551357269287108
epoch: 39 training_loss 3.2711092472076415 test_loss: 3.3443050384521484
epoch: 40 training_loss 3.2063030123710634 test_loss: 3.3227386474609375
epoch: 41 training_loss 3.179684658050537 test_loss: 3.2987464904785155
epoch: 42 training_loss 3.2360724449157714 test_loss: 3.265248489379883
epoch: 43 training_loss 3.077588095664978 test_loss: 3.2891471862792967
epoch: 44 training_loss 3.1061840271949768 test_loss: 3.138563346862793
epoch: 45 training_loss 3.0571268343925477 test_loss: 3.1505105972290037
epoch: 46 training_loss 2.996375572681427 test_loss: 3.2052047729492186
epoch: 47 training_loss 3.0585737586021424 test_loss: 3.0639341354370115
epoch: 48 training_loss 2.922292354106903 test_loss: 3.160502052307129
epoch: 49 training_loss 3.056629276275635 test_loss: 3.203062057495117
epoch: 50 training_loss 2.9290098690986635 test_loss: 3.043595314025879
epoch: 51 training_loss 2.8690051507949828 test_loss: 3.106709098815918
epoch: 52 training_loss 2.9609522795677186 test_loss: 3.0392715454101564
epoch: 53 training_loss 2.9001279711723327 test_loss: 3.078852081298828
epoch: 54 training_loss 2.8872305488586427 test_loss: 3.074744987487793
epoch: 55 training_loss 2.8397604775428773 test_loss: 3.0405479431152345
epoch: 56 training_loss 2.8291668486595154 test_loss: 2.9021202087402345
epoch: 57 training_loss 2.795278563499451 test_loss: 2.955063056945801
epoch: 58 training_loss 2.747052404880524 test_loss: 2.9427881240844727
epoch: 59 training_loss 2.77141646027565 test_loss: 3.062027931213379
epoch: 60 training_loss 2.711787898540497 test_loss: 2.932580757141113
epoch: 61 training_loss 2.774506504535675 test_loss: 2.9380186080932615
epoch: 62 training_loss 2.7412292945384977 test_loss: 2.8756555557250976
epoch: 63 training_loss 2.7438826751708985 test_loss: 2.800790214538574
epoch: 64 training_loss 2.678720996379852 test_loss: 2.8073347091674803
epoch: 65 training_loss 2.606128318309784 test_loss: 2.8643753051757814
epoch: 66 training_loss 2.68933806180954 test_loss: 2.7597827911376953
epoch: 67 training_loss 2.6415673828125 test_loss: 2.7864191055297853
epoch: 68 training_loss 2.6247933983802794 test_loss: 2.73598690032959
epoch: 69 training_loss 2.637153227329254 test_loss: 2.7913116455078124
epoch: 70 training_loss 2.5457608485221863 test_loss: 2.736475372314453
epoch: 71 training_loss 2.624716272354126 test_loss: 2.760365676879883
epoch: 72 training_loss 2.595222542285919 test_loss: 2.787558937072754
epoch: 73 training_loss 2.5718978571891786 test_loss: 2.6797405242919923
epoch: 74 training_loss 2.560082859992981 test_loss: 2.738519477844238
epoch: 75 training_loss 2.5997058963775634 test_loss: 2.694424057006836
epoch: 76 training_loss 2.5644806361198427 test_loss: 2.7636505126953126
epoch: 77 training_loss 2.539466580152512 test_loss: 2.698456573486328
epoch: 78 training_loss 2.5660017323493958 test_loss: 2.7733882904052733
epoch: 79 training_loss 2.5472767305374147 test_loss: 2.6546234130859374
epoch: 80 training_loss 2.47445583820343 test_loss: 2.6545221328735353
epoch: 81 training_loss 2.4474335873126982 test_loss: 2.6230405807495116
epoch: 82 training_loss 2.4908973491191864 test_loss: 2.6433996200561523
epoch: 83 training_loss 2.4391347622871398 test_loss: 2.528985595703125
epoch: 84 training_loss 2.498926887512207 test_loss: 2.598095512390137
epoch: 85 training_loss 2.4759301066398622 test_loss: 2.6580785751342773
epoch: 86 training_loss 2.447919911146164 test_loss: 2.5575246810913086
epoch: 87 training_loss 2.474882493019104 test_loss: 2.640852928161621
epoch: 88 training_loss 2.490259175300598 test_loss: 2.681050109863281
epoch: 89 training_loss 2.4020265340805054 test_loss: 2.6164165496826173
epoch: 90 training_loss 2.4212441170215606 test_loss: 2.5749944686889648
epoch: 91 training_loss 2.416720322370529 test_loss: 2.5930673599243166
epoch: 92 training_loss 2.4077143871784212 test_loss: 2.5657979965209963
epoch: 93 training_loss 2.4632113540172575 test_loss: 2.5603874206542967
epoch: 94 training_loss 2.422716853618622 test_loss: 2.635198783874512
epoch: 95 training_loss 2.397130914926529 test_loss: 2.671921157836914
epoch: 96 training_loss 2.456096204519272 test_loss: 2.615677070617676
epoch: 97 training_loss 2.3721748650074006 test_loss: 2.573367691040039
epoch: 98 training_loss 2.397871459722519 test_loss: 2.526112937927246
epoch: 99 training_loss 2.384857473373413 test_loss: 2.6465217590332033
epoch: 100 training_loss 2.3897119259834287 test_loss: 2.545193099975586
epoch: 101 training_loss 2.3415262949466706 test_loss: 2.568835639953613
epoch: 102 training_loss 2.355147684812546 test_loss: 2.639848518371582
epoch: 103 training_loss 2.3939494466781617 test_loss: 2.5185945510864256
epoch: 104 training_loss 2.3711193013191223 test_loss: 2.5247650146484375
epoch: 105 training_loss 2.3460020411014555 test_loss: 2.4769920349121093
epoch: 106 training_loss 2.2909555792808534 test_loss: 2.5278635025024414
epoch: 107 training_loss 2.297815291881561 test_loss: 2.545395278930664
epoch: 108 training_loss 2.2990597748756407 test_loss: 2.556710433959961
epoch: 109 training_loss 2.347246928215027 test_loss: 2.5055572509765627
epoch: 110 training_loss 2.298411641120911 test_loss: 2.4485708236694337
epoch: 111 training_loss 2.2738908135890963 test_loss: 2.413669395446777
epoch: 112 training_loss 2.36602903008461 test_loss: 2.4855314254760743
epoch: 113 training_loss 2.2640833401679994 test_loss: 2.520295524597168
epoch: 114 training_loss 2.3178391647338867 test_loss: 2.5266517639160155
epoch: 115 training_loss 2.248684104681015 test_loss: 2.4753324508666994
epoch: 116 training_loss 2.2898541676998136 test_loss: 2.51544246673584
epoch: 117 training_loss 2.2674269115924837 test_loss: 2.3646873474121093
epoch: 118 training_loss 2.2291476023197174 test_loss: 2.5924362182617187
epoch: 119 training_loss 2.3120607924461365 test_loss: 2.4418874740600587
epoch: 120 training_loss 2.248262903690338 test_loss: 2.52761173248291
epoch: 121 training_loss 2.291595792770386 test_loss: 2.5530017852783202
epoch: 122 training_loss 2.238454964160919 test_loss: 2.498457336425781
epoch: 123 training_loss 2.2076160073280335 test_loss: 2.4673885345458983
epoch: 124 training_loss 2.2723998010158537 test_loss: 2.442276382446289
epoch: 125 training_loss 2.2146092045307157 test_loss: 2.381717872619629
epoch: 126 training_loss 2.2588671708106993 test_loss: 2.4928150177001953
epoch: 127 training_loss 2.220823447704315 test_loss: 2.442269134521484
epoch: 128 training_loss 2.2356175172328947 test_loss: 2.4706226348876954
epoch: 129 training_loss 2.218072121143341 test_loss: 2.4789466857910156
epoch: 130 training_loss 2.2511623108386996 test_loss: 2.4191104888916017
epoch: 131 training_loss 2.2364629709720614 test_loss: 2.391615867614746
epoch: 132 training_loss 2.2218864965438843 test_loss: 2.341783142089844
epoch: 133 training_loss 2.1970586764812468 test_loss: 2.398876190185547
epoch: 134 training_loss 2.161929129362106 test_loss: 2.3707996368408204
epoch: 135 training_loss 2.1611904275417326 test_loss: 2.3885566711425783
epoch: 136 training_loss 2.2755973958969116 test_loss: 2.3565526962280274
epoch: 137 training_loss 2.217493054866791 test_loss: 2.3939390182495117
epoch: 138 training_loss 2.185766988992691 test_loss: 2.395455169677734
epoch: 139 training_loss 2.2329569172859194 test_loss: 2.399244689941406
epoch: 140 training_loss 2.1978410589694977 test_loss: 2.3901336669921873
epoch: 141 training_loss 2.145283213853836 test_loss: 2.453777313232422
epoch: 142 training_loss 2.147920986413956 test_loss: 2.366363525390625
epoch: 143 training_loss 2.1870943105220793 test_loss: 2.3674365997314455
epoch: 144 training_loss 2.1817554378509523 test_loss: 2.3491397857666017
epoch: 145 training_loss 2.125968123674393 test_loss: 2.3214078903198243
epoch: 146 training_loss 2.1477522194385528 test_loss: 2.336097526550293
epoch: 147 training_loss 2.216533529758453 test_loss: 2.4223875045776366
epoch: 148 training_loss 2.190271112918854 test_loss: 2.352423667907715
epoch: 149 training_loss 2.1692441737651826 test_loss: 2.357348823547363
2763.7104405037817
episode: 0 training return: tensor(-328.9250, device='cuda:0')
episode: 1 training return: tensor(160.8359, device='cuda:0')
episode: 2 training return: tensor(82.1849, device='cuda:0')
episode: 3 training return: tensor(79.3674, device='cuda:0')
epoch: 1 test_true_pfm: 2147.3551960874393 sim_pfm: -56.48108188414093
episode: 4 training return: tensor(149.9354, device='cuda:0')
episode: 5 training return: tensor(109.4822, device='cuda:0')
episode: 6 training return: tensor(0.4575, device='cuda:0')
episode: 7 training return: tensor(-69.7468, device='cuda:0')
epoch: 2 test_true_pfm: 2271.7114849139243 sim_pfm: -118.18343026887548
episode: 8 training return: tensor(102.2155, device='cuda:0')
episode: 9 training return: tensor(145.3463, device='cuda:0')
episode: 10 training return: tensor(259.0947, device='cuda:0')
episode: 11 training return: tensor(152.6698, device='cuda:0')
epoch: 3 test_true_pfm: 3029.596656742257 sim_pfm: 88.12375969129305
episode: 12 training return: tensor(-117.9621, device='cuda:0')
episode: 13 training return: tensor(115.5993, device='cuda:0')
episode: 14 training return: tensor(-411.7755, device='cuda:0')
episode: 15 training return: tensor(89.3572, device='cuda:0')
epoch: 4 test_true_pfm: 3085.3511782716973 sim_pfm: 156.06642082457742
episode: 16 training return: tensor(206.7387, device='cuda:0')
episode: 17 training return: tensor(-342.4763, device='cuda:0')
episode: 18 training return: tensor(-341.0488, device='cuda:0')
episode: 19 training return: tensor(166.3851, device='cuda:0')
epoch: 5 test_true_pfm: 2762.653421329831 sim_pfm: -97.74529795746396
episode: 20 training return: tensor(137.8802, device='cuda:0')
episode: 21 training return: tensor(341.8508, device='cuda:0')
episode: 22 training return: tensor(135.7190, device='cuda:0')
episode: 23 training return: tensor(134.7545, device='cuda:0')
epoch: 6 test_true_pfm: 2005.64217040665 sim_pfm: -3.209303234141165
episode: 24 training return: tensor(154.3288, device='cuda:0')
episode: 25 training return: tensor(-375.3496, device='cuda:0')
episode: 26 training return: tensor(200.6743, device='cuda:0')
episode: 27 training return: tensor(161.5765, device='cuda:0')
epoch: 7 test_true_pfm: 2787.4166927725473 sim_pfm: 96.83389297502193
episode: 28 training return: tensor(7.0064, device='cuda:0')
episode: 29 training return: tensor(-421.4682, device='cuda:0')
episode: 30 training return: tensor(160.3200, device='cuda:0')
episode: 31 training return: tensor(-184.1070, device='cuda:0')
epoch: 8 test_true_pfm: 2751.044147247414 sim_pfm: 183.66873078144272
episode: 32 training return: tensor(-130.8712, device='cuda:0')
episode: 33 training return: tensor(-85.1926, device='cuda:0')
episode: 34 training return: tensor(-475.6421, device='cuda:0')
episode: 35 training return: tensor(64.9278, device='cuda:0')
epoch: 9 test_true_pfm: 2649.582846892642 sim_pfm: -236.80138878388485
episode: 36 training return: tensor(-57.7121, device='cuda:0')
episode: 37 training return: tensor(-371.2220, device='cuda:0')
episode: 38 training return: tensor(-241.5063, device='cuda:0')
episode: 39 training return: tensor(-304.5088, device='cuda:0')
epoch: 10 test_true_pfm: 2907.5945252953193 sim_pfm: 219.52357275427008
episode: 40 training return: tensor(-140.4941, device='cuda:0')
episode: 41 training return: tensor(220.6501, device='cuda:0')
episode: 42 training return: tensor(65.2543, device='cuda:0')
episode: 43 training return: tensor(-474.9580, device='cuda:0')
epoch: 11 test_true_pfm: 2992.6036917874735 sim_pfm: -245.83334202778255
episode: 44 training return: tensor(-377.8135, device='cuda:0')
episode: 45 training return: tensor(60.8130, device='cuda:0')
episode: 46 training return: tensor(-371.5096, device='cuda:0')
episode: 47 training return: tensor(-291.6754, device='cuda:0')
epoch: 12 test_true_pfm: 2822.161637552062 sim_pfm: 235.30521595025007
episode: 48 training return: tensor(-372.3130, device='cuda:0')
episode: 49 training return: tensor(-40.4416, device='cuda:0')
episode: 50 training return: tensor(127.1786, device='cuda:0')
episode: 51 training return: tensor(104.3287, device='cuda:0')
epoch: 13 test_true_pfm: 2788.9648215810244 sim_pfm: 55.384642532929625
episode: 52 training return: tensor(213.5129, device='cuda:0')
episode: 53 training return: tensor(116.1382, device='cuda:0')
episode: 54 training return: tensor(170.6457, device='cuda:0')
episode: 55 training return: tensor(171.2744, device='cuda:0')
epoch: 14 test_true_pfm: 2661.4812114281744 sim_pfm: -13.736536345861774
episode: 56 training return: tensor(162.6638, device='cuda:0')
episode: 57 training return: tensor(-201.8810, device='cuda:0')
episode: 58 training return: tensor(199.5509, device='cuda:0')
episode: 59 training return: tensor(-62.5467, device='cuda:0')
epoch: 15 test_true_pfm: 3197.0009322737346 sim_pfm: -20.259839809693705
episode: 60 training return: tensor(-171.2846, device='cuda:0')
episode: 61 training return: tensor(-283.3279, device='cuda:0')
episode: 62 training return: tensor(177.6918, device='cuda:0')
episode: 63 training return: tensor(151.8266, device='cuda:0')
epoch: 16 test_true_pfm: 3380.2171762694493 sim_pfm: 84.1920212242015
episode: 64 training return: tensor(173.4413, device='cuda:0')
episode: 65 training return: tensor(154.2932, device='cuda:0')
episode: 66 training return: tensor(226.6678, device='cuda:0')
episode: 67 training return: tensor(-242.4292, device='cuda:0')
epoch: 17 test_true_pfm: 3041.6389394226157 sim_pfm: -60.91864606757493
episode: 68 training return: tensor(91.4330, device='cuda:0')
episode: 69 training return: tensor(-234.0248, device='cuda:0')
episode: 70 training return: tensor(212.3827, device='cuda:0')
episode: 71 training return: tensor(-97.3622, device='cuda:0')
epoch: 18 test_true_pfm: 3390.9645254241236 sim_pfm: 151.57990141400174
episode: 72 training return: tensor(237.1036, device='cuda:0')
episode: 73 training return: tensor(-31.2289, device='cuda:0')
episode: 74 training return: tensor(-334.7048, device='cuda:0')
episode: 75 training return: tensor(120.7382, device='cuda:0')
epoch: 19 test_true_pfm: 2614.2818864689843 sim_pfm: 222.2541097185264
episode: 76 training return: tensor(168.1504, device='cuda:0')
episode: 77 training return: tensor(172.2086, device='cuda:0')
episode: 78 training return: tensor(-126.8334, device='cuda:0')
episode: 79 training return: tensor(-329.2913, device='cuda:0')
epoch: 20 test_true_pfm: 2890.221841960167 sim_pfm: 0.873048884406065
episode: 80 training return: tensor(233.4104, device='cuda:0')
episode: 81 training return: tensor(175.5330, device='cuda:0')
episode: 82 training return: tensor(85.1320, device='cuda:0')
episode: 83 training return: tensor(168.4730, device='cuda:0')
epoch: 21 test_true_pfm: 3192.8360785297996 sim_pfm: 161.51135027287333
episode: 84 training return: tensor(215.0551, device='cuda:0')
episode: 85 training return: tensor(-347.7010, device='cuda:0')
episode: 86 training return: tensor(-278.1189, device='cuda:0')
episode: 87 training return: tensor(-35.2032, device='cuda:0')
epoch: 22 test_true_pfm: 2862.134539621508 sim_pfm: -144.6796766872867
episode: 88 training return: tensor(234.1525, device='cuda:0')
episode: 89 training return: tensor(212.1213, device='cuda:0')
episode: 90 training return: tensor(147.9623, device='cuda:0')
episode: 91 training return: tensor(-210.4322, device='cuda:0')
epoch: 23 test_true_pfm: 3361.0140514660484 sim_pfm: 142.9956497886548
episode: 92 training return: tensor(198.1874, device='cuda:0')
episode: 93 training return: tensor(142.4464, device='cuda:0')
episode: 94 training return: tensor(182.3487, device='cuda:0')
episode: 95 training return: tensor(148.6638, device='cuda:0')
epoch: 24 test_true_pfm: 2881.0306570030566 sim_pfm: 100.25765899281639
episode: 96 training return: tensor(76.3997, device='cuda:0')
episode: 97 training return: tensor(205.8095, device='cuda:0')
episode: 98 training return: tensor(54.4926, device='cuda:0')
episode: 99 training return: tensor(136.2497, device='cuda:0')
epoch: 25 test_true_pfm: 3412.4359045051274 sim_pfm: 50.34962847889013
episode: 100 training return: tensor(24.1488, device='cuda:0')
episode: 101 training return: tensor(298.4419, device='cuda:0')
episode: 102 training return: tensor(177.1967, device='cuda:0')
episode: 103 training return: tensor(242.2701, device='cuda:0')
epoch: 26 test_true_pfm: 3330.9901780574237 sim_pfm: 164.31401600240497
episode: 104 training return: tensor(-85.2207, device='cuda:0')
episode: 105 training return: tensor(130.9446, device='cuda:0')
episode: 106 training return: tensor(153.6102, device='cuda:0')
episode: 107 training return: tensor(241.9912, device='cuda:0')
epoch: 27 test_true_pfm: 3312.8508761619655 sim_pfm: 193.288154362177
episode: 108 training return: tensor(-302.4355, device='cuda:0')
episode: 109 training return: tensor(210.7333, device='cuda:0')
episode: 110 training return: tensor(183.0571, device='cuda:0')
episode: 111 training return: tensor(340.8736, device='cuda:0')
epoch: 28 test_true_pfm: 3371.772473680869 sim_pfm: 190.85792200336195
episode: 112 training return: tensor(177.2821, device='cuda:0')
episode: 113 training return: tensor(104.9104, device='cuda:0')
episode: 114 training return: tensor(189.5061, device='cuda:0')
episode: 115 training return: tensor(99.8130, device='cuda:0')
epoch: 29 test_true_pfm: 3342.26896114074 sim_pfm: 161.50337567858514
episode: 116 training return: tensor(258.7903, device='cuda:0')
episode: 117 training return: tensor(188.6408, device='cuda:0')
episode: 118 training return: tensor(216.8069, device='cuda:0')
episode: 119 training return: tensor(178.1559, device='cuda:0')
epoch: 30 test_true_pfm: 3355.3370323517247 sim_pfm: 105.87443422091503
episode: 120 training return: tensor(92.2510, device='cuda:0')
episode: 121 training return: tensor(111.5230, device='cuda:0')
episode: 122 training return: tensor(184.4756, device='cuda:0')
episode: 123 training return: tensor(-176.1807, device='cuda:0')
epoch: 31 test_true_pfm: 3400.12072240234 sim_pfm: 195.08612670211974
episode: 124 training return: tensor(170.8245, device='cuda:0')
episode: 125 training return: tensor(-240.4895, device='cuda:0')
episode: 126 training return: tensor(171.1685, device='cuda:0')
episode: 127 training return: tensor(149.9910, device='cuda:0')
epoch: 32 test_true_pfm: 3413.3335567831996 sim_pfm: 168.3850248066495
episode: 128 training return: tensor(215.4286, device='cuda:0')
episode: 129 training return: tensor(-17.2704, device='cuda:0')
episode: 130 training return: tensor(123.3171, device='cuda:0')
episode: 131 training return: tensor(137.2417, device='cuda:0')
epoch: 33 test_true_pfm: 3342.6448027529295 sim_pfm: 177.81396806268216
episode: 132 training return: tensor(111.9558, device='cuda:0')
episode: 133 training return: tensor(109.8161, device='cuda:0')
episode: 134 training return: tensor(176.5185, device='cuda:0')
episode: 135 training return: tensor(146.1832, device='cuda:0')
epoch: 34 test_true_pfm: 2717.7227943280413 sim_pfm: 85.57327319618587
episode: 136 training return: tensor(189.7828, device='cuda:0')
episode: 137 training return: tensor(216.7771, device='cuda:0')
episode: 138 training return: tensor(169.1893, device='cuda:0')
episode: 139 training return: tensor(234.1844, device='cuda:0')
epoch: 35 test_true_pfm: 3361.402986615389 sim_pfm: 203.69908787568178
episode: 140 training return: tensor(202.8809, device='cuda:0')
episode: 141 training return: tensor(150.4573, device='cuda:0')
episode: 142 training return: tensor(136.3934, device='cuda:0')
episode: 143 training return: tensor(230.9262, device='cuda:0')
epoch: 36 test_true_pfm: 3172.180055388178 sim_pfm: 160.6933537926816
episode: 144 training return: tensor(-53.9180, device='cuda:0')
episode: 145 training return: tensor(-123.5805, device='cuda:0')
episode: 146 training return: tensor(16.6706, device='cuda:0')
episode: 147 training return: tensor(203.3125, device='cuda:0')
epoch: 37 test_true_pfm: 3423.242079377746 sim_pfm: 224.58688564695572
episode: 148 training return: tensor(21.8963, device='cuda:0')
episode: 149 training return: tensor(-48.9056, device='cuda:0')
episode: 150 training return: tensor(196.2392, device='cuda:0')
episode: 151 training return: tensor(163.5515, device='cuda:0')
epoch: 38 test_true_pfm: 3262.9870168616203 sim_pfm: 181.77937233884586
episode: 152 training return: tensor(147.0648, device='cuda:0')
episode: 153 training return: tensor(133.6574, device='cuda:0')
episode: 154 training return: tensor(172.9393, device='cuda:0')
episode: 155 training return: tensor(13.8452, device='cuda:0')
epoch: 39 test_true_pfm: 3392.9511770935 sim_pfm: 193.11472422175575
episode: 156 training return: tensor(83.3766, device='cuda:0')
episode: 157 training return: tensor(166.9823, device='cuda:0')
episode: 158 training return: tensor(28.4615, device='cuda:0')
episode: 159 training return: tensor(189.2296, device='cuda:0')
epoch: 40 test_true_pfm: 3341.7639441192227 sim_pfm: 104.2923902796465
episode: 160 training return: tensor(178.7732, device='cuda:0')
episode: 161 training return: tensor(135.4084, device='cuda:0')
episode: 162 training return: tensor(-14.5394, device='cuda:0')
episode: 163 training return: tensor(174.1521, device='cuda:0')
epoch: 41 test_true_pfm: 3340.1999056600202 sim_pfm: 117.49078657601301
episode: 164 training return: tensor(46.8543, device='cuda:0')
episode: 165 training return: tensor(198.7626, device='cuda:0')
episode: 166 training return: tensor(193.2548, device='cuda:0')
episode: 167 training return: tensor(205.2517, device='cuda:0')
epoch: 42 test_true_pfm: 3375.7301664901042 sim_pfm: 201.85400279055466
episode: 168 training return: tensor(104.9895, device='cuda:0')
episode: 169 training return: tensor(166.6948, device='cuda:0')
episode: 170 training return: tensor(133.2398, device='cuda:0')
episode: 171 training return: tensor(229.6788, device='cuda:0')
epoch: 43 test_true_pfm: 3387.3656415409373 sim_pfm: -32.81398074098009
episode: 172 training return: tensor(89.4938, device='cuda:0')
episode: 173 training return: tensor(156.2366, device='cuda:0')
episode: 174 training return: tensor(222.7460, device='cuda:0')
episode: 175 training return: tensor(273.3817, device='cuda:0')
epoch: 44 test_true_pfm: 3336.9781677962833 sim_pfm: 212.50219393817437
episode: 176 training return: tensor(-306.7912, device='cuda:0')
episode: 177 training return: tensor(199.6741, device='cuda:0')
episode: 178 training return: tensor(176.9210, device='cuda:0')
episode: 179 training return: tensor(265.2238, device='cuda:0')
epoch: 45 test_true_pfm: 1537.7016249231174 sim_pfm: 19.224990199921496
episode: 180 training return: tensor(100.5063, device='cuda:0')
episode: 181 training return: tensor(160.2563, device='cuda:0')
episode: 182 training return: tensor(202.8369, device='cuda:0')
episode: 183 training return: tensor(299.6615, device='cuda:0')
epoch: 46 test_true_pfm: 3387.609715794945 sim_pfm: 149.9029085888178
episode: 184 training return: tensor(185.7123, device='cuda:0')
episode: 185 training return: tensor(176.6246, device='cuda:0')
episode: 186 training return: tensor(-221.7545, device='cuda:0')
episode: 187 training return: tensor(170.7200, device='cuda:0')
epoch: 47 test_true_pfm: 3354.275663869965 sim_pfm: 199.66243801261103
episode: 188 training return: tensor(237.0833, device='cuda:0')
episode: 189 training return: tensor(-82.2259, device='cuda:0')
episode: 190 training return: tensor(-320.2164, device='cuda:0')
episode: 191 training return: tensor(156.7478, device='cuda:0')
epoch: 48 test_true_pfm: 3052.677508026072 sim_pfm: 69.0223375368223
episode: 192 training return: tensor(212.4519, device='cuda:0')
episode: 193 training return: tensor(154.9092, device='cuda:0')
episode: 194 training return: tensor(295.8990, device='cuda:0')
episode: 195 training return: tensor(-144.1635, device='cuda:0')
epoch: 49 test_true_pfm: 3424.905424653614 sim_pfm: 219.0837998269708
episode: 196 training return: tensor(153.8002, device='cuda:0')
episode: 197 training return: tensor(32.1473, device='cuda:0')
episode: 198 training return: tensor(108.2834, device='cuda:0')
episode: 199 training return: tensor(206.4035, device='cuda:0')
epoch: 50 test_true_pfm: 3373.7911017787615 sim_pfm: 207.90940355863617
episode: 200 training return: tensor(177.6437, device='cuda:0')
episode: 201 training return: tensor(334.3647, device='cuda:0')
episode: 202 training return: tensor(223.2452, device='cuda:0')
episode: 203 training return: tensor(-238.2240, device='cuda:0')
epoch: 51 test_true_pfm: 2896.9581007214824 sim_pfm: -5.098943038571936
episode: 204 training return: tensor(258.5073, device='cuda:0')
episode: 205 training return: tensor(262.4749, device='cuda:0')
episode: 206 training return: tensor(-147.3691, device='cuda:0')
episode: 207 training return: tensor(80.4150, device='cuda:0')
epoch: 52 test_true_pfm: 3366.9296541833087 sim_pfm: 162.39272826339584
episode: 208 training return: tensor(159.0743, device='cuda:0')
episode: 209 training return: tensor(156.9128, device='cuda:0')
episode: 210 training return: tensor(194.8345, device='cuda:0')
episode: 211 training return: tensor(214.8026, device='cuda:0')
epoch: 53 test_true_pfm: 3431.2706992634135 sim_pfm: 77.25671931677304
episode: 212 training return: tensor(173.5223, device='cuda:0')
episode: 213 training return: tensor(123.4000, device='cuda:0')
episode: 214 training return: tensor(116.3678, device='cuda:0')
episode: 215 training return: tensor(262.6085, device='cuda:0')
epoch: 54 test_true_pfm: 3361.3947100335586 sim_pfm: 214.7276688986361
episode: 216 training return: tensor(159.0608, device='cuda:0')
episode: 217 training return: tensor(153.4595, device='cuda:0')
episode: 218 training return: tensor(-42.6793, device='cuda:0')
episode: 219 training return: tensor(136.4285, device='cuda:0')
epoch: 55 test_true_pfm: 3322.213357831679 sim_pfm: 190.20860411947555
episode: 220 training return: tensor(-191.0272, device='cuda:0')
episode: 221 training return: tensor(151.6895, device='cuda:0')
episode: 222 training return: tensor(217.1306, device='cuda:0')
episode: 223 training return: tensor(166.5579, device='cuda:0')
epoch: 56 test_true_pfm: 3079.8510934384044 sim_pfm: 243.43165292784883
episode: 224 training return: tensor(205.3019, device='cuda:0')
episode: 225 training return: tensor(145.7729, device='cuda:0')
episode: 226 training return: tensor(114.0202, device='cuda:0')
episode: 227 training return: tensor(120.4000, device='cuda:0')
epoch: 57 test_true_pfm: 3375.719940392757 sim_pfm: 186.07600674585169
episode: 228 training return: tensor(-108.8988, device='cuda:0')
episode: 229 training return: tensor(-315.7344, device='cuda:0')
episode: 230 training return: tensor(41.9871, device='cuda:0')
episode: 231 training return: tensor(154.1300, device='cuda:0')
epoch: 58 test_true_pfm: 3391.194360365282 sim_pfm: 228.22587190475315
episode: 232 training return: tensor(233.5457, device='cuda:0')
episode: 233 training return: tensor(176.4820, device='cuda:0')
episode: 234 training return: tensor(231.6105, device='cuda:0')
episode: 235 training return: tensor(78.6801, device='cuda:0')
epoch: 59 test_true_pfm: 3306.155826159793 sim_pfm: 233.5109439821293
episode: 236 training return: tensor(-80.7252, device='cuda:0')
episode: 237 training return: tensor(164.9800, device='cuda:0')
episode: 238 training return: tensor(-363.3956, device='cuda:0')
episode: 239 training return: tensor(161.6747, device='cuda:0')
epoch: 60 test_true_pfm: 3401.7506735501142 sim_pfm: 201.53888844883963
episode: 240 training return: tensor(128.9355, device='cuda:0')
episode: 241 training return: tensor(179.2520, device='cuda:0')
episode: 242 training return: tensor(212.6194, device='cuda:0')
episode: 243 training return: tensor(156.5076, device='cuda:0')
epoch: 61 test_true_pfm: 3319.0497145610166 sim_pfm: 207.63959486275175
episode: 244 training return: tensor(155.9875, device='cuda:0')
episode: 245 training return: tensor(188.3254, device='cuda:0')
episode: 246 training return: tensor(209.6945, device='cuda:0')
episode: 247 training return: tensor(202.2148, device='cuda:0')
epoch: 62 test_true_pfm: 3409.7015124129302 sim_pfm: 227.98923907137942
episode: 248 training return: tensor(201.0312, device='cuda:0')
episode: 249 training return: tensor(251.4466, device='cuda:0')
episode: 250 training return: tensor(-393.7682, device='cuda:0')
episode: 251 training return: tensor(188.9182, device='cuda:0')
epoch: 63 test_true_pfm: 3408.916322831463 sim_pfm: 116.25127871640143
episode: 252 training return: tensor(190.3840, device='cuda:0')
episode: 253 training return: tensor(280.4445, device='cuda:0')
episode: 254 training return: tensor(175.0657, device='cuda:0')
episode: 255 training return: tensor(-385.9187, device='cuda:0')
epoch: 64 test_true_pfm: 3408.603617912908 sim_pfm: 200.7405888986929
episode: 256 training return: tensor(193.7099, device='cuda:0')
episode: 257 training return: tensor(-220.3032, device='cuda:0')
episode: 258 training return: tensor(116.0191, device='cuda:0')
episode: 259 training return: tensor(17.8259, device='cuda:0')
epoch: 65 test_true_pfm: 3401.241222058345 sim_pfm: 235.78459386720593
episode: 260 training return: tensor(153.3762, device='cuda:0')
episode: 261 training return: tensor(-335.6810, device='cuda:0')
episode: 262 training return: tensor(223.4036, device='cuda:0')
episode: 263 training return: tensor(203.7783, device='cuda:0')
epoch: 66 test_true_pfm: 3307.9871070225295 sim_pfm: 199.59036875263942
episode: 264 training return: tensor(168.4718, device='cuda:0')
episode: 265 training return: tensor(193.4088, device='cuda:0')
episode: 266 training return: tensor(248.7960, device='cuda:0')
episode: 267 training return: tensor(231.7373, device='cuda:0')
epoch: 67 test_true_pfm: 3415.806096847738 sim_pfm: 243.63897366049545
episode: 268 training return: tensor(-330.9386, device='cuda:0')
episode: 269 training return: tensor(159.7705, device='cuda:0')
episode: 270 training return: tensor(281.7040, device='cuda:0')
episode: 271 training return: tensor(199.2822, device='cuda:0')
epoch: 68 test_true_pfm: 3464.629047063554 sim_pfm: 277.28086569919833
episode: 272 training return: tensor(-24.8111, device='cuda:0')
episode: 273 training return: tensor(24.6691, device='cuda:0')
episode: 274 training return: tensor(-247.5061, device='cuda:0')
episode: 275 training return: tensor(96.5428, device='cuda:0')
epoch: 69 test_true_pfm: 3299.4632553862243 sim_pfm: 204.4571232375456
episode: 276 training return: tensor(258.6632, device='cuda:0')
episode: 277 training return: tensor(133.6425, device='cuda:0')
episode: 278 training return: tensor(249.6188, device='cuda:0')
episode: 279 training return: tensor(205.4868, device='cuda:0')
epoch: 70 test_true_pfm: 2823.92498878351 sim_pfm: 231.29442901590178
episode: 280 training return: tensor(245.6441, device='cuda:0')
episode: 281 training return: tensor(233.0612, device='cuda:0')
episode: 282 training return: tensor(241.7819, device='cuda:0')
episode: 283 training return: tensor(-119.7343, device='cuda:0')
epoch: 71 test_true_pfm: 2647.2958276895206 sim_pfm: 55.739791398208276
episode: 284 training return: tensor(236.1143, device='cuda:0')
episode: 285 training return: tensor(-290.3038, device='cuda:0')
episode: 286 training return: tensor(197.9951, device='cuda:0')
episode: 287 training return: tensor(165.6640, device='cuda:0')
epoch: 72 test_true_pfm: 3307.9165227926856 sim_pfm: 123.94702910831741
episode: 288 training return: tensor(207.2386, device='cuda:0')
episode: 289 training return: tensor(162.0045, device='cuda:0')
episode: 290 training return: tensor(180.1024, device='cuda:0')
episode: 291 training return: tensor(-99.2148, device='cuda:0')
epoch: 73 test_true_pfm: 3377.494504115269 sim_pfm: 195.04252118622148
episode: 292 training return: tensor(164.1604, device='cuda:0')
episode: 293 training return: tensor(263.4421, device='cuda:0')
episode: 294 training return: tensor(168.1785, device='cuda:0')
episode: 295 training return: tensor(215.2845, device='cuda:0')
epoch: 74 test_true_pfm: 3048.387170590353 sim_pfm: 120.46657259219016
episode: 296 training return: tensor(142.3499, device='cuda:0')
episode: 297 training return: tensor(66.1502, device='cuda:0')
episode: 298 training return: tensor(146.8710, device='cuda:0')
episode: 299 training return: tensor(228.4671, device='cuda:0')
epoch: 75 test_true_pfm: 3363.8326281257814 sim_pfm: 269.84133219030144
episode: 300 training return: tensor(-215.7342, device='cuda:0')
episode: 301 training return: tensor(191.2914, device='cuda:0')
episode: 302 training return: tensor(163.4089, device='cuda:0')
episode: 303 training return: tensor(214.4700, device='cuda:0')
epoch: 76 test_true_pfm: 3434.1378209544196 sim_pfm: 245.8195363478153
episode: 304 training return: tensor(184.5803, device='cuda:0')
episode: 305 training return: tensor(214.1048, device='cuda:0')
episode: 306 training return: tensor(-71.8809, device='cuda:0')
episode: 307 training return: tensor(-170.2016, device='cuda:0')
epoch: 77 test_true_pfm: 2981.1031077712487 sim_pfm: 219.0659465532323
episode: 308 training return: tensor(202.4332, device='cuda:0')
episode: 309 training return: tensor(157.6049, device='cuda:0')
episode: 310 training return: tensor(-60.4698, device='cuda:0')
episode: 311 training return: tensor(206.7345, device='cuda:0')
epoch: 78 test_true_pfm: 3039.5991862301253 sim_pfm: -252.896458373211
episode: 312 training return: tensor(196.7950, device='cuda:0')
episode: 313 training return: tensor(230.3618, device='cuda:0')
episode: 314 training return: tensor(145.1409, device='cuda:0')
episode: 315 training return: tensor(77.1810, device='cuda:0')
epoch: 79 test_true_pfm: 3262.0828355581275 sim_pfm: 244.6217958149597
episode: 316 training return: tensor(193.9064, device='cuda:0')
episode: 317 training return: tensor(304.7415, device='cuda:0')
episode: 318 training return: tensor(284.9086, device='cuda:0')
episode: 319 training return: tensor(277.6465, device='cuda:0')
epoch: 80 test_true_pfm: 3445.5185213929776 sim_pfm: 214.65971492867297
episode: 320 training return: tensor(214.7404, device='cuda:0')
episode: 321 training return: tensor(142.5098, device='cuda:0')
episode: 322 training return: tensor(183.8411, device='cuda:0')
episode: 323 training return: tensor(183.7956, device='cuda:0')
epoch: 81 test_true_pfm: 3288.03963226302 sim_pfm: 101.35832586626445
episode: 324 training return: tensor(-14.5616, device='cuda:0')
episode: 325 training return: tensor(159.0542, device='cuda:0')
episode: 326 training return: tensor(277.4557, device='cuda:0')
episode: 327 training return: tensor(59.8168, device='cuda:0')
epoch: 82 test_true_pfm: 3243.8667118767494 sim_pfm: 218.59195968773565
episode: 328 training return: tensor(215.0422, device='cuda:0')
episode: 329 training return: tensor(190.4807, device='cuda:0')
episode: 330 training return: tensor(241.3257, device='cuda:0')
episode: 331 training return: tensor(262.0726, device='cuda:0')
epoch: 83 test_true_pfm: 3435.116084529664 sim_pfm: 253.90497180138482
episode: 332 training return: tensor(376.9897, device='cuda:0')
episode: 333 training return: tensor(288.9500, device='cuda:0')
episode: 334 training return: tensor(241.8717, device='cuda:0')
episode: 335 training return: tensor(253.5471, device='cuda:0')
epoch: 84 test_true_pfm: 3410.1339256795454 sim_pfm: 278.51314535136527
episode: 336 training return: tensor(228.4834, device='cuda:0')
episode: 337 training return: tensor(11.6929, device='cuda:0')
episode: 338 training return: tensor(62.6793, device='cuda:0')
episode: 339 training return: tensor(184.2962, device='cuda:0')
epoch: 85 test_true_pfm: 2922.786008767158 sim_pfm: 209.15795938209826
episode: 340 training return: tensor(-371.9272, device='cuda:0')
episode: 341 training return: tensor(-249.8804, device='cuda:0')
episode: 342 training return: tensor(-173.9566, device='cuda:0')
episode: 343 training return: tensor(36.2282, device='cuda:0')
epoch: 86 test_true_pfm: 3434.930803340753 sim_pfm: 142.49369783926522
episode: 344 training return: tensor(200.0693, device='cuda:0')
episode: 345 training return: tensor(197.8606, device='cuda:0')
episode: 346 training return: tensor(167.0015, device='cuda:0')
episode: 347 training return: tensor(236.6189, device='cuda:0')
epoch: 87 test_true_pfm: 3394.6199496304257 sim_pfm: 201.60142430191627
episode: 348 training return: tensor(220.9695, device='cuda:0')
episode: 349 training return: tensor(291.0311, device='cuda:0')
episode: 350 training return: tensor(99.8147, device='cuda:0')
episode: 351 training return: tensor(158.7271, device='cuda:0')
epoch: 88 test_true_pfm: 3489.8454361952004 sim_pfm: 269.0965337131444
episode: 352 training return: tensor(317.8423, device='cuda:0')
episode: 353 training return: tensor(113.0335, device='cuda:0')
episode: 354 training return: tensor(191.3555, device='cuda:0')
episode: 355 training return: tensor(112.9556, device='cuda:0')
epoch: 89 test_true_pfm: 3421.236205323501 sim_pfm: 200.43963000612953
episode: 356 training return: tensor(7.4030, device='cuda:0')
episode: 357 training return: tensor(218.5964, device='cuda:0')
episode: 358 training return: tensor(190.1594, device='cuda:0')
episode: 359 training return: tensor(140.0873, device='cuda:0')
epoch: 90 test_true_pfm: 3373.5440658620632 sim_pfm: 230.58578069596356
episode: 360 training return: tensor(165.1268, device='cuda:0')
episode: 361 training return: tensor(258.4161, device='cuda:0')
episode: 362 training return: tensor(209.9182, device='cuda:0')
episode: 363 training return: tensor(-326.0958, device='cuda:0')
epoch: 91 test_true_pfm: 3351.305730030616 sim_pfm: 276.9752147340817
episode: 364 training return: tensor(222.1844, device='cuda:0')
episode: 365 training return: tensor(218.2122, device='cuda:0')
episode: 366 training return: tensor(220.5577, device='cuda:0')
episode: 367 training return: tensor(232.1233, device='cuda:0')
epoch: 92 test_true_pfm: 3433.315777080332 sim_pfm: 220.7374488665955
episode: 368 training return: tensor(-34.8368, device='cuda:0')
episode: 369 training return: tensor(319.5497, device='cuda:0')
episode: 370 training return: tensor(250.9991, device='cuda:0')
episode: 371 training return: tensor(279.9409, device='cuda:0')
epoch: 93 test_true_pfm: 3447.230423983836 sim_pfm: 185.23505144088026
episode: 372 training return: tensor(247.5308, device='cuda:0')
episode: 373 training return: tensor(106.0587, device='cuda:0')
episode: 374 training return: tensor(255.9114, device='cuda:0')
episode: 375 training return: tensor(202.1101, device='cuda:0')
epoch: 94 test_true_pfm: 3425.552360680636 sim_pfm: 178.41914250690024
episode: 376 training return: tensor(232.0417, device='cuda:0')
episode: 377 training return: tensor(195.9729, device='cuda:0')
episode: 378 training return: tensor(164.9777, device='cuda:0')
episode: 379 training return: tensor(165.5867, device='cuda:0')
epoch: 95 test_true_pfm: 3486.9160946454626 sim_pfm: 282.6448030147779
episode: 380 training return: tensor(193.7932, device='cuda:0')
episode: 381 training return: tensor(163.0896, device='cuda:0')
episode: 382 training return: tensor(237.4645, device='cuda:0')
episode: 383 training return: tensor(-222.3875, device='cuda:0')
epoch: 96 test_true_pfm: 3407.7675294704327 sim_pfm: 236.4420636597788
episode: 384 training return: tensor(-320.8024, device='cuda:0')
episode: 385 training return: tensor(233.4086, device='cuda:0')
episode: 386 training return: tensor(258.0133, device='cuda:0')
episode: 387 training return: tensor(229.6567, device='cuda:0')
epoch: 97 test_true_pfm: 3403.2650232500037 sim_pfm: 240.0658455832163
episode: 388 training return: tensor(233.1374, device='cuda:0')
episode: 389 training return: tensor(200.7087, device='cuda:0')
episode: 390 training return: tensor(251.6300, device='cuda:0')
episode: 391 training return: tensor(159.2510, device='cuda:0')
epoch: 98 test_true_pfm: 3396.2401147943133 sim_pfm: 277.9342860144
episode: 392 training return: tensor(195.6951, device='cuda:0')
episode: 393 training return: tensor(-440.1958, device='cuda:0')
episode: 394 training return: tensor(-41.9513, device='cuda:0')
episode: 395 training return: tensor(183.7455, device='cuda:0')
epoch: 99 test_true_pfm: 3388.4706837778053 sim_pfm: 253.81665788896498
episode: 396 training return: tensor(200.6128, device='cuda:0')
episode: 397 training return: tensor(256.3429, device='cuda:0')
episode: 398 training return: tensor(143.6443, device='cuda:0')
episode: 399 training return: tensor(-247.6506, device='cuda:0')
epoch: 100 test_true_pfm: 3482.4709536312766 sim_pfm: 276.56023752414814
episode: 400 training return: tensor(234.9074, device='cuda:0')
episode: 401 training return: tensor(-30.5037, device='cuda:0')
episode: 402 training return: tensor(240.0265, device='cuda:0')
episode: 403 training return: tensor(189.3309, device='cuda:0')
epoch: 101 test_true_pfm: 2989.0228137331674 sim_pfm: 97.21878374717198
episode: 404 training return: tensor(-66.3583, device='cuda:0')
episode: 405 training return: tensor(187.7568, device='cuda:0')
episode: 406 training return: tensor(214.0370, device='cuda:0')
episode: 407 training return: tensor(-159.0492, device='cuda:0')
epoch: 102 test_true_pfm: 3028.4632865432322 sim_pfm: 140.4144462356926
episode: 408 training return: tensor(175.9330, device='cuda:0')
episode: 409 training return: tensor(142.0848, device='cuda:0')
episode: 410 training return: tensor(260.6068, device='cuda:0')
episode: 411 training return: tensor(209.7566, device='cuda:0')
epoch: 103 test_true_pfm: 3422.347667849965 sim_pfm: 194.82158824514286
episode: 412 training return: tensor(-80.9600, device='cuda:0')
episode: 413 training return: tensor(163.1665, device='cuda:0')
episode: 414 training return: tensor(220.1613, device='cuda:0')
episode: 415 training return: tensor(200.9038, device='cuda:0')
epoch: 104 test_true_pfm: 3474.8184745146696 sim_pfm: 217.66888722082754
episode: 416 training return: tensor(264.4162, device='cuda:0')
episode: 417 training return: tensor(171.7620, device='cuda:0')
episode: 418 training return: tensor(216.4464, device='cuda:0')
episode: 419 training return: tensor(211.9162, device='cuda:0')
epoch: 105 test_true_pfm: 3452.1610012481756 sim_pfm: 291.22934560945333
episode: 420 training return: tensor(-369.5596, device='cuda:0')
episode: 421 training return: tensor(186.2280, device='cuda:0')
episode: 422 training return: tensor(285.5246, device='cuda:0')
episode: 423 training return: tensor(233.6138, device='cuda:0')
epoch: 106 test_true_pfm: 3402.6256033763384 sim_pfm: 266.91963327565463
episode: 424 training return: tensor(227.9704, device='cuda:0')
episode: 425 training return: tensor(98.9556, device='cuda:0')
episode: 426 training return: tensor(246.9988, device='cuda:0')
episode: 427 training return: tensor(228.7465, device='cuda:0')
epoch: 107 test_true_pfm: 3424.1969851557756 sim_pfm: 217.74048328694576
episode: 428 training return: tensor(31.9740, device='cuda:0')
episode: 429 training return: tensor(269.6419, device='cuda:0')
episode: 430 training return: tensor(280.6688, device='cuda:0')
episode: 431 training return: tensor(-249.9851, device='cuda:0')
epoch: 108 test_true_pfm: 2911.0758929257586 sim_pfm: 235.9368510322432
episode: 432 training return: tensor(214.9388, device='cuda:0')
episode: 433 training return: tensor(-43.9316, device='cuda:0')
episode: 434 training return: tensor(263.8180, device='cuda:0')
episode: 435 training return: tensor(155.9189, device='cuda:0')
epoch: 109 test_true_pfm: 3449.5691389350436 sim_pfm: 108.42319569927834
episode: 436 training return: tensor(153.3733, device='cuda:0')
episode: 437 training return: tensor(286.2146, device='cuda:0')
episode: 438 training return: tensor(148.2699, device='cuda:0')
episode: 439 training return: tensor(-50.7570, device='cuda:0')
epoch: 110 test_true_pfm: 3411.306125847139 sim_pfm: 256.14789988513803
episode: 440 training return: tensor(147.0007, device='cuda:0')
episode: 441 training return: tensor(229.3354, device='cuda:0')
episode: 442 training return: tensor(257.8133, device='cuda:0')
episode: 443 training return: tensor(325.0987, device='cuda:0')
epoch: 111 test_true_pfm: 3382.262454369303 sim_pfm: 232.73912001305143
episode: 444 training return: tensor(-325.9081, device='cuda:0')
episode: 445 training return: tensor(322.0667, device='cuda:0')
episode: 446 training return: tensor(206.3266, device='cuda:0')
episode: 447 training return: tensor(311.1598, device='cuda:0')
epoch: 112 test_true_pfm: 3411.930965640004 sim_pfm: 223.95393636024286
episode: 448 training return: tensor(195.0952, device='cuda:0')
episode: 449 training return: tensor(-115.9554, device='cuda:0')
episode: 450 training return: tensor(196.1956, device='cuda:0')
episode: 451 training return: tensor(32.1918, device='cuda:0')
epoch: 113 test_true_pfm: 3380.6987545820757 sim_pfm: 129.0206463135352
episode: 452 training return: tensor(244.0607, device='cuda:0')
episode: 453 training return: tensor(204.4155, device='cuda:0')
episode: 454 training return: tensor(187.5547, device='cuda:0')
episode: 455 training return: tensor(262.3481, device='cuda:0')
epoch: 114 test_true_pfm: 3385.0349691905926 sim_pfm: 222.06777765099346
episode: 456 training return: tensor(263.8758, device='cuda:0')
episode: 457 training return: tensor(258.3410, device='cuda:0')
episode: 458 training return: tensor(211.6345, device='cuda:0')
episode: 459 training return: tensor(-84.5269, device='cuda:0')
epoch: 115 test_true_pfm: 3476.127806650478 sim_pfm: 252.53861275033947
episode: 460 training return: tensor(229.6885, device='cuda:0')
episode: 461 training return: tensor(267.9476, device='cuda:0')
episode: 462 training return: tensor(224.1543, device='cuda:0')
episode: 463 training return: tensor(191.3546, device='cuda:0')
epoch: 116 test_true_pfm: 3343.6989800923725 sim_pfm: 145.94598526874324
episode: 464 training return: tensor(180.6372, device='cuda:0')
episode: 465 training return: tensor(185.8554, device='cuda:0')
episode: 466 training return: tensor(-347.3553, device='cuda:0')
episode: 467 training return: tensor(206.5868, device='cuda:0')
epoch: 117 test_true_pfm: 3092.3898871423976 sim_pfm: 283.57847340465133
episode: 468 training return: tensor(223.8287, device='cuda:0')
episode: 469 training return: tensor(191.6370, device='cuda:0')
episode: 470 training return: tensor(211.6574, device='cuda:0')
episode: 471 training return: tensor(176.4607, device='cuda:0')
epoch: 118 test_true_pfm: 3415.2158184188233 sim_pfm: 230.51635193181573
episode: 472 training return: tensor(271.5689, device='cuda:0')
episode: 473 training return: tensor(244.2827, device='cuda:0')
episode: 474 training return: tensor(260.8534, device='cuda:0')
episode: 475 training return: tensor(-335.3731, device='cuda:0')
epoch: 119 test_true_pfm: 3305.080951226127 sim_pfm: 239.56473189790267
episode: 476 training return: tensor(161.7494, device='cuda:0')
episode: 477 training return: tensor(252.4627, device='cuda:0')
episode: 478 training return: tensor(-183.9382, device='cuda:0')
episode: 479 training return: tensor(205.4499, device='cuda:0')
epoch: 120 test_true_pfm: 3008.1116985994736 sim_pfm: 228.50979066628497
episode: 480 training return: tensor(286.1918, device='cuda:0')
episode: 481 training return: tensor(136.0253, device='cuda:0')
episode: 482 training return: tensor(36.0332, device='cuda:0')
episode: 483 training return: tensor(270.6170, device='cuda:0')
epoch: 121 test_true_pfm: 3445.8091193471696 sim_pfm: 242.8582581600058
episode: 484 training return: tensor(269.8319, device='cuda:0')
episode: 485 training return: tensor(-88.0133, device='cuda:0')
episode: 486 training return: tensor(298.3620, device='cuda:0')
episode: 487 training return: tensor(262.1573, device='cuda:0')
epoch: 122 test_true_pfm: 3408.676457865646 sim_pfm: 171.72792804440056
episode: 488 training return: tensor(228.0189, device='cuda:0')
episode: 489 training return: tensor(317.9514, device='cuda:0')
episode: 490 training return: tensor(247.2621, device='cuda:0')
episode: 491 training return: tensor(187.9982, device='cuda:0')
epoch: 123 test_true_pfm: 3433.6122681380743 sim_pfm: 256.42646521072794
episode: 492 training return: tensor(255.9290, device='cuda:0')
episode: 493 training return: tensor(193.0223, device='cuda:0')
episode: 494 training return: tensor(208.9913, device='cuda:0')
episode: 495 training return: tensor(146.4571, device='cuda:0')
epoch: 124 test_true_pfm: 3404.793769664471 sim_pfm: 230.70388783844342
episode: 496 training return: tensor(154.6662, device='cuda:0')
episode: 497 training return: tensor(174.9032, device='cuda:0')
episode: 498 training return: tensor(220.0969, device='cuda:0')
episode: 499 training return: tensor(241.8118, device='cuda:0')
epoch: 125 test_true_pfm: 3453.8822631277003 sim_pfm: 105.90216942490467
episode: 500 training return: tensor(173.0739, device='cuda:0')
episode: 501 training return: tensor(168.2292, device='cuda:0')
episode: 502 training return: tensor(169.4229, device='cuda:0')
episode: 503 training return: tensor(310.6922, device='cuda:0')
epoch: 126 test_true_pfm: 3491.364142254019 sim_pfm: 286.24166684691835
episode: 504 training return: tensor(163.2966, device='cuda:0')
episode: 505 training return: tensor(59.4319, device='cuda:0')
episode: 506 training return: tensor(210.8334, device='cuda:0')
episode: 507 training return: tensor(291.6404, device='cuda:0')
epoch: 127 test_true_pfm: 3479.5359937056687 sim_pfm: 193.44069670457006
episode: 508 training return: tensor(60.5130, device='cuda:0')
episode: 509 training return: tensor(251.6229, device='cuda:0')
episode: 510 training return: tensor(108.8006, device='cuda:0')
episode: 511 training return: tensor(160.6599, device='cuda:0')
epoch: 128 test_true_pfm: 3475.3789745933777 sim_pfm: 284.4053320903913
episode: 512 training return: tensor(-146.3215, device='cuda:0')
episode: 513 training return: tensor(261.7049, device='cuda:0')
episode: 514 training return: tensor(271.9168, device='cuda:0')
episode: 515 training return: tensor(210.4820, device='cuda:0')
epoch: 129 test_true_pfm: 3506.9689031575417 sim_pfm: 159.1424133821371
episode: 516 training return: tensor(254.3064, device='cuda:0')
episode: 517 training return: tensor(256.9455, device='cuda:0')
episode: 518 training return: tensor(260.6814, device='cuda:0')
episode: 519 training return: tensor(52.3795, device='cuda:0')
epoch: 130 test_true_pfm: 3454.922648858477 sim_pfm: 299.7055283713659
episode: 520 training return: tensor(-77.0425, device='cuda:0')
episode: 521 training return: tensor(190.5080, device='cuda:0')
episode: 522 training return: tensor(-405.0133, device='cuda:0')
episode: 523 training return: tensor(-228.4587, device='cuda:0')
epoch: 131 test_true_pfm: 3413.942166127439 sim_pfm: 209.17055732483277
episode: 524 training return: tensor(184.7341, device='cuda:0')
episode: 525 training return: tensor(56.8706, device='cuda:0')
episode: 526 training return: tensor(-15.2881, device='cuda:0')
episode: 527 training return: tensor(212.2991, device='cuda:0')
epoch: 132 test_true_pfm: 3310.252318188062 sim_pfm: 256.6758310289394
episode: 528 training return: tensor(262.6179, device='cuda:0')
episode: 529 training return: tensor(244.0425, device='cuda:0')
episode: 530 training return: tensor(280.5947, device='cuda:0')
episode: 531 training return: tensor(278.6595, device='cuda:0')
epoch: 133 test_true_pfm: 3436.1023131448806 sim_pfm: 237.50193056246886
episode: 532 training return: tensor(225.5124, device='cuda:0')
episode: 533 training return: tensor(206.5412, device='cuda:0')
episode: 534 training return: tensor(214.3268, device='cuda:0')
episode: 535 training return: tensor(218.3503, device='cuda:0')
epoch: 134 test_true_pfm: 3463.447089130473 sim_pfm: 262.1259781062351
episode: 536 training return: tensor(211.4024, device='cuda:0')
episode: 537 training return: tensor(19.7642, device='cuda:0')
episode: 538 training return: tensor(312.1195, device='cuda:0')
episode: 539 training return: tensor(216.9478, device='cuda:0')
epoch: 135 test_true_pfm: 3393.499304863993 sim_pfm: 247.5974429761991
episode: 540 training return: tensor(244.3954, device='cuda:0')
episode: 541 training return: tensor(274.4027, device='cuda:0')
episode: 542 training return: tensor(230.1385, device='cuda:0')
episode: 543 training return: tensor(268.9228, device='cuda:0')
epoch: 136 test_true_pfm: 3396.309075442161 sim_pfm: 235.53352513131298
episode: 544 training return: tensor(260.8222, device='cuda:0')
episode: 545 training return: tensor(271.5154, device='cuda:0')
episode: 546 training return: tensor(273.7790, device='cuda:0')
episode: 547 training return: tensor(185.3747, device='cuda:0')
epoch: 137 test_true_pfm: 3454.9443183485487 sim_pfm: 221.30665610174765
episode: 548 training return: tensor(232.2267, device='cuda:0')
episode: 549 training return: tensor(289.7817, device='cuda:0')
episode: 550 training return: tensor(215.3835, device='cuda:0')
episode: 551 training return: tensor(186.9756, device='cuda:0')
epoch: 138 test_true_pfm: 3448.6700115652243 sim_pfm: 240.58521317062937
episode: 552 training return: tensor(-91.6325, device='cuda:0')
episode: 553 training return: tensor(198.0873, device='cuda:0')
episode: 554 training return: tensor(210.1024, device='cuda:0')
episode: 555 training return: tensor(235.5521, device='cuda:0')
epoch: 139 test_true_pfm: 3490.838843124932 sim_pfm: 260.57452501769876
episode: 556 training return: tensor(215.4422, device='cuda:0')
episode: 557 training return: tensor(218.3514, device='cuda:0')
episode: 558 training return: tensor(193.1093, device='cuda:0')
episode: 559 training return: tensor(300.2491, device='cuda:0')
epoch: 140 test_true_pfm: 3411.511242864935 sim_pfm: 272.85963473271113
episode: 560 training return: tensor(241.7062, device='cuda:0')
episode: 561 training return: tensor(205.8212, device='cuda:0')
episode: 562 training return: tensor(232.6450, device='cuda:0')
episode: 563 training return: tensor(259.1832, device='cuda:0')
epoch: 141 test_true_pfm: 3435.7098908516055 sim_pfm: 151.9022295367613
episode: 564 training return: tensor(36.9414, device='cuda:0')
episode: 565 training return: tensor(199.2369, device='cuda:0')
episode: 566 training return: tensor(178.4172, device='cuda:0')
episode: 567 training return: tensor(-227.6274, device='cuda:0')
epoch: 142 test_true_pfm: 3401.946337796029 sim_pfm: 232.3356857978894
episode: 568 training return: tensor(285.2752, device='cuda:0')
episode: 569 training return: tensor(-215.2570, device='cuda:0')
episode: 570 training return: tensor(260.4152, device='cuda:0')
episode: 571 training return: tensor(179.5835, device='cuda:0')
epoch: 143 test_true_pfm: 3418.6845080122002 sim_pfm: 197.03771639589104
episode: 572 training return: tensor(247.1152, device='cuda:0')
episode: 573 training return: tensor(147.3483, device='cuda:0')
episode: 574 training return: tensor(-16.8286, device='cuda:0')
episode: 575 training return: tensor(276.8388, device='cuda:0')
epoch: 144 test_true_pfm: 3361.847099820809 sim_pfm: 224.93236396597544
episode: 576 training return: tensor(-300.6359, device='cuda:0')
episode: 577 training return: tensor(209.4756, device='cuda:0')
episode: 578 training return: tensor(217.6048, device='cuda:0')
episode: 579 training return: tensor(216.9246, device='cuda:0')
epoch: 145 test_true_pfm: 3379.889791738758 sim_pfm: 202.00352002863656
episode: 580 training return: tensor(251.8567, device='cuda:0')
episode: 581 training return: tensor(131.1778, device='cuda:0')
episode: 582 training return: tensor(199.5748, device='cuda:0')
episode: 583 training return: tensor(192.9329, device='cuda:0')
epoch: 146 test_true_pfm: 3428.0005456960735 sim_pfm: 257.66393861713976
episode: 584 training return: tensor(237.9812, device='cuda:0')
episode: 585 training return: tensor(-340.7974, device='cuda:0')
episode: 586 training return: tensor(206.3441, device='cuda:0')
episode: 587 training return: tensor(-144.0275, device='cuda:0')
epoch: 147 test_true_pfm: 3399.930260326413 sim_pfm: 202.1184595489273
episode: 588 training return: tensor(192.1902, device='cuda:0')
episode: 589 training return: tensor(230.7414, device='cuda:0')
episode: 590 training return: tensor(230.1688, device='cuda:0')
episode: 591 training return: tensor(211.0197, device='cuda:0')
epoch: 148 test_true_pfm: 3425.209829924832 sim_pfm: 240.74154507244626
episode: 592 training return: tensor(-303.4373, device='cuda:0')
episode: 593 training return: tensor(290.0909, device='cuda:0')
episode: 594 training return: tensor(212.7882, device='cuda:0')
episode: 595 training return: tensor(251.4355, device='cuda:0')
epoch: 149 test_true_pfm: 3456.9151208793974 sim_pfm: 252.36895658931462
episode: 596 training return: tensor(216.8956, device='cuda:0')
episode: 597 training return: tensor(267.0596, device='cuda:0')
episode: 598 training return: tensor(215.2898, device='cuda:0')
episode: 599 training return: tensor(174.9516, device='cuda:0')
epoch: 150 test_true_pfm: 3463.8966268207027 sim_pfm: 246.8685375249673
