['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2']
epoch: 0 training_loss 0.23873786352574825 test_loss: 0.21930432319641113
epoch: 1 training_loss 0.20625046506524086 test_loss: 0.19181286096572875
epoch: 2 training_loss 0.19645639434456824 test_loss: 0.20122456550598145
epoch: 3 training_loss 0.1962898004055023 test_loss: 0.19176963567733765
epoch: 4 training_loss 0.18587602719664573 test_loss: 0.18810900449752807
epoch: 5 training_loss 0.18653616953641175 test_loss: 0.19390615224838256
epoch: 6 training_loss 0.1943249949067831 test_loss: 0.18489223718643188
epoch: 7 training_loss 0.18841619908809662 test_loss: 0.1877669334411621
epoch: 8 training_loss 0.1883782897144556 test_loss: 0.19751925468444825
epoch: 9 training_loss 0.18137908168137074 test_loss: 0.16458579301834106
epoch: 10 training_loss 0.18562193408608438 test_loss: 0.19717707633972167
epoch: 11 training_loss 0.1863697098940611 test_loss: 0.18363702297210693
epoch: 12 training_loss 0.17730872847139836 test_loss: 0.1895938515663147
epoch: 13 training_loss 0.19093342550098896 test_loss: 0.19356757402420044
epoch: 14 training_loss 0.18303498379886152 test_loss: 0.18173232078552246
epoch: 15 training_loss 0.18422857850790023 test_loss: 0.19386376142501832
epoch: 16 training_loss 0.18172848291695118 test_loss: 0.1795991063117981
epoch: 17 training_loss 0.18349808134138584 test_loss: 0.19163979291915895
epoch: 18 training_loss 0.17807389572262763 test_loss: 0.18632711172103883
epoch: 19 training_loss 0.18350101687014103 test_loss: 0.17630352973937988
epoch: 20 training_loss 0.18472449183464051 test_loss: 0.188133704662323
epoch: 21 training_loss 0.1785721206665039 test_loss: 0.18403586149215698
epoch: 22 training_loss 0.18950829334557057 test_loss: 0.1854089617729187
epoch: 23 training_loss 0.18388435773551465 test_loss: 0.18074454069137574
epoch: 24 training_loss 0.184566155821085 test_loss: 0.18050141334533693
epoch: 25 training_loss 0.17764295935630797 test_loss: 0.18257211446762084
epoch: 26 training_loss 0.17993837811052799 test_loss: 0.2044077157974243
epoch: 27 training_loss 0.17959296010434628 test_loss: 0.18108718395233153
epoch: 28 training_loss 0.1886013789474964 test_loss: 0.18475308418273925
epoch: 29 training_loss 0.18033542148768902 test_loss: 0.18236184120178223
epoch: 30 training_loss 0.18406434074044228 test_loss: 0.17730314731597902
epoch: 31 training_loss 0.1816028906404972 test_loss: 0.18437730073928832
epoch: 32 training_loss 0.18791257679462434 test_loss: 0.17481616735458375
epoch: 33 training_loss 0.18067599833011627 test_loss: 0.1678004026412964
epoch: 34 training_loss 0.18811424590647222 test_loss: 0.17212677001953125
epoch: 35 training_loss 0.18237144455313684 test_loss: 0.19258933067321776
epoch: 36 training_loss 0.1820848400890827 test_loss: 0.18176900148391723
epoch: 37 training_loss 0.17905704006552697 test_loss: 0.21056752204895018
epoch: 38 training_loss 0.18505575522780418 test_loss: 0.17026432752609252
epoch: 39 training_loss 0.18257362715899944 test_loss: 0.19134597778320311
epoch: 40 training_loss 0.1908585859835148 test_loss: 0.18814674615859986
epoch: 41 training_loss 0.1873818728327751 test_loss: 0.18508991003036498
epoch: 42 training_loss 0.18180617958307266 test_loss: 0.17657798528671265
epoch: 43 training_loss 0.1823119765520096 test_loss: 0.16862199306488038
epoch: 44 training_loss 0.17806526370346545 test_loss: 0.1608884572982788
epoch: 45 training_loss 0.1775208105146885 test_loss: 0.17347309589385987
epoch: 46 training_loss 0.17778426237404346 test_loss: 0.18681726455688477
epoch: 47 training_loss 0.1759924693405628 test_loss: 0.18910478353500365
epoch: 48 training_loss 0.17972911775112152 test_loss: 0.17428504228591918
epoch: 49 training_loss 0.18543787091970443 test_loss: 0.1789410948753357
epoch: 50 training_loss 0.17856764681637288 test_loss: 0.16400066614151002
epoch: 51 training_loss 0.17916529141366483 test_loss: 0.18414704799652098
epoch: 52 training_loss 0.1786297857016325 test_loss: 0.1983805775642395
epoch: 53 training_loss 0.17818670324981212 test_loss: 0.1874448537826538
epoch: 54 training_loss 0.1806030734628439 test_loss: 0.18187758922576905
epoch: 55 training_loss 0.1772543731331825 test_loss: 0.18704595565795898
epoch: 56 training_loss 0.175870800614357 test_loss: 0.1953532099723816
epoch: 57 training_loss 0.18384553402662276 test_loss: 0.1713621735572815
epoch: 58 training_loss 0.18185663983225822 test_loss: 0.1783615231513977
epoch: 59 training_loss 0.18334840163588523 test_loss: 0.18210840225219727
epoch: 60 training_loss 0.17972352422773838 test_loss: 0.18435598611831666
epoch: 61 training_loss 0.18335725262761116 test_loss: 0.18012053966522218
epoch: 62 training_loss 0.17547218956053257 test_loss: 0.18386768102645873
epoch: 63 training_loss 0.18160330258309842 test_loss: 0.19366413354873657
epoch: 64 training_loss 0.17504328034818173 test_loss: 0.18814499378204347
epoch: 65 training_loss 0.1802967470139265 test_loss: 0.19232404232025146
epoch: 66 training_loss 0.17734565414488315 test_loss: 0.18161978721618652
epoch: 67 training_loss 0.18221638947725297 test_loss: 0.1839248299598694
epoch: 68 training_loss 0.17946779921650888 test_loss: 0.175493586063385
epoch: 69 training_loss 0.17519831657409668 test_loss: 0.1751147985458374
epoch: 70 training_loss 0.1810486476868391 test_loss: 0.1862505555152893
epoch: 71 training_loss 0.1787810155004263 test_loss: 0.16830190420150756
epoch: 72 training_loss 0.18238588772714137 test_loss: 0.18056951761245726
epoch: 73 training_loss 0.17685019299387933 test_loss: 0.1703296422958374
epoch: 74 training_loss 0.17557644218206406 test_loss: 0.1761623740196228
epoch: 75 training_loss 0.1893643918633461 test_loss: 0.1797891616821289
epoch: 76 training_loss 0.18267540641129018 test_loss: 0.18391429185867308
epoch: 77 training_loss 0.17675980925559998 test_loss: 0.17132058143615722
epoch: 78 training_loss 0.16993270702660085 test_loss: 0.192869234085083
epoch: 79 training_loss 0.17481646552681923 test_loss: 0.1944156289100647
epoch: 80 training_loss 0.18424391746520996 test_loss: 0.17460190057754515
epoch: 81 training_loss 0.1760583321750164 test_loss: 0.1915154218673706
epoch: 82 training_loss 0.17798874288797378 test_loss: 0.19039090871810913
epoch: 83 training_loss 0.18029229633510113 test_loss: 0.18589791059494018
epoch: 84 training_loss 0.17222431540489197 test_loss: 0.17398220300674438
epoch: 85 training_loss 0.1739766924828291 test_loss: 0.17244493961334229
epoch: 86 training_loss 0.17495509885251523 test_loss: 0.182076358795166
epoch: 87 training_loss 0.17641108408570289 test_loss: 0.17663724422454835
epoch: 88 training_loss 0.1765226626396179 test_loss: 0.1798829674720764
epoch: 89 training_loss 0.18213577926158905 test_loss: 0.19876413345336913
epoch: 90 training_loss 0.18253720670938492 test_loss: 0.18468750715255738
epoch: 91 training_loss 0.1719081585109234 test_loss: 0.18663291931152343
epoch: 92 training_loss 0.17971346780657768 test_loss: 0.1791370391845703
epoch: 93 training_loss 0.17629676949232817 test_loss: 0.1713263750076294
epoch: 94 training_loss 0.18071201466023923 test_loss: 0.18776215314865113
epoch: 95 training_loss 0.17743615433573723 test_loss: 0.185140860080719
epoch: 96 training_loss 0.17724824115633964 test_loss: 0.17377389669418336
epoch: 97 training_loss 0.1806454236805439 test_loss: 0.18426029682159423
epoch: 98 training_loss 0.17903962813317775 test_loss: 0.17587248086929322
epoch: 99 training_loss 0.18798440270125866 test_loss: 0.18954081535339357
epoch: 100 training_loss 0.16808522872626783 test_loss: 0.20036008358001708
epoch: 101 training_loss 0.18183589473366737 test_loss: 0.18412294387817382
epoch: 102 training_loss 0.1799002755433321 test_loss: 0.1704744815826416
epoch: 103 training_loss 0.17758799731731414 test_loss: 0.1619456648826599
epoch: 104 training_loss 0.1741578634083271 test_loss: 0.17929980754852295
epoch: 105 training_loss 0.1831241462379694 test_loss: 0.19588502645492553
epoch: 106 training_loss 0.17808046486228704 test_loss: 0.18261829614639283
epoch: 107 training_loss 0.18384229592978954 test_loss: 0.19112688302993774
epoch: 108 training_loss 0.18196194507181646 test_loss: 0.1729024052619934
epoch: 109 training_loss 0.1842358599603176 test_loss: 0.1803066611289978
epoch: 110 training_loss 0.17632962621748446 test_loss: 0.18308324813842775
epoch: 111 training_loss 0.18400831937789916 test_loss: 0.17751836776733398
epoch: 112 training_loss 0.17354410354048014 test_loss: 0.18086671829223633
epoch: 113 training_loss 0.17490626998245717 test_loss: 0.1896979570388794
epoch: 114 training_loss 0.18145639177411796 test_loss: 0.1778058886528015
epoch: 115 training_loss 0.1834596274793148 test_loss: 0.18568840026855468
epoch: 116 training_loss 0.17767056435346604 test_loss: 0.1702286720275879
epoch: 117 training_loss 0.1719335065037012 test_loss: 0.18753433227539062
epoch: 118 training_loss 0.1836093782633543 test_loss: 0.17535200119018554
epoch: 119 training_loss 0.17146544359624386 test_loss: 0.17000268697738646
epoch: 120 training_loss 0.1834119588881731 test_loss: 0.17239779233932495
epoch: 121 training_loss 0.17847745589911937 test_loss: 0.16929415464401246
epoch: 122 training_loss 0.17569838255643844 test_loss: 0.18072437047958373
epoch: 123 training_loss 0.17365136861801148 test_loss: 0.189952552318573
epoch: 124 training_loss 0.1812051698565483 test_loss: 0.1847728967666626
epoch: 125 training_loss 0.17086288012564183 test_loss: 0.19592024087905885
epoch: 126 training_loss 0.17718281760811805 test_loss: 0.19288960695266724
epoch: 127 training_loss 0.18309777706861496 test_loss: 0.17364890575408937
epoch: 128 training_loss 0.1809107008576393 test_loss: 0.15961453914642335
epoch: 129 training_loss 0.17154621563851832 test_loss: 0.19166767597198486
epoch: 130 training_loss 0.1831187279522419 test_loss: 0.18198885917663574
epoch: 131 training_loss 0.17994745396077633 test_loss: 0.17440036535263062
epoch: 132 training_loss 0.17099739365279676 test_loss: 0.18573614358901977
epoch: 133 training_loss 0.17020223125815392 test_loss: 0.19247690439224244
epoch: 134 training_loss 0.17799401454627514 test_loss: 0.18384859561920167
epoch: 135 training_loss 0.18331177428364753 test_loss: 0.17603193521499633
epoch: 136 training_loss 0.1740794875472784 test_loss: 0.1798074722290039
epoch: 137 training_loss 0.18273802418261767 test_loss: 0.1912372350692749
epoch: 138 training_loss 0.1815874980390072 test_loss: 0.17940282821655273
epoch: 139 training_loss 0.17602266982197762 test_loss: 0.16558003425598145
epoch: 140 training_loss 0.17562551140785218 test_loss: 0.18544427156448365
epoch: 141 training_loss 0.17599218264222144 test_loss: 0.18468772172927855
epoch: 142 training_loss 0.18656386367976666 test_loss: 0.18846466541290283
epoch: 143 training_loss 0.17512831710278987 test_loss: 0.15848807096481324
epoch: 144 training_loss 0.181192886531353 test_loss: 0.1894541025161743
epoch: 145 training_loss 0.18225901812314987 test_loss: 0.18468456268310546
epoch: 146 training_loss 0.1845662224292755 test_loss: 0.17358318567276002
epoch: 147 training_loss 0.17715713053941726 test_loss: 0.18038307428359984
epoch: 148 training_loss 0.17690340906381607 test_loss: 0.1732232928276062
epoch: 149 training_loss 0.1836304260790348 test_loss: 0.17755788564682007
epoch: 0 training_loss 8.917442197799682 test_loss: 5.4532115936279295
epoch: 1 training_loss 4.2086835694313045 test_loss: 3.311627960205078
epoch: 2 training_loss 2.8356977891921997 test_loss: 2.4229631423950195
epoch: 3 training_loss 2.224341516494751 test_loss: 2.068836784362793
epoch: 4 training_loss 1.9074685859680176 test_loss: 1.7620010375976562
epoch: 5 training_loss 1.745248512029648 test_loss: 1.6428438186645509
epoch: 6 training_loss 1.553086165189743 test_loss: 1.5214166641235352
epoch: 7 training_loss 1.442579607963562 test_loss: 1.4297147750854493
epoch: 8 training_loss 1.361301258802414 test_loss: 1.3419000625610351
epoch: 9 training_loss 1.287895019054413 test_loss: 1.2473904609680175
epoch: 10 training_loss 1.2289282965660095 test_loss: 1.2562463760375977
epoch: 11 training_loss 1.193579807281494 test_loss: 1.1568151473999024
epoch: 12 training_loss 1.1555083626508713 test_loss: 1.1509276390075684
epoch: 13 training_loss 1.1238358622789384 test_loss: 1.1077077865600586
epoch: 14 training_loss 1.0826750963926315 test_loss: 1.0805869102478027
epoch: 15 training_loss 1.055270089507103 test_loss: 1.0244830131530762
epoch: 16 training_loss 1.0258041042089463 test_loss: 0.9772804260253907
epoch: 17 training_loss 1.0040716105699539 test_loss: 0.9889703750610351
epoch: 18 training_loss 0.9767177772521972 test_loss: 0.9552745819091797
epoch: 19 training_loss 0.962032618522644 test_loss: 0.9155729293823243
epoch: 20 training_loss 0.9637123310565948 test_loss: 0.9295085906982422
epoch: 21 training_loss 0.9047501075267792 test_loss: 0.9393988609313965
epoch: 22 training_loss 0.9131648558378219 test_loss: 0.8846563339233399
epoch: 23 training_loss 0.9038071256875991 test_loss: 0.8982892990112304
epoch: 24 training_loss 0.8883854883909226 test_loss: 0.8647113800048828
epoch: 25 training_loss 0.870090371966362 test_loss: 0.9117125511169434
epoch: 26 training_loss 0.8758932608366012 test_loss: 0.8256690025329589
epoch: 27 training_loss 0.8537687265872955 test_loss: 0.9149610519409179
epoch: 28 training_loss 0.8354885149002075 test_loss: 0.8314126014709473
epoch: 29 training_loss 0.8352617847919465 test_loss: 0.8245566368103028
epoch: 30 training_loss 0.817854146361351 test_loss: 0.8134035110473633
epoch: 31 training_loss 0.8181898677349091 test_loss: 0.8095941543579102
epoch: 32 training_loss 0.7931353169679641 test_loss: 0.7828942775726319
epoch: 33 training_loss 0.7836493742465973 test_loss: 0.809318733215332
epoch: 34 training_loss 0.8111336618661881 test_loss: 0.7684750556945801
epoch: 35 training_loss 0.7772908574342727 test_loss: 0.7697917461395264
epoch: 36 training_loss 0.7651257115602493 test_loss: 0.7828577995300293
epoch: 37 training_loss 0.7777316898107529 test_loss: 0.7662394046783447
epoch: 38 training_loss 0.7443513506650925 test_loss: 0.747564697265625
epoch: 39 training_loss 0.7526998674869537 test_loss: 0.7833523273468017
epoch: 40 training_loss 0.7420375108718872 test_loss: 0.7522319793701172
epoch: 41 training_loss 0.7580463749170303 test_loss: 0.7188438415527344
epoch: 42 training_loss 0.7339820611476898 test_loss: 0.7132468223571777
epoch: 43 training_loss 0.7196864849328994 test_loss: 0.7312161922454834
epoch: 44 training_loss 0.7259356904029847 test_loss: 0.7308613300323487
epoch: 45 training_loss 0.7194038617610932 test_loss: 0.7402278900146484
epoch: 46 training_loss 0.7128065997362136 test_loss: 0.7301858425140381
epoch: 47 training_loss 0.710423714518547 test_loss: 0.6899546623229981
epoch: 48 training_loss 0.7009663766622544 test_loss: 0.7144739151000976
epoch: 49 training_loss 0.6952699518203735 test_loss: 0.6923344135284424
epoch: 50 training_loss 0.6948046851158142 test_loss: 0.693745756149292
epoch: 51 training_loss 0.7080827063322067 test_loss: 0.6777132034301758
epoch: 52 training_loss 0.679400742650032 test_loss: 0.6722061634063721
epoch: 53 training_loss 0.683720759153366 test_loss: 0.6661415576934815
epoch: 54 training_loss 0.6650680327415466 test_loss: 0.6755329132080078
epoch: 55 training_loss 0.6849040198326111 test_loss: 0.6448064327239991
epoch: 56 training_loss 0.6609659945964813 test_loss: 0.6588722229003906
epoch: 57 training_loss 0.670248208642006 test_loss: 0.6788917064666748
epoch: 58 training_loss 0.670153174996376 test_loss: 0.6638332366943359
epoch: 59 training_loss 0.6748661202192306 test_loss: 0.643631887435913
epoch: 60 training_loss 0.6524089300632476 test_loss: 0.655907678604126
epoch: 61 training_loss 0.6677647870779038 test_loss: 0.6927079200744629
epoch: 62 training_loss 0.6636317175626755 test_loss: 0.6483072757720947
epoch: 63 training_loss 0.647926670908928 test_loss: 0.6330135345458985
epoch: 64 training_loss 0.6350134462118149 test_loss: 0.6485084056854248
epoch: 65 training_loss 0.6569164061546325 test_loss: 0.6244439125061035
epoch: 66 training_loss 0.6415540295839309 test_loss: 0.640149450302124
epoch: 67 training_loss 0.6285687464475632 test_loss: 0.6216925144195556
epoch: 68 training_loss 0.6442912846803666 test_loss: 0.6297205448150635
epoch: 69 training_loss 0.6375780266523361 test_loss: 0.6513394355773926
epoch: 70 training_loss 0.6440469908714295 test_loss: 0.6268587112426758
epoch: 71 training_loss 0.6298670411109925 test_loss: 0.6334670066833497
epoch: 72 training_loss 0.6149596744775772 test_loss: 0.6225197315216064
epoch: 73 training_loss 0.6243522709608078 test_loss: 0.6114454746246338
epoch: 74 training_loss 0.6274484688043594 test_loss: 0.6121432781219482
epoch: 75 training_loss 0.6173328185081481 test_loss: 0.6013921737670899
epoch: 76 training_loss 0.6098056733608246 test_loss: 0.601844310760498
epoch: 77 training_loss 0.6034416890144348 test_loss: 0.6244406223297119
epoch: 78 training_loss 0.6119596165418625 test_loss: 0.6213894844055176
epoch: 79 training_loss 0.6070224505662918 test_loss: 0.6258044719696045
epoch: 80 training_loss 0.5984068763256073 test_loss: 0.6154109001159668
epoch: 81 training_loss 0.602815546989441 test_loss: 0.590280294418335
epoch: 82 training_loss 0.5913641476631164 test_loss: 0.6003653526306152
epoch: 83 training_loss 0.6047773867845535 test_loss: 0.6295106887817383
epoch: 84 training_loss 0.6161352264881134 test_loss: 0.5785907745361328
epoch: 85 training_loss 0.5931928098201752 test_loss: 0.5690301418304443
epoch: 86 training_loss 0.5938004821538925 test_loss: 0.6035483360290528
epoch: 87 training_loss 0.5970826059579849 test_loss: 0.5700914859771729
epoch: 88 training_loss 0.5925608366727829 test_loss: 0.5781724929809571
epoch: 89 training_loss 0.5953590905666352 test_loss: 0.5606518268585206
epoch: 90 training_loss 0.5804623878002166 test_loss: 0.5740837574005127
epoch: 91 training_loss 0.5890186375379562 test_loss: 0.607216215133667
epoch: 92 training_loss 0.5783382725715637 test_loss: 0.6082730293273926
epoch: 93 training_loss 0.5844071817398071 test_loss: 0.5994887351989746
epoch: 94 training_loss 0.5852996683120728 test_loss: 0.5825277328491211
epoch: 95 training_loss 0.5840229046344757 test_loss: 0.6352973937988281
epoch: 96 training_loss 0.5876900148391724 test_loss: 0.5875569820404053
epoch: 97 training_loss 0.5779175740480423 test_loss: 0.5525542259216308
epoch: 98 training_loss 0.5757789671421051 test_loss: 0.6052024364471436
epoch: 99 training_loss 0.5693764466047287 test_loss: 0.5406349658966064
epoch: 100 training_loss 0.5615226092934609 test_loss: 0.5558248519897461
epoch: 101 training_loss 0.5718434610962868 test_loss: 0.564776611328125
epoch: 102 training_loss 0.5660557106137276 test_loss: 0.5642785549163818
epoch: 103 training_loss 0.5669692242145539 test_loss: 0.602082347869873
epoch: 104 training_loss 0.5704288554191589 test_loss: 0.566280746459961
epoch: 105 training_loss 0.5591140082478523 test_loss: 0.5572424411773682
epoch: 106 training_loss 0.5587804207205772 test_loss: 0.5375726699829102
epoch: 107 training_loss 0.5712310749292374 test_loss: 0.5554862022399902
epoch: 108 training_loss 0.5726262751221657 test_loss: 0.5350275039672852
epoch: 109 training_loss 0.5518019437789917 test_loss: 0.5663797855377197
epoch: 110 training_loss 0.5581980031728745 test_loss: 0.5338231563568115
epoch: 111 training_loss 0.5620205813646316 test_loss: 0.5672458171844482
epoch: 112 training_loss 0.5619893604516983 test_loss: 0.5446995735168457
epoch: 113 training_loss 0.560169757604599 test_loss: 0.5471282958984375
epoch: 114 training_loss 0.5509268909692764 test_loss: 0.5683238506317139
epoch: 115 training_loss 0.5479535520076751 test_loss: 0.557755708694458
epoch: 116 training_loss 0.5669817972183228 test_loss: 0.5601040840148925
epoch: 117 training_loss 0.5465308526158332 test_loss: 0.5376543998718262
epoch: 118 training_loss 0.5456797844171524 test_loss: 0.6040606021881103
epoch: 119 training_loss 0.5689758384227752 test_loss: 0.5516393184661865
epoch: 120 training_loss 0.5450246489048004 test_loss: 0.5345429897308349
epoch: 121 training_loss 0.5427564913034439 test_loss: 0.5348194599151611
epoch: 122 training_loss 0.5465229731798172 test_loss: 0.533314561843872
epoch: 123 training_loss 0.5433439087867736 test_loss: 0.5306209087371826
epoch: 124 training_loss 0.5388787856698036 test_loss: 0.5328375816345214
epoch: 125 training_loss 0.5606594854593276 test_loss: 0.5465943813323975
epoch: 126 training_loss 0.5490041783452034 test_loss: 0.5995832920074463
epoch: 127 training_loss 0.5432548996806145 test_loss: 0.5224120616912842
epoch: 128 training_loss 0.5277854576706886 test_loss: 0.5287396907806396
epoch: 129 training_loss 0.54725213855505 test_loss: 0.5452305793762207
epoch: 130 training_loss 0.5371137610077859 test_loss: 0.5398211002349853
epoch: 131 training_loss 0.5449206185340881 test_loss: 0.5358973979949951
epoch: 132 training_loss 0.5364058488607406 test_loss: 0.5579054355621338
epoch: 133 training_loss 0.5300238612294197 test_loss: 0.5191257953643799
epoch: 134 training_loss 0.5332102128863334 test_loss: 0.5364693641662598
epoch: 135 training_loss 0.5290545326471329 test_loss: 0.5241124153137207
epoch: 136 training_loss 0.5247397556900978 test_loss: 0.5132172584533692
epoch: 137 training_loss 0.5327540874481201 test_loss: 0.5415952682495118
epoch: 138 training_loss 0.5419566944241524 test_loss: 0.5330423355102539
epoch: 139 training_loss 0.5245673701167106 test_loss: 0.5304216861724853
epoch: 140 training_loss 0.5175672712922096 test_loss: 0.5146722793579102
epoch: 141 training_loss 0.5212699735164642 test_loss: 0.5273575782775879
epoch: 142 training_loss 0.5378326404094697 test_loss: 0.5121986389160156
epoch: 143 training_loss 0.5269330102205276 test_loss: 0.5271396160125732
epoch: 144 training_loss 0.5207142424583435 test_loss: 0.5176709651947021
epoch: 145 training_loss 0.5288294127583504 test_loss: 0.5451256275177002
epoch: 146 training_loss 0.5295603150129318 test_loss: 0.5412302494049073
epoch: 147 training_loss 0.5198347589373589 test_loss: 0.5344282150268554
epoch: 148 training_loss 0.5181019100546836 test_loss: 0.5322065353393555
epoch: 149 training_loss 0.5165195009112358 test_loss: 0.5077231407165528
2489.2522657571217
episode: 0 training return: tensor(295.0811, device='cuda:0')
episode: 1 training return: tensor(-243.1180, device='cuda:0')
episode: 2 training return: tensor(354.0963, device='cuda:0')
episode: 3 training return: tensor(-298.3529, device='cuda:0')
epoch: 1 test_true_pfm: 2733.262042810308 sim_pfm: 183.77518152166158
episode: 4 training return: tensor(346.3348, device='cuda:0')
episode: 5 training return: tensor(-186.3469, device='cuda:0')
episode: 6 training return: tensor(-293.9430, device='cuda:0')
episode: 7 training return: tensor(343.6824, device='cuda:0')
epoch: 2 test_true_pfm: 2790.9860375305434 sim_pfm: -14.379233194515109
episode: 8 training return: tensor(356.6383, device='cuda:0')
episode: 9 training return: tensor(331.2432, device='cuda:0')
episode: 10 training return: tensor(-269.4446, device='cuda:0')
episode: 11 training return: tensor(-284.0278, device='cuda:0')
epoch: 3 test_true_pfm: 2568.38227188095 sim_pfm: 133.02630131629607
episode: 12 training return: tensor(-350.7986, device='cuda:0')
episode: 13 training return: tensor(-23.0742, device='cuda:0')
episode: 14 training return: tensor(-304.3075, device='cuda:0')
episode: 15 training return: tensor(-90.4630, device='cuda:0')
epoch: 4 test_true_pfm: 1804.292535576208 sim_pfm: 116.4124895259738
episode: 16 training return: tensor(-265.2277, device='cuda:0')
episode: 17 training return: tensor(302.5777, device='cuda:0')
episode: 18 training return: tensor(224.6017, device='cuda:0')
episode: 19 training return: tensor(22.0635, device='cuda:0')
epoch: 5 test_true_pfm: 2748.1648925096556 sim_pfm: 15.240719448775053
episode: 20 training return: tensor(-251.1945, device='cuda:0')
episode: 21 training return: tensor(-50.9205, device='cuda:0')
episode: 22 training return: tensor(345.4038, device='cuda:0')
episode: 23 training return: tensor(-299.0276, device='cuda:0')
epoch: 6 test_true_pfm: 3285.9982727559895 sim_pfm: -238.70097247185186
episode: 24 training return: tensor(387.5696, device='cuda:0')
episode: 25 training return: tensor(-111.3066, device='cuda:0')
episode: 26 training return: tensor(-191.8378, device='cuda:0')
episode: 27 training return: tensor(335.1763, device='cuda:0')
epoch: 7 test_true_pfm: 2737.6448670701634 sim_pfm: -32.515559378856175
episode: 28 training return: tensor(-5.1112, device='cuda:0')
episode: 29 training return: tensor(359.2279, device='cuda:0')
episode: 30 training return: tensor(-237.7094, device='cuda:0')
episode: 31 training return: tensor(-298.8124, device='cuda:0')
epoch: 8 test_true_pfm: 2064.4738283765782 sim_pfm: 302.2173612106514
episode: 32 training return: tensor(-244.5497, device='cuda:0')
episode: 33 training return: tensor(-296.1009, device='cuda:0')
episode: 34 training return: tensor(-148.2365, device='cuda:0')
episode: 35 training return: tensor(391.1003, device='cuda:0')
epoch: 9 test_true_pfm: 2744.6785976234496 sim_pfm: 45.86932307241174
episode: 36 training return: tensor(40.5469, device='cuda:0')
episode: 37 training return: tensor(-99.1077, device='cuda:0')
episode: 38 training return: tensor(351.6465, device='cuda:0')
episode: 39 training return: tensor(-66.7240, device='cuda:0')
epoch: 10 test_true_pfm: 2376.605891208153 sim_pfm: -127.26887559068079
episode: 40 training return: tensor(294.4968, device='cuda:0')
episode: 41 training return: tensor(202.3016, device='cuda:0')
episode: 42 training return: tensor(-294.3388, device='cuda:0')
episode: 43 training return: tensor(-239.6754, device='cuda:0')
epoch: 11 test_true_pfm: 1974.401300147814 sim_pfm: -153.7885367324343
episode: 44 training return: tensor(-279.1613, device='cuda:0')
episode: 45 training return: tensor(120.2598, device='cuda:0')
episode: 46 training return: tensor(72.5822, device='cuda:0')
episode: 47 training return: tensor(-210.7879, device='cuda:0')
epoch: 12 test_true_pfm: 2544.4441411346484 sim_pfm: 163.2480928149695
episode: 48 training return: tensor(-116.7608, device='cuda:0')
episode: 49 training return: tensor(-235.1387, device='cuda:0')
episode: 50 training return: tensor(361.8858, device='cuda:0')
episode: 51 training return: tensor(-34.1827, device='cuda:0')
epoch: 13 test_true_pfm: 3055.3859631030477 sim_pfm: -0.5354899847103903
episode: 52 training return: tensor(247.6283, device='cuda:0')
episode: 53 training return: tensor(-344.4066, device='cuda:0')
episode: 54 training return: tensor(274.1002, device='cuda:0')
episode: 55 training return: tensor(-217.9611, device='cuda:0')
epoch: 14 test_true_pfm: 2438.0442419852075 sim_pfm: 35.43724568533556
episode: 56 training return: tensor(204.9843, device='cuda:0')
episode: 57 training return: tensor(-32.6224, device='cuda:0')
episode: 58 training return: tensor(327.3523, device='cuda:0')
episode: 59 training return: tensor(-29.8362, device='cuda:0')
epoch: 15 test_true_pfm: 2716.2026297969296 sim_pfm: 266.7848243491414
episode: 60 training return: tensor(368.1121, device='cuda:0')
episode: 61 training return: tensor(-239.0677, device='cuda:0')
episode: 62 training return: tensor(-327.4387, device='cuda:0')
episode: 63 training return: tensor(345.8345, device='cuda:0')
epoch: 16 test_true_pfm: 2715.3389426318263 sim_pfm: -26.094010495435214
episode: 64 training return: tensor(364.5052, device='cuda:0')
episode: 65 training return: tensor(196.2993, device='cuda:0')
episode: 66 training return: tensor(-98.0362, device='cuda:0')
episode: 67 training return: tensor(-11.2813, device='cuda:0')
epoch: 17 test_true_pfm: 2533.9589349699927 sim_pfm: 136.2825019895487
episode: 68 training return: tensor(-249.7729, device='cuda:0')
episode: 69 training return: tensor(-207.4484, device='cuda:0')
episode: 70 training return: tensor(-336.5013, device='cuda:0')
episode: 71 training return: tensor(-146.9268, device='cuda:0')
epoch: 18 test_true_pfm: 2324.3959128764836 sim_pfm: 178.90444133492807
episode: 72 training return: tensor(-232.1217, device='cuda:0')
episode: 73 training return: tensor(-272.7887, device='cuda:0')
episode: 74 training return: tensor(77.8275, device='cuda:0')
episode: 75 training return: tensor(359.5098, device='cuda:0')
epoch: 19 test_true_pfm: 2264.559199322844 sim_pfm: 63.90136306079997
episode: 76 training return: tensor(-240.4242, device='cuda:0')
episode: 77 training return: tensor(-192.3239, device='cuda:0')
episode: 78 training return: tensor(19.5336, device='cuda:0')
episode: 79 training return: tensor(-12.9167, device='cuda:0')
epoch: 20 test_true_pfm: 3214.615632861307 sim_pfm: 118.97483599015202
episode: 80 training return: tensor(388.2975, device='cuda:0')
episode: 81 training return: tensor(355.7320, device='cuda:0')
episode: 82 training return: tensor(-286.4948, device='cuda:0')
episode: 83 training return: tensor(95.9309, device='cuda:0')
epoch: 21 test_true_pfm: 2210.0283585007637 sim_pfm: 269.75348718913546
episode: 84 training return: tensor(76.3684, device='cuda:0')
episode: 85 training return: tensor(-230.6419, device='cuda:0')
episode: 86 training return: tensor(-41.7856, device='cuda:0')
episode: 87 training return: tensor(339.9305, device='cuda:0')
epoch: 22 test_true_pfm: 2038.263392735873 sim_pfm: 227.24882635404356
episode: 88 training return: tensor(292.5640, device='cuda:0')
episode: 89 training return: tensor(338.8445, device='cuda:0')
episode: 90 training return: tensor(328.9308, device='cuda:0')
episode: 91 training return: tensor(358.2366, device='cuda:0')
epoch: 23 test_true_pfm: 2675.7269781081272 sim_pfm: -25.539973417510435
episode: 92 training return: tensor(107.5447, device='cuda:0')
episode: 93 training return: tensor(-199.9384, device='cuda:0')
episode: 94 training return: tensor(384.2347, device='cuda:0')
episode: 95 training return: tensor(-198.6765, device='cuda:0')
epoch: 24 test_true_pfm: 2560.5261124562167 sim_pfm: 355.67735676149215
episode: 96 training return: tensor(-232.2679, device='cuda:0')
episode: 97 training return: tensor(-129.8015, device='cuda:0')
episode: 98 training return: tensor(90.1926, device='cuda:0')
episode: 99 training return: tensor(-77.0613, device='cuda:0')
epoch: 25 test_true_pfm: 3303.5177651786857 sim_pfm: 155.30623629650412
episode: 100 training return: tensor(201.4398, device='cuda:0')
episode: 101 training return: tensor(-137.8954, device='cuda:0')
episode: 102 training return: tensor(-56.6592, device='cuda:0')
episode: 103 training return: tensor(-262.1042, device='cuda:0')
epoch: 26 test_true_pfm: 3098.029606653914 sim_pfm: 265.1271851028626
episode: 104 training return: tensor(-292.6453, device='cuda:0')
episode: 105 training return: tensor(8.4073, device='cuda:0')
episode: 106 training return: tensor(-167.0924, device='cuda:0')
episode: 107 training return: tensor(-190.4920, device='cuda:0')
epoch: 27 test_true_pfm: 3003.0201114020674 sim_pfm: -26.596206140859675
episode: 108 training return: tensor(385.4908, device='cuda:0')
episode: 109 training return: tensor(188.0673, device='cuda:0')
episode: 110 training return: tensor(-95.4692, device='cuda:0')
episode: 111 training return: tensor(237.0001, device='cuda:0')
epoch: 28 test_true_pfm: 2182.419964586682 sim_pfm: 45.83312263529903
episode: 112 training return: tensor(-340.2822, device='cuda:0')
episode: 113 training return: tensor(-183.5675, device='cuda:0')
episode: 114 training return: tensor(-133.9136, device='cuda:0')
episode: 115 training return: tensor(-84.0255, device='cuda:0')
epoch: 29 test_true_pfm: 2179.745760847633 sim_pfm: -73.48622962262016
episode: 116 training return: tensor(-46.6844, device='cuda:0')
episode: 117 training return: tensor(91.2145, device='cuda:0')
episode: 118 training return: tensor(392.0897, device='cuda:0')
episode: 119 training return: tensor(348.9290, device='cuda:0')
epoch: 30 test_true_pfm: 2574.3984855936405 sim_pfm: -58.410467675110944
episode: 120 training return: tensor(387.9201, device='cuda:0')
episode: 121 training return: tensor(-210.5200, device='cuda:0')
episode: 122 training return: tensor(332.5664, device='cuda:0')
episode: 123 training return: tensor(32.7500, device='cuda:0')
epoch: 31 test_true_pfm: 3228.471215715645 sim_pfm: -201.8136544742932
episode: 124 training return: tensor(-157.4283, device='cuda:0')
episode: 125 training return: tensor(341.3360, device='cuda:0')
episode: 126 training return: tensor(264.3956, device='cuda:0')
episode: 127 training return: tensor(-177.1092, device='cuda:0')
epoch: 32 test_true_pfm: 2176.549365878192 sim_pfm: -1.054522361800385
episode: 128 training return: tensor(351.3963, device='cuda:0')
episode: 129 training return: tensor(370.8435, device='cuda:0')
episode: 130 training return: tensor(-245.6769, device='cuda:0')
episode: 131 training return: tensor(-225.5340, device='cuda:0')
epoch: 33 test_true_pfm: 2258.7243230081017 sim_pfm: 197.52803957702903
episode: 132 training return: tensor(304.9285, device='cuda:0')
episode: 133 training return: tensor(-164.1075, device='cuda:0')
episode: 134 training return: tensor(63.3826, device='cuda:0')
episode: 135 training return: tensor(358.9540, device='cuda:0')
epoch: 34 test_true_pfm: 2188.117466459598 sim_pfm: -28.94692798909576
episode: 136 training return: tensor(136.5645, device='cuda:0')
episode: 137 training return: tensor(338.2808, device='cuda:0')
episode: 138 training return: tensor(97.5933, device='cuda:0')
episode: 139 training return: tensor(349.4677, device='cuda:0')
epoch: 35 test_true_pfm: 1900.2919605137074 sim_pfm: 163.85244529601187
episode: 140 training return: tensor(13.7479, device='cuda:0')
episode: 141 training return: tensor(62.4662, device='cuda:0')
episode: 142 training return: tensor(356.0744, device='cuda:0')
episode: 143 training return: tensor(-245.1222, device='cuda:0')
epoch: 36 test_true_pfm: 2390.967484065598 sim_pfm: 52.03623674251139
episode: 144 training return: tensor(-56.9389, device='cuda:0')
episode: 145 training return: tensor(-343.9920, device='cuda:0')
episode: 146 training return: tensor(232.4724, device='cuda:0')
episode: 147 training return: tensor(-344.1805, device='cuda:0')
epoch: 37 test_true_pfm: 2227.0711822136577 sim_pfm: 340.4773501930176
episode: 148 training return: tensor(156.4684, device='cuda:0')
episode: 149 training return: tensor(44.3358, device='cuda:0')
episode: 150 training return: tensor(349.2005, device='cuda:0')
episode: 151 training return: tensor(-34.7959, device='cuda:0')
epoch: 38 test_true_pfm: 2774.4608778594743 sim_pfm: -32.91446614839757
episode: 152 training return: tensor(-7.9005, device='cuda:0')
episode: 153 training return: tensor(-184.1855, device='cuda:0')
episode: 154 training return: tensor(105.2611, device='cuda:0')
episode: 155 training return: tensor(279.1795, device='cuda:0')
epoch: 39 test_true_pfm: 2153.5541281635738 sim_pfm: -102.3967889380292
episode: 156 training return: tensor(61.1088, device='cuda:0')
episode: 157 training return: tensor(-305.7172, device='cuda:0')
episode: 158 training return: tensor(-75.5186, device='cuda:0')
episode: 159 training return: tensor(221.2266, device='cuda:0')
epoch: 40 test_true_pfm: 2855.382088361828 sim_pfm: -3.4698257260024548
episode: 160 training return: tensor(-178.5444, device='cuda:0')
episode: 161 training return: tensor(-207.8212, device='cuda:0')
episode: 162 training return: tensor(135.9683, device='cuda:0')
episode: 163 training return: tensor(422.3199, device='cuda:0')
epoch: 41 test_true_pfm: 2197.379611175628 sim_pfm: 8.375182780437171
episode: 164 training return: tensor(129.2295, device='cuda:0')
episode: 165 training return: tensor(375.9930, device='cuda:0')
episode: 166 training return: tensor(46.2880, device='cuda:0')
episode: 167 training return: tensor(28.9684, device='cuda:0')
epoch: 42 test_true_pfm: 2886.4322504311226 sim_pfm: -14.25593954968887
episode: 168 training return: tensor(339.7563, device='cuda:0')
episode: 169 training return: tensor(-101.1692, device='cuda:0')
episode: 170 training return: tensor(250.0157, device='cuda:0')
episode: 171 training return: tensor(389.4494, device='cuda:0')
epoch: 43 test_true_pfm: 2261.9808072216665 sim_pfm: -14.029715426266193
episode: 172 training return: tensor(310.5911, device='cuda:0')
episode: 173 training return: tensor(-224.2681, device='cuda:0')
episode: 174 training return: tensor(-62.5409, device='cuda:0')
episode: 175 training return: tensor(351.5627, device='cuda:0')
epoch: 44 test_true_pfm: 3113.6630793512923 sim_pfm: 57.65286444399195
episode: 176 training return: tensor(-312.6010, device='cuda:0')
episode: 177 training return: tensor(58.7173, device='cuda:0')
episode: 178 training return: tensor(-240.2418, device='cuda:0')
episode: 179 training return: tensor(-281.8098, device='cuda:0')
epoch: 45 test_true_pfm: 2683.4516537863083 sim_pfm: -56.047868116564736
episode: 180 training return: tensor(-342.8976, device='cuda:0')
episode: 181 training return: tensor(-168.5991, device='cuda:0')
episode: 182 training return: tensor(328.9843, device='cuda:0')
episode: 183 training return: tensor(182.3853, device='cuda:0')
epoch: 46 test_true_pfm: 2000.0978576154164 sim_pfm: 145.9623262961783
episode: 184 training return: tensor(-111.8635, device='cuda:0')
episode: 185 training return: tensor(-70.4482, device='cuda:0')
episode: 186 training return: tensor(267.2709, device='cuda:0')
episode: 187 training return: tensor(387.6030, device='cuda:0')
epoch: 47 test_true_pfm: 2715.2493851333015 sim_pfm: -75.65544307488017
episode: 188 training return: tensor(-229.3365, device='cuda:0')
episode: 189 training return: tensor(366.6790, device='cuda:0')
episode: 190 training return: tensor(-201.6379, device='cuda:0')
episode: 191 training return: tensor(335.7906, device='cuda:0')
epoch: 48 test_true_pfm: 2257.0445093234835 sim_pfm: -44.23153745701226
episode: 192 training return: tensor(236.3045, device='cuda:0')
episode: 193 training return: tensor(-1.0177, device='cuda:0')
episode: 194 training return: tensor(302.4703, device='cuda:0')
episode: 195 training return: tensor(-65.6658, device='cuda:0')
epoch: 49 test_true_pfm: 2191.2102391932704 sim_pfm: 352.81623740346794
episode: 196 training return: tensor(340.7255, device='cuda:0')
episode: 197 training return: tensor(-309.4237, device='cuda:0')
episode: 198 training return: tensor(353.1666, device='cuda:0')
episode: 199 training return: tensor(-229.9303, device='cuda:0')
epoch: 50 test_true_pfm: 2506.4980439894116 sim_pfm: 350.41920418377657
episode: 200 training return: tensor(-222.7580, device='cuda:0')
episode: 201 training return: tensor(10.5809, device='cuda:0')
episode: 202 training return: tensor(355.1555, device='cuda:0')
episode: 203 training return: tensor(351.8622, device='cuda:0')
epoch: 51 test_true_pfm: 2367.823453734303 sim_pfm: 220.12287414314537
episode: 204 training return: tensor(-28.4114, device='cuda:0')
episode: 205 training return: tensor(-30.4788, device='cuda:0')
episode: 206 training return: tensor(36.7727, device='cuda:0')
episode: 207 training return: tensor(293.0321, device='cuda:0')
epoch: 52 test_true_pfm: 3268.0654469147635 sim_pfm: 306.8876495429625
episode: 208 training return: tensor(338.4784, device='cuda:0')
episode: 209 training return: tensor(61.2624, device='cuda:0')
episode: 210 training return: tensor(359.3020, device='cuda:0')
episode: 211 training return: tensor(-318.0892, device='cuda:0')
epoch: 53 test_true_pfm: 2800.286045157096 sim_pfm: 178.31216642222716
episode: 212 training return: tensor(346.6928, device='cuda:0')
episode: 213 training return: tensor(352.4651, device='cuda:0')
episode: 214 training return: tensor(285.8189, device='cuda:0')
episode: 215 training return: tensor(-316.8611, device='cuda:0')
epoch: 54 test_true_pfm: 2523.8365953352354 sim_pfm: 7.614683657263716
episode: 216 training return: tensor(-46.9992, device='cuda:0')
episode: 217 training return: tensor(-56.4376, device='cuda:0')
episode: 218 training return: tensor(-59.0355, device='cuda:0')
episode: 219 training return: tensor(119.9119, device='cuda:0')
epoch: 55 test_true_pfm: 3238.1096373054247 sim_pfm: 183.87391946810143
episode: 220 training return: tensor(-228.4053, device='cuda:0')
episode: 221 training return: tensor(-225.2872, device='cuda:0')
episode: 222 training return: tensor(-24.0505, device='cuda:0')
episode: 223 training return: tensor(-257.4695, device='cuda:0')
epoch: 56 test_true_pfm: 2230.264514255146 sim_pfm: 332.37426905706525
episode: 224 training return: tensor(-287.3463, device='cuda:0')
episode: 225 training return: tensor(352.3347, device='cuda:0')
episode: 226 training return: tensor(-217.4225, device='cuda:0')
episode: 227 training return: tensor(385.7201, device='cuda:0')
epoch: 57 test_true_pfm: 1837.1790842692742 sim_pfm: 215.9659866845856
episode: 228 training return: tensor(11.6049, device='cuda:0')
episode: 229 training return: tensor(-169.5271, device='cuda:0')
episode: 230 training return: tensor(352.5295, device='cuda:0')
episode: 231 training return: tensor(-201.8889, device='cuda:0')
epoch: 58 test_true_pfm: 2181.6609941653237 sim_pfm: 197.7732652234845
episode: 232 training return: tensor(348.7865, device='cuda:0')
episode: 233 training return: tensor(118.5813, device='cuda:0')
episode: 234 training return: tensor(336.5078, device='cuda:0')
episode: 235 training return: tensor(365.4291, device='cuda:0')
epoch: 59 test_true_pfm: 1720.8183112392628 sim_pfm: 166.62524324306287
episode: 236 training return: tensor(-281.9540, device='cuda:0')
episode: 237 training return: tensor(378.0282, device='cuda:0')
episode: 238 training return: tensor(349.2331, device='cuda:0')
episode: 239 training return: tensor(-231.9553, device='cuda:0')
epoch: 60 test_true_pfm: 2693.2803055456766 sim_pfm: 182.19696985618793
episode: 240 training return: tensor(88.2926, device='cuda:0')
episode: 241 training return: tensor(-187.5727, device='cuda:0')
episode: 242 training return: tensor(-248.1404, device='cuda:0')
episode: 243 training return: tensor(233.9475, device='cuda:0')
epoch: 61 test_true_pfm: 1766.2403348376629 sim_pfm: 167.62067290282963
episode: 244 training return: tensor(372.6898, device='cuda:0')
episode: 245 training return: tensor(192.1922, device='cuda:0')
episode: 246 training return: tensor(305.8301, device='cuda:0')
episode: 247 training return: tensor(-165.5793, device='cuda:0')
epoch: 62 test_true_pfm: 2809.1569246124054 sim_pfm: 180.3933731408324
episode: 248 training return: tensor(259.0684, device='cuda:0')
episode: 249 training return: tensor(324.9125, device='cuda:0')
episode: 250 training return: tensor(26.0652, device='cuda:0')
episode: 251 training return: tensor(393.8494, device='cuda:0')
epoch: 63 test_true_pfm: 2186.6522081846015 sim_pfm: 237.89064043395533
episode: 252 training return: tensor(355.8176, device='cuda:0')
episode: 253 training return: tensor(407.4718, device='cuda:0')
episode: 254 training return: tensor(26.0180, device='cuda:0')
episode: 255 training return: tensor(406.5363, device='cuda:0')
epoch: 64 test_true_pfm: 2197.8042203928844 sim_pfm: 269.57427808929543
episode: 256 training return: tensor(-252.6782, device='cuda:0')
episode: 257 training return: tensor(-64.5147, device='cuda:0')
episode: 258 training return: tensor(-205.4170, device='cuda:0')
episode: 259 training return: tensor(398.4818, device='cuda:0')
epoch: 65 test_true_pfm: 3030.933878181666 sim_pfm: -100.20302334136795
episode: 260 training return: tensor(363.4416, device='cuda:0')
episode: 261 training return: tensor(350.7274, device='cuda:0')
episode: 262 training return: tensor(13.7826, device='cuda:0')
episode: 263 training return: tensor(332.1657, device='cuda:0')
epoch: 66 test_true_pfm: 2756.3186305316685 sim_pfm: -114.77944232958059
episode: 264 training return: tensor(-161.8222, device='cuda:0')
episode: 265 training return: tensor(-50.4066, device='cuda:0')
episode: 266 training return: tensor(48.1891, device='cuda:0')
episode: 267 training return: tensor(16.1913, device='cuda:0')
epoch: 67 test_true_pfm: 3220.0593278936417 sim_pfm: -0.511033948782521
episode: 268 training return: tensor(210.9503, device='cuda:0')
episode: 269 training return: tensor(-277.9743, device='cuda:0')
episode: 270 training return: tensor(-38.5863, device='cuda:0')
episode: 271 training return: tensor(-158.8751, device='cuda:0')
epoch: 68 test_true_pfm: 2497.540480295295 sim_pfm: 118.9647924627546
episode: 272 training return: tensor(14.1150, device='cuda:0')
episode: 273 training return: tensor(190.7474, device='cuda:0')
episode: 274 training return: tensor(173.9312, device='cuda:0')
episode: 275 training return: tensor(11.6134, device='cuda:0')
epoch: 69 test_true_pfm: 3236.371954401368 sim_pfm: 317.4917138227417
episode: 276 training return: tensor(-58.8754, device='cuda:0')
episode: 277 training return: tensor(43.3517, device='cuda:0')
episode: 278 training return: tensor(351.9870, device='cuda:0')
episode: 279 training return: tensor(128.4355, device='cuda:0')
epoch: 70 test_true_pfm: 2740.86470328098 sim_pfm: 173.8939434234829
episode: 280 training return: tensor(65.6981, device='cuda:0')
episode: 281 training return: tensor(137.4383, device='cuda:0')
episode: 282 training return: tensor(363.3313, device='cuda:0')
episode: 283 training return: tensor(368.3535, device='cuda:0')
epoch: 71 test_true_pfm: 2357.4688730350413 sim_pfm: 59.921998216227315
episode: 284 training return: tensor(-161.9635, device='cuda:0')
episode: 285 training return: tensor(-79.3690, device='cuda:0')
episode: 286 training return: tensor(-342.8879, device='cuda:0')
episode: 287 training return: tensor(-109.2955, device='cuda:0')
epoch: 72 test_true_pfm: 2911.5987087681174 sim_pfm: -202.09221426358758
episode: 288 training return: tensor(-211.1503, device='cuda:0')
episode: 289 training return: tensor(-37.9350, device='cuda:0')
episode: 290 training return: tensor(353.3505, device='cuda:0')
episode: 291 training return: tensor(283.1094, device='cuda:0')
epoch: 73 test_true_pfm: 2987.156961434517 sim_pfm: 299.1308389476035
episode: 292 training return: tensor(-279.5849, device='cuda:0')
episode: 293 training return: tensor(-107.3705, device='cuda:0')
episode: 294 training return: tensor(378.2908, device='cuda:0')
episode: 295 training return: tensor(264.8153, device='cuda:0')
epoch: 74 test_true_pfm: 2500.331056685682 sim_pfm: -73.83424025242373
episode: 296 training return: tensor(-311.3604, device='cuda:0')
episode: 297 training return: tensor(-195.7060, device='cuda:0')
episode: 298 training return: tensor(-83.9263, device='cuda:0')
episode: 299 training return: tensor(-95.0872, device='cuda:0')
epoch: 75 test_true_pfm: 2252.806909363411 sim_pfm: -82.65745852266748
episode: 300 training return: tensor(342.8119, device='cuda:0')
episode: 301 training return: tensor(-248.3017, device='cuda:0')
episode: 302 training return: tensor(158.3875, device='cuda:0')
episode: 303 training return: tensor(245.1775, device='cuda:0')
epoch: 76 test_true_pfm: 2106.489851017978 sim_pfm: 85.55636524940685
episode: 304 training return: tensor(-78.9493, device='cuda:0')
episode: 305 training return: tensor(192.7631, device='cuda:0')
episode: 306 training return: tensor(20.0149, device='cuda:0')
episode: 307 training return: tensor(348.7864, device='cuda:0')
epoch: 77 test_true_pfm: 2499.1024092070843 sim_pfm: 225.6991347731091
episode: 308 training return: tensor(368.1420, device='cuda:0')
episode: 309 training return: tensor(345.0335, device='cuda:0')
episode: 310 training return: tensor(307.8806, device='cuda:0')
episode: 311 training return: tensor(156.0580, device='cuda:0')
epoch: 78 test_true_pfm: 2653.7920128754663 sim_pfm: 195.02645752280173
episode: 312 training return: tensor(63.7886, device='cuda:0')
episode: 313 training return: tensor(281.5925, device='cuda:0')
episode: 314 training return: tensor(379.4914, device='cuda:0')
episode: 315 training return: tensor(320.6118, device='cuda:0')
epoch: 79 test_true_pfm: 2536.6744679923904 sim_pfm: -18.768696390325204
episode: 316 training return: tensor(-290.6882, device='cuda:0')
episode: 317 training return: tensor(-183.2905, device='cuda:0')
episode: 318 training return: tensor(44.4796, device='cuda:0')
episode: 319 training return: tensor(343.6889, device='cuda:0')
epoch: 80 test_true_pfm: 3250.388146051048 sim_pfm: -117.70197643719924
episode: 320 training return: tensor(33.0544, device='cuda:0')
episode: 321 training return: tensor(-252.3087, device='cuda:0')
episode: 322 training return: tensor(-65.9866, device='cuda:0')
episode: 323 training return: tensor(-189.1822, device='cuda:0')
epoch: 81 test_true_pfm: 3023.4911645246466 sim_pfm: 94.2830388682584
episode: 324 training return: tensor(32.2762, device='cuda:0')
episode: 325 training return: tensor(401.0120, device='cuda:0')
episode: 326 training return: tensor(-65.5725, device='cuda:0')
episode: 327 training return: tensor(-128.4609, device='cuda:0')
epoch: 82 test_true_pfm: 2770.2137511976357 sim_pfm: 190.26038921810687
episode: 328 training return: tensor(-158.1250, device='cuda:0')
episode: 329 training return: tensor(393.1660, device='cuda:0')
episode: 330 training return: tensor(382.1073, device='cuda:0')
episode: 331 training return: tensor(337.7322, device='cuda:0')
epoch: 83 test_true_pfm: 2768.130619335578 sim_pfm: -8.647974272976475
episode: 332 training return: tensor(-212.8011, device='cuda:0')
episode: 333 training return: tensor(-165.6669, device='cuda:0')
episode: 334 training return: tensor(370.9586, device='cuda:0')
episode: 335 training return: tensor(38.2383, device='cuda:0')
epoch: 84 test_true_pfm: 2932.519703429109 sim_pfm: -80.22533008401903
episode: 336 training return: tensor(364.9615, device='cuda:0')
episode: 337 training return: tensor(411.0122, device='cuda:0')
episode: 338 training return: tensor(-14.2024, device='cuda:0')
episode: 339 training return: tensor(-232.4463, device='cuda:0')
epoch: 85 test_true_pfm: 3057.0051042597097 sim_pfm: 118.26505902867454
episode: 340 training return: tensor(19.3661, device='cuda:0')
episode: 341 training return: tensor(379.2079, device='cuda:0')
episode: 342 training return: tensor(-313.9442, device='cuda:0')
episode: 343 training return: tensor(334.0466, device='cuda:0')
epoch: 86 test_true_pfm: 2439.6195397638626 sim_pfm: 306.399083631152
episode: 344 training return: tensor(20.4514, device='cuda:0')
episode: 345 training return: tensor(96.7667, device='cuda:0')
episode: 346 training return: tensor(384.4559, device='cuda:0')
episode: 347 training return: tensor(-110.0091, device='cuda:0')
epoch: 87 test_true_pfm: 2723.5167200081087 sim_pfm: 162.55349240847863
episode: 348 training return: tensor(53.1030, device='cuda:0')
episode: 349 training return: tensor(-250.7694, device='cuda:0')
episode: 350 training return: tensor(226.4064, device='cuda:0')
episode: 351 training return: tensor(104.9243, device='cuda:0')
epoch: 88 test_true_pfm: 2206.574898489193 sim_pfm: -87.31255888524659
episode: 352 training return: tensor(-197.5774, device='cuda:0')
episode: 353 training return: tensor(36.8534, device='cuda:0')
episode: 354 training return: tensor(-75.3498, device='cuda:0')
episode: 355 training return: tensor(-210.9185, device='cuda:0')
epoch: 89 test_true_pfm: 2080.6733969805505 sim_pfm: 211.92299670386515
episode: 356 training return: tensor(394.5705, device='cuda:0')
episode: 357 training return: tensor(163.1335, device='cuda:0')
episode: 358 training return: tensor(103.5580, device='cuda:0')
episode: 359 training return: tensor(-60.8909, device='cuda:0')
epoch: 90 test_true_pfm: 2249.4657175989796 sim_pfm: -13.329600429152682
episode: 360 training return: tensor(-218.4045, device='cuda:0')
episode: 361 training return: tensor(-240.5302, device='cuda:0')
episode: 362 training return: tensor(362.7377, device='cuda:0')
episode: 363 training return: tensor(-168.4907, device='cuda:0')
epoch: 91 test_true_pfm: 2192.33099146686 sim_pfm: -218.93690130875135
episode: 364 training return: tensor(298.9900, device='cuda:0')
episode: 365 training return: tensor(31.2448, device='cuda:0')
episode: 366 training return: tensor(-285.2245, device='cuda:0')
episode: 367 training return: tensor(358.1960, device='cuda:0')
epoch: 92 test_true_pfm: 2353.1248199502556 sim_pfm: 316.5545534437212
episode: 368 training return: tensor(-37.2012, device='cuda:0')
episode: 369 training return: tensor(-100.7799, device='cuda:0')
episode: 370 training return: tensor(-240.7658, device='cuda:0')
episode: 371 training return: tensor(390.9295, device='cuda:0')
epoch: 93 test_true_pfm: 2726.5314214830705 sim_pfm: 203.47962928031725
episode: 372 training return: tensor(353.1280, device='cuda:0')
episode: 373 training return: tensor(349.5396, device='cuda:0')
episode: 374 training return: tensor(-1.3807, device='cuda:0')
episode: 375 training return: tensor(385.2540, device='cuda:0')
epoch: 94 test_true_pfm: 2583.4858228685193 sim_pfm: 67.5926829323483
episode: 376 training return: tensor(-173.2561, device='cuda:0')
episode: 377 training return: tensor(229.8018, device='cuda:0')
episode: 378 training return: tensor(360.0763, device='cuda:0')
episode: 379 training return: tensor(347.9223, device='cuda:0')
epoch: 95 test_true_pfm: 2437.0304998157 sim_pfm: 95.59550161194056
episode: 380 training return: tensor(-169.9535, device='cuda:0')
episode: 381 training return: tensor(-61.9907, device='cuda:0')
episode: 382 training return: tensor(364.0554, device='cuda:0')
episode: 383 training return: tensor(398.7338, device='cuda:0')
epoch: 96 test_true_pfm: 2871.6346982907507 sim_pfm: 279.4998176655111
episode: 384 training return: tensor(-310.3920, device='cuda:0')
episode: 385 training return: tensor(379.5452, device='cuda:0')
episode: 386 training return: tensor(15.5129, device='cuda:0')
episode: 387 training return: tensor(34.1281, device='cuda:0')
epoch: 97 test_true_pfm: 2564.745636082647 sim_pfm: 20.394675441474345
episode: 388 training return: tensor(347.1814, device='cuda:0')
episode: 389 training return: tensor(369.8757, device='cuda:0')
episode: 390 training return: tensor(-244.9490, device='cuda:0')
episode: 391 training return: tensor(232.0096, device='cuda:0')
epoch: 98 test_true_pfm: 2542.7387742092797 sim_pfm: -3.666422530387839
episode: 392 training return: tensor(-308.1833, device='cuda:0')
episode: 393 training return: tensor(370.4594, device='cuda:0')
episode: 394 training return: tensor(-172.0136, device='cuda:0')
episode: 395 training return: tensor(101.0181, device='cuda:0')
epoch: 99 test_true_pfm: 1732.1664351755644 sim_pfm: 142.8602566740786
episode: 396 training return: tensor(-253.0187, device='cuda:0')
episode: 397 training return: tensor(-81.9597, device='cuda:0')
episode: 398 training return: tensor(-203.4772, device='cuda:0')
episode: 399 training return: tensor(-75.0792, device='cuda:0')
epoch: 100 test_true_pfm: 2319.670605222538 sim_pfm: 182.36148157172525
episode: 400 training return: tensor(-229.7741, device='cuda:0')
episode: 401 training return: tensor(-173.2719, device='cuda:0')
episode: 402 training return: tensor(352.7967, device='cuda:0')
episode: 403 training return: tensor(-109.8267, device='cuda:0')
epoch: 101 test_true_pfm: 3059.400818692033 sim_pfm: -124.63692329150702
episode: 404 training return: tensor(321.2055, device='cuda:0')
episode: 405 training return: tensor(346.5100, device='cuda:0')
episode: 406 training return: tensor(348.1267, device='cuda:0')
episode: 407 training return: tensor(351.0764, device='cuda:0')
epoch: 102 test_true_pfm: 2628.3889025558187 sim_pfm: 102.38657716681094
episode: 408 training return: tensor(348.1530, device='cuda:0')
episode: 409 training return: tensor(128.1165, device='cuda:0')
episode: 410 training return: tensor(-91.7343, device='cuda:0')
episode: 411 training return: tensor(118.7049, device='cuda:0')
epoch: 103 test_true_pfm: 2729.678697495217 sim_pfm: -16.853879232968513
episode: 412 training return: tensor(34.4914, device='cuda:0')
episode: 413 training return: tensor(-56.7985, device='cuda:0')
episode: 414 training return: tensor(350.4944, device='cuda:0')
episode: 415 training return: tensor(311.7120, device='cuda:0')
epoch: 104 test_true_pfm: 2706.0574146271297 sim_pfm: 158.5271078775016
episode: 416 training return: tensor(242.2064, device='cuda:0')
episode: 417 training return: tensor(378.6363, device='cuda:0')
episode: 418 training return: tensor(-216.2169, device='cuda:0')
episode: 419 training return: tensor(-3.6612, device='cuda:0')
epoch: 105 test_true_pfm: 1897.4587538155572 sim_pfm: 175.72476428064206
episode: 420 training return: tensor(387.6740, device='cuda:0')
episode: 421 training return: tensor(373.1664, device='cuda:0')
episode: 422 training return: tensor(-214.4575, device='cuda:0')
episode: 423 training return: tensor(-211.3831, device='cuda:0')
epoch: 106 test_true_pfm: 2841.9511957594186 sim_pfm: -36.39200527174398
episode: 424 training return: tensor(346.1689, device='cuda:0')
episode: 425 training return: tensor(-28.7386, device='cuda:0')
episode: 426 training return: tensor(-133.7730, device='cuda:0')
episode: 427 training return: tensor(-179.8324, device='cuda:0')
epoch: 107 test_true_pfm: 2597.28149067608 sim_pfm: 99.76520986296237
episode: 428 training return: tensor(251.8223, device='cuda:0')
episode: 429 training return: tensor(341.4218, device='cuda:0')
episode: 430 training return: tensor(-61.3589, device='cuda:0')
episode: 431 training return: tensor(107.3639, device='cuda:0')
epoch: 108 test_true_pfm: 1871.8013376534443 sim_pfm: 306.820106034788
episode: 432 training return: tensor(-281.7946, device='cuda:0')
episode: 433 training return: tensor(-307.7958, device='cuda:0')
episode: 434 training return: tensor(275.4669, device='cuda:0')
episode: 435 training return: tensor(-91.0696, device='cuda:0')
epoch: 109 test_true_pfm: 3111.9977612663292 sim_pfm: 246.54236237875497
episode: 436 training return: tensor(334.5053, device='cuda:0')
episode: 437 training return: tensor(-58.5179, device='cuda:0')
episode: 438 training return: tensor(349.1958, device='cuda:0')
episode: 439 training return: tensor(-63.7547, device='cuda:0')
epoch: 110 test_true_pfm: 2605.5749175408223 sim_pfm: 81.96282139599013
episode: 440 training return: tensor(-48.3285, device='cuda:0')
episode: 441 training return: tensor(334.0987, device='cuda:0')
episode: 442 training return: tensor(-44.7635, device='cuda:0')
episode: 443 training return: tensor(326.9532, device='cuda:0')
epoch: 111 test_true_pfm: 2453.0212168697217 sim_pfm: 72.00316780844393
episode: 444 training return: tensor(361.1989, device='cuda:0')
episode: 445 training return: tensor(116.4636, device='cuda:0')
episode: 446 training return: tensor(36.9654, device='cuda:0')
episode: 447 training return: tensor(16.4888, device='cuda:0')
epoch: 112 test_true_pfm: 2381.990476106366 sim_pfm: -4.418930169971039
episode: 448 training return: tensor(383.1350, device='cuda:0')
episode: 449 training return: tensor(-171.6215, device='cuda:0')
episode: 450 training return: tensor(351.0768, device='cuda:0')
episode: 451 training return: tensor(208.6902, device='cuda:0')
epoch: 113 test_true_pfm: 2648.4049798953138 sim_pfm: -97.53192434917825
episode: 452 training return: tensor(-207.4023, device='cuda:0')
episode: 453 training return: tensor(172.5190, device='cuda:0')
episode: 454 training return: tensor(383.8574, device='cuda:0')
episode: 455 training return: tensor(-195.3294, device='cuda:0')
epoch: 114 test_true_pfm: 2264.504727356201 sim_pfm: 103.09649388554196
episode: 456 training return: tensor(160.6735, device='cuda:0')
episode: 457 training return: tensor(-8.6630, device='cuda:0')
episode: 458 training return: tensor(-291.1673, device='cuda:0')
episode: 459 training return: tensor(26.5189, device='cuda:0')
epoch: 115 test_true_pfm: 2278.436540464505 sim_pfm: 194.18555929884315
episode: 460 training return: tensor(28.5308, device='cuda:0')
episode: 461 training return: tensor(-49.6147, device='cuda:0')
episode: 462 training return: tensor(354.9505, device='cuda:0')
episode: 463 training return: tensor(354.8073, device='cuda:0')
epoch: 116 test_true_pfm: 3032.2515839754565 sim_pfm: -2.5069468055153266
episode: 464 training return: tensor(348.1902, device='cuda:0')
episode: 465 training return: tensor(-246.9487, device='cuda:0')
episode: 466 training return: tensor(-293.4262, device='cuda:0')
episode: 467 training return: tensor(352.7323, device='cuda:0')
epoch: 117 test_true_pfm: 2289.8768812232606 sim_pfm: 388.2306248679233
episode: 468 training return: tensor(373.1185, device='cuda:0')
episode: 469 training return: tensor(-219.7616, device='cuda:0')
episode: 470 training return: tensor(210.2356, device='cuda:0')
episode: 471 training return: tensor(310.9935, device='cuda:0')
epoch: 118 test_true_pfm: 3255.3736823528166 sim_pfm: 115.59153148853996
episode: 472 training return: tensor(-249.4874, device='cuda:0')
episode: 473 training return: tensor(408.1645, device='cuda:0')
episode: 474 training return: tensor(-151.2332, device='cuda:0')
episode: 475 training return: tensor(-23.1965, device='cuda:0')
epoch: 119 test_true_pfm: 2354.8481863484362 sim_pfm: 303.5304171158544
episode: 476 training return: tensor(-185.0697, device='cuda:0')
episode: 477 training return: tensor(-173.6505, device='cuda:0')
episode: 478 training return: tensor(361.4513, device='cuda:0')
episode: 479 training return: tensor(-276.8617, device='cuda:0')
epoch: 120 test_true_pfm: 2015.9737786746985 sim_pfm: 234.31781652010977
episode: 480 training return: tensor(371.9157, device='cuda:0')
episode: 481 training return: tensor(-176.0031, device='cuda:0')
episode: 482 training return: tensor(-106.3074, device='cuda:0')
episode: 483 training return: tensor(327.5526, device='cuda:0')
epoch: 121 test_true_pfm: 2261.286689650462 sim_pfm: 94.48037216207013
episode: 484 training return: tensor(-135.8720, device='cuda:0')
episode: 485 training return: tensor(-191.7144, device='cuda:0')
episode: 486 training return: tensor(261.6872, device='cuda:0')
episode: 487 training return: tensor(49.6904, device='cuda:0')
epoch: 122 test_true_pfm: 2042.791931098897 sim_pfm: 211.07816735794768
episode: 488 training return: tensor(-229.6907, device='cuda:0')
episode: 489 training return: tensor(-165.6689, device='cuda:0')
episode: 490 training return: tensor(-216.9994, device='cuda:0')
episode: 491 training return: tensor(131.3418, device='cuda:0')
epoch: 123 test_true_pfm: 2616.5517754805255 sim_pfm: -20.438304895418696
episode: 492 training return: tensor(-288.4366, device='cuda:0')
episode: 493 training return: tensor(115.4305, device='cuda:0')
episode: 494 training return: tensor(126.0692, device='cuda:0')
episode: 495 training return: tensor(81.8127, device='cuda:0')
epoch: 124 test_true_pfm: 2265.6908600551697 sim_pfm: 79.63976815707672
episode: 496 training return: tensor(343.8081, device='cuda:0')
episode: 497 training return: tensor(348.6136, device='cuda:0')
episode: 498 training return: tensor(25.6614, device='cuda:0')
episode: 499 training return: tensor(333.2960, device='cuda:0')
epoch: 125 test_true_pfm: 2551.0212216785844 sim_pfm: -98.31224374426529
episode: 500 training return: tensor(-37.3713, device='cuda:0')
episode: 501 training return: tensor(340.1206, device='cuda:0')
episode: 502 training return: tensor(316.6750, device='cuda:0')
episode: 503 training return: tensor(400.2219, device='cuda:0')
epoch: 126 test_true_pfm: 2424.0556171839703 sim_pfm: 200.72864832775667
episode: 504 training return: tensor(412.1034, device='cuda:0')
episode: 505 training return: tensor(312.2208, device='cuda:0')
episode: 506 training return: tensor(-239.0825, device='cuda:0')
episode: 507 training return: tensor(128.3392, device='cuda:0')
epoch: 127 test_true_pfm: 2610.541823822819 sim_pfm: 231.86581095928946
episode: 508 training return: tensor(350.2524, device='cuda:0')
episode: 509 training return: tensor(344.9676, device='cuda:0')
episode: 510 training return: tensor(-141.7944, device='cuda:0')
episode: 511 training return: tensor(134.1874, device='cuda:0')
epoch: 128 test_true_pfm: 2113.9172786913996 sim_pfm: 272.86970846067805
episode: 512 training return: tensor(415.2095, device='cuda:0')
episode: 513 training return: tensor(387.3074, device='cuda:0')
episode: 514 training return: tensor(133.8097, device='cuda:0')
episode: 515 training return: tensor(371.8716, device='cuda:0')
epoch: 129 test_true_pfm: 2014.330275882349 sim_pfm: 256.66279702175717
episode: 516 training return: tensor(201.2937, device='cuda:0')
episode: 517 training return: tensor(-44.7437, device='cuda:0')
episode: 518 training return: tensor(37.0378, device='cuda:0')
episode: 519 training return: tensor(341.6041, device='cuda:0')
epoch: 130 test_true_pfm: 2469.3544187421826 sim_pfm: 286.79011471608345
episode: 520 training return: tensor(-124.2754, device='cuda:0')
episode: 521 training return: tensor(-180.3695, device='cuda:0')
episode: 522 training return: tensor(103.7775, device='cuda:0')
episode: 523 training return: tensor(-170.3102, device='cuda:0')
epoch: 131 test_true_pfm: 2575.129220294384 sim_pfm: 80.76735088089481
episode: 524 training return: tensor(-72.9323, device='cuda:0')
episode: 525 training return: tensor(14.5543, device='cuda:0')
episode: 526 training return: tensor(345.7507, device='cuda:0')
episode: 527 training return: tensor(213.2975, device='cuda:0')
epoch: 132 test_true_pfm: 2370.299779415866 sim_pfm: 57.45973170129582
episode: 528 training return: tensor(304.4451, device='cuda:0')
episode: 529 training return: tensor(337.9323, device='cuda:0')
episode: 530 training return: tensor(-6.4329, device='cuda:0')
episode: 531 training return: tensor(374.5573, device='cuda:0')
epoch: 133 test_true_pfm: 2828.94051339534 sim_pfm: -84.98209425667301
episode: 532 training return: tensor(348.4424, device='cuda:0')
episode: 533 training return: tensor(-164.4401, device='cuda:0')
episode: 534 training return: tensor(1.7418, device='cuda:0')
episode: 535 training return: tensor(-301.1856, device='cuda:0')
epoch: 134 test_true_pfm: 1971.2248597790683 sim_pfm: 184.12957906536758
episode: 536 training return: tensor(-0.4632, device='cuda:0')
episode: 537 training return: tensor(339.2456, device='cuda:0')
episode: 538 training return: tensor(308.2487, device='cuda:0')
episode: 539 training return: tensor(-62.4766, device='cuda:0')
epoch: 135 test_true_pfm: 2748.951883913305 sim_pfm: 133.07903306692606
episode: 540 training return: tensor(364.1925, device='cuda:0')
episode: 541 training return: tensor(83.2841, device='cuda:0')
episode: 542 training return: tensor(108.0010, device='cuda:0')
episode: 543 training return: tensor(69.5768, device='cuda:0')
epoch: 136 test_true_pfm: 2240.7713928763346 sim_pfm: 85.12985688028857
episode: 544 training return: tensor(352.1037, device='cuda:0')
episode: 545 training return: tensor(387.4446, device='cuda:0')
episode: 546 training return: tensor(14.9363, device='cuda:0')
episode: 547 training return: tensor(-179.8455, device='cuda:0')
epoch: 137 test_true_pfm: 3120.1905779365557 sim_pfm: -106.54004868647705
episode: 548 training return: tensor(312.9952, device='cuda:0')
episode: 549 training return: tensor(362.4105, device='cuda:0')
episode: 550 training return: tensor(-82.7000, device='cuda:0')
episode: 551 training return: tensor(-136.3622, device='cuda:0')
epoch: 138 test_true_pfm: 2894.5269705985297 sim_pfm: 72.48781414326125
episode: 552 training return: tensor(117.8969, device='cuda:0')
episode: 553 training return: tensor(24.2937, device='cuda:0')
episode: 554 training return: tensor(117.2150, device='cuda:0')
episode: 555 training return: tensor(344.2510, device='cuda:0')
epoch: 139 test_true_pfm: 2865.0745798821986 sim_pfm: 322.35742818020907
episode: 556 training return: tensor(-14.1698, device='cuda:0')
episode: 557 training return: tensor(-86.5238, device='cuda:0')
episode: 558 training return: tensor(143.4401, device='cuda:0')
episode: 559 training return: tensor(364.0931, device='cuda:0')
epoch: 140 test_true_pfm: 2458.4586936899163 sim_pfm: 192.34773651952855
episode: 560 training return: tensor(224.6105, device='cuda:0')
episode: 561 training return: tensor(232.2917, device='cuda:0')
episode: 562 training return: tensor(-78.9725, device='cuda:0')
episode: 563 training return: tensor(221.5538, device='cuda:0')
epoch: 141 test_true_pfm: 2532.81332737115 sim_pfm: 11.30530580188497
episode: 564 training return: tensor(331.6870, device='cuda:0')
episode: 565 training return: tensor(253.3374, device='cuda:0')
episode: 566 training return: tensor(-58.1876, device='cuda:0')
episode: 567 training return: tensor(-308.8028, device='cuda:0')
epoch: 142 test_true_pfm: 2137.2643590590305 sim_pfm: 189.0718175782822
episode: 568 training return: tensor(345.4168, device='cuda:0')
episode: 569 training return: tensor(-201.9419, device='cuda:0')
episode: 570 training return: tensor(298.3943, device='cuda:0')
episode: 571 training return: tensor(138.5792, device='cuda:0')
epoch: 143 test_true_pfm: 3316.2238636954644 sim_pfm: 134.44055397599004
episode: 572 training return: tensor(-90.8846, device='cuda:0')
episode: 573 training return: tensor(369.9709, device='cuda:0')
episode: 574 training return: tensor(-186.4856, device='cuda:0')
episode: 575 training return: tensor(-227.9439, device='cuda:0')
epoch: 144 test_true_pfm: 2937.08807106223 sim_pfm: 237.80548023341302
episode: 576 training return: tensor(208.1046, device='cuda:0')
episode: 577 training return: tensor(-281.9168, device='cuda:0')
episode: 578 training return: tensor(-214.6850, device='cuda:0')
episode: 579 training return: tensor(193.4786, device='cuda:0')
epoch: 145 test_true_pfm: 2727.548806524044 sim_pfm: 335.16124739606556
episode: 580 training return: tensor(-298.0202, device='cuda:0')
episode: 581 training return: tensor(146.4643, device='cuda:0')
episode: 582 training return: tensor(367.5658, device='cuda:0')
episode: 583 training return: tensor(21.1429, device='cuda:0')
epoch: 146 test_true_pfm: 3232.090622764564 sim_pfm: -167.0643576142223
episode: 584 training return: tensor(-261.3495, device='cuda:0')
episode: 585 training return: tensor(303.7183, device='cuda:0')
episode: 586 training return: tensor(63.2981, device='cuda:0')
episode: 587 training return: tensor(175.2563, device='cuda:0')
epoch: 147 test_true_pfm: 2911.7320688093373 sim_pfm: 171.45483606479442
episode: 588 training return: tensor(360.6118, device='cuda:0')
episode: 589 training return: tensor(365.0604, device='cuda:0')
episode: 590 training return: tensor(356.0943, device='cuda:0')
episode: 591 training return: tensor(-198.5573, device='cuda:0')
epoch: 148 test_true_pfm: 2429.263041382903 sim_pfm: 115.84146922578414
episode: 592 training return: tensor(-4.9780, device='cuda:0')
episode: 593 training return: tensor(-252.3731, device='cuda:0')
episode: 594 training return: tensor(-158.8590, device='cuda:0')
episode: 595 training return: tensor(125.8498, device='cuda:0')
epoch: 149 test_true_pfm: 2826.4477758915436 sim_pfm: 116.27001084239843
episode: 596 training return: tensor(60.4395, device='cuda:0')
episode: 597 training return: tensor(-26.1852, device='cuda:0')
episode: 598 training return: tensor(356.5617, device='cuda:0')
episode: 599 training return: tensor(36.2362, device='cuda:0')
epoch: 150 test_true_pfm: 2243.289587870155 sim_pfm: -114.80584520140353
