['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.20987099885940552 test_loss: 0.14088551998138427
epoch: 1 training_loss 0.14279180966317653 test_loss: 0.14545085430145263
epoch: 2 training_loss 0.12828243300318717 test_loss: 0.11159170866012573
epoch: 3 training_loss 0.11982645023614168 test_loss: 0.1239043116569519
epoch: 4 training_loss 0.11505248595029116 test_loss: 0.13104206323623657
epoch: 5 training_loss 0.11939523180946708 test_loss: 0.12169355154037476
epoch: 6 training_loss 0.12034990269690753 test_loss: 0.09928138852119446
epoch: 7 training_loss 0.1074037554487586 test_loss: 0.11118966341018677
epoch: 8 training_loss 0.10799826078116893 test_loss: 0.15002182722091675
epoch: 9 training_loss 0.11663405578583479 test_loss: 0.120250403881073
epoch: 10 training_loss 0.10959885321557522 test_loss: 0.10099124908447266
epoch: 11 training_loss 0.11957026455551385 test_loss: 0.08749614953994751
epoch: 12 training_loss 0.10836060013622045 test_loss: 0.10115129947662353
epoch: 13 training_loss 0.10738661294803023 test_loss: 0.10068235397338868
epoch: 14 training_loss 0.11190710231661796 test_loss: 0.097113698720932
epoch: 15 training_loss 0.10934737987816334 test_loss: 0.10236626863479614
epoch: 16 training_loss 0.11342341259121895 test_loss: 0.1155556321144104
epoch: 17 training_loss 0.10590626705437899 test_loss: 0.11069773435592652
epoch: 18 training_loss 0.10505802169442177 test_loss: 0.10441166162490845
epoch: 19 training_loss 0.11248288005590439 test_loss: 0.11342415809631348
epoch: 20 training_loss 0.1046330158226192 test_loss: 0.10136880874633789
epoch: 21 training_loss 0.10567025195807218 test_loss: 0.10633665323257446
epoch: 22 training_loss 0.10907173527404666 test_loss: 0.10897022485733032
epoch: 23 training_loss 0.1090872272849083 test_loss: 0.09929967522621155
epoch: 24 training_loss 0.10430219516158104 test_loss: 0.10146596431732177
epoch: 25 training_loss 0.10436188770458102 test_loss: 0.10307700634002685
epoch: 26 training_loss 0.10268007840961219 test_loss: 0.0961988091468811
epoch: 27 training_loss 0.1025287889689207 test_loss: 0.11704034805297851
epoch: 28 training_loss 0.10202219121158124 test_loss: 0.11377780437469483
epoch: 29 training_loss 0.10409041883423925 test_loss: 0.09990218877792359
epoch: 30 training_loss 0.10694377977401018 test_loss: 0.10414386987686157
epoch: 31 training_loss 0.10936484687030315 test_loss: 0.10848897695541382
epoch: 32 training_loss 0.10769788820296526 test_loss: 0.09766483306884766
epoch: 33 training_loss 0.10482877768576145 test_loss: 0.11239349842071533
epoch: 34 training_loss 0.09713784545660019 test_loss: 0.0877634584903717
epoch: 35 training_loss 0.10199124231934548 test_loss: 0.10209052562713623
epoch: 36 training_loss 0.09903812957927585 test_loss: 0.09372973442077637
epoch: 37 training_loss 0.09992131050676108 test_loss: 0.12448680400848389
epoch: 38 training_loss 0.10543965876102447 test_loss: 0.11593968868255615
epoch: 39 training_loss 0.09817236807197333 test_loss: 0.11494388580322265
epoch: 40 training_loss 0.1059842138364911 test_loss: 0.09345545172691345
epoch: 41 training_loss 0.10088019911199808 test_loss: 0.09084582328796387
epoch: 42 training_loss 0.10232758332043886 test_loss: 0.10754252672195434
epoch: 43 training_loss 0.10092929843813181 test_loss: 0.10387320518493652
epoch: 44 training_loss 0.10431388355791568 test_loss: 0.1156660795211792
epoch: 45 training_loss 0.09788744008168578 test_loss: 0.11258420944213868
epoch: 46 training_loss 0.10333880860358477 test_loss: 0.09561832547187805
epoch: 47 training_loss 0.10583818331360817 test_loss: 0.10594819784164429
epoch: 48 training_loss 0.09758831860497594 test_loss: 0.10393811464309692
epoch: 49 training_loss 0.09944709789007902 test_loss: 0.0950460433959961
epoch: 50 training_loss 0.10028005134314298 test_loss: 0.09606256484985351
epoch: 51 training_loss 0.10075269024819136 test_loss: 0.09612455368041992
epoch: 52 training_loss 0.10166007097810507 test_loss: 0.10425289869308471
epoch: 53 training_loss 0.10304805128835141 test_loss: 0.10801407098770141
epoch: 54 training_loss 0.10274362236261368 test_loss: 0.10470821857452392
epoch: 55 training_loss 0.09854361809790134 test_loss: 0.10577272176742554
epoch: 56 training_loss 0.09204243041574955 test_loss: 0.10002795457839966
epoch: 57 training_loss 0.10114876065403224 test_loss: 0.11106644868850708
epoch: 58 training_loss 0.1034626019001007 test_loss: 0.10159225463867187
epoch: 59 training_loss 0.10115499921143055 test_loss: 0.10350464582443238
epoch: 60 training_loss 0.10432023104280233 test_loss: 0.09949318170547486
epoch: 61 training_loss 0.10239357978105545 test_loss: 0.09430590271949768
epoch: 62 training_loss 0.1013893068023026 test_loss: 0.10455896854400634
epoch: 63 training_loss 0.09930684205144644 test_loss: 0.10850430727005005
epoch: 64 training_loss 0.10007146324962378 test_loss: 0.1170122742652893
epoch: 65 training_loss 0.10037889119237661 test_loss: 0.09904932975769043
epoch: 66 training_loss 0.10271287564188242 test_loss: 0.11374194622039795
epoch: 67 training_loss 0.09910996332764625 test_loss: 0.09632567763328552
epoch: 68 training_loss 0.10271078582853078 test_loss: 0.09993501901626586
epoch: 69 training_loss 0.09924279423430563 test_loss: 0.09941051602363586
epoch: 70 training_loss 0.10300896624103188 test_loss: 0.09840302467346192
epoch: 71 training_loss 0.09955126341432333 test_loss: 0.09771180152893066
epoch: 72 training_loss 0.10281114853918552 test_loss: 0.12140990495681762
epoch: 73 training_loss 0.10188797049224377 test_loss: 0.09980654120445251
epoch: 74 training_loss 0.10020756974816322 test_loss: 0.0907802164554596
epoch: 75 training_loss 0.09698720451444387 test_loss: 0.1008332371711731
epoch: 76 training_loss 0.09944825686514377 test_loss: 0.09872648119926453
epoch: 77 training_loss 0.10053508162498474 test_loss: 0.10380078554153442
epoch: 78 training_loss 0.10107157498598099 test_loss: 0.11195884943008423
epoch: 79 training_loss 0.10617995634675026 test_loss: 0.11424707174301148
epoch: 80 training_loss 0.09780829855706542 test_loss: 0.09975450038909912
epoch: 81 training_loss 0.09721851527690888 test_loss: 0.09468426108360291
epoch: 82 training_loss 0.09592282574623823 test_loss: 0.08550328612327576
epoch: 83 training_loss 0.09798311483114958 test_loss: 0.11152607202529907
epoch: 84 training_loss 0.1043373054638505 test_loss: 0.10386935472488404
epoch: 85 training_loss 0.0986676912009716 test_loss: 0.09927175641059875
epoch: 86 training_loss 0.10329629052430392 test_loss: 0.1096561074256897
epoch: 87 training_loss 0.09527780290693044 test_loss: 0.10123687982559204
epoch: 88 training_loss 0.09613159386441111 test_loss: 0.10008234977722168
epoch: 89 training_loss 0.09213721286505461 test_loss: 0.12596319913864135
epoch: 90 training_loss 0.09794727643951774 test_loss: 0.10061075687408447
epoch: 91 training_loss 0.10493846770375967 test_loss: 0.09840384125709534
epoch: 92 training_loss 0.10593074539676309 test_loss: 0.1076218843460083
epoch: 93 training_loss 0.09826066803187132 test_loss: 0.0968355119228363
epoch: 94 training_loss 0.10703772306442261 test_loss: 0.11506268978118897
epoch: 95 training_loss 0.10185171980410815 test_loss: 0.09121934175491334
epoch: 96 training_loss 0.10444006076082588 test_loss: 0.11392487287521362
epoch: 97 training_loss 0.09553436730057001 test_loss: 0.1016284704208374
epoch: 98 training_loss 0.09659323323518038 test_loss: 0.09481584429740905
epoch: 99 training_loss 0.09496835401281714 test_loss: 0.08668928742408752
epoch: 100 training_loss 0.09413822557777167 test_loss: 0.08816640377044678
epoch: 101 training_loss 0.09799119466915726 test_loss: 0.09769579172134399
epoch: 102 training_loss 0.1036123539879918 test_loss: 0.09520696997642517
epoch: 103 training_loss 0.09753997951745987 test_loss: 0.10465027093887329
epoch: 104 training_loss 0.10020157875493169 test_loss: 0.10016295909881592
epoch: 105 training_loss 0.09723104942589998 test_loss: 0.08918290734291076
epoch: 106 training_loss 0.10232565339654684 test_loss: 0.10709129571914673
epoch: 107 training_loss 0.09973832491785288 test_loss: 0.10533057451248169
epoch: 108 training_loss 0.10163436979055404 test_loss: 0.09635509252548217
epoch: 109 training_loss 0.0941761293169111 test_loss: 0.09069697260856628
epoch: 110 training_loss 0.09636026490479707 test_loss: 0.10143872499465942
epoch: 111 training_loss 0.09523659080266952 test_loss: 0.09767040610313416
epoch: 112 training_loss 0.10081546545028687 test_loss: 0.08966917991638183
epoch: 113 training_loss 0.09484250174835324 test_loss: 0.09610403180122376
epoch: 114 training_loss 0.10426308162510395 test_loss: 0.09747377634048462
epoch: 115 training_loss 0.0999513242766261 test_loss: 0.09780691862106324
epoch: 116 training_loss 0.10033749788999557 test_loss: 0.11095033884048462
epoch: 117 training_loss 0.10082053199410439 test_loss: 0.09716134071350098
epoch: 118 training_loss 0.10523632243275642 test_loss: 0.09321793913841248
epoch: 119 training_loss 0.09521920409053564 test_loss: 0.08851388096809387
epoch: 120 training_loss 0.09904899757355451 test_loss: 0.1111193299293518
epoch: 121 training_loss 0.09811959153972566 test_loss: 0.09902776479721069
epoch: 122 training_loss 0.09615381501615047 test_loss: 0.09230048656463623
epoch: 123 training_loss 0.0988124230876565 test_loss: 0.1042366623878479
epoch: 124 training_loss 0.10454757198691368 test_loss: 0.10484305620193482
epoch: 125 training_loss 0.09688412599265575 test_loss: 0.10091328620910645
epoch: 126 training_loss 0.10458911515772343 test_loss: 0.10383546352386475
epoch: 127 training_loss 0.0993083818629384 test_loss: 0.09619956612586975
epoch: 128 training_loss 0.09332488620653749 test_loss: 0.11671326160430909
epoch: 129 training_loss 0.10035893093794584 test_loss: 0.099932461977005
epoch: 130 training_loss 0.0970865611359477 test_loss: 0.09030894637107849
epoch: 131 training_loss 0.09846657177899033 test_loss: 0.0977867603302002
epoch: 132 training_loss 0.09477965479716659 test_loss: 0.09418579936027527
epoch: 133 training_loss 0.09714728653430939 test_loss: 0.11025069952011109
epoch: 134 training_loss 0.100171068739146 test_loss: 0.09635878205299378
epoch: 135 training_loss 0.10056809157133102 test_loss: 0.09715186953544616
epoch: 136 training_loss 0.10018672896549105 test_loss: 0.09449589848518372
epoch: 137 training_loss 0.09634822411462665 test_loss: 0.10418876409530639
epoch: 138 training_loss 0.10436383459717036 test_loss: 0.1033168911933899
epoch: 139 training_loss 0.10005001375451684 test_loss: 0.10157477855682373
epoch: 140 training_loss 0.10631605036556721 test_loss: 0.09837871193885803
epoch: 141 training_loss 0.09719187326729298 test_loss: 0.10329548120498658
epoch: 142 training_loss 0.09830045791342855 test_loss: 0.10002492666244507
epoch: 143 training_loss 0.0954359532892704 test_loss: 0.10235029458999634
epoch: 144 training_loss 0.09713497128337621 test_loss: 0.10225534439086914
epoch: 145 training_loss 0.09897125773131847 test_loss: 0.09496766328811646
epoch: 146 training_loss 0.0990541385114193 test_loss: 0.09713045358657837
epoch: 147 training_loss 0.10236158769577741 test_loss: 0.11650732755661011
epoch: 148 training_loss 0.10176161142066122 test_loss: 0.10237537622451783
epoch: 149 training_loss 0.10355792947113514 test_loss: 0.09842156767845153
epoch: 0 training_loss 8.99393075466156 test_loss: 6.1500293731689455
epoch: 1 training_loss 5.306177949905395 test_loss: 4.540091705322266
epoch: 2 training_loss 4.154888496398926 test_loss: 3.8786808013916017
epoch: 3 training_loss 3.4420853281021118 test_loss: 3.3002357482910156
epoch: 4 training_loss 2.997104663848877 test_loss: 2.8099241256713867
epoch: 5 training_loss 2.7251153683662412 test_loss: 2.5843387603759767
epoch: 6 training_loss 2.427951533794403 test_loss: 2.4346704483032227
epoch: 7 training_loss 2.3048457610607147 test_loss: 2.233862113952637
epoch: 8 training_loss 2.182715768814087 test_loss: 2.1853464126586912
epoch: 9 training_loss 2.0844066417217255 test_loss: 1.9530622482299804
epoch: 10 training_loss 1.9782057058811189 test_loss: 1.919114112854004
epoch: 11 training_loss 1.8744707989692688 test_loss: 1.8324617385864257
epoch: 12 training_loss 1.8129512798786163 test_loss: 1.7414569854736328
epoch: 13 training_loss 1.8073228406906128 test_loss: 1.8166114807128906
epoch: 14 training_loss 1.7355467438697816 test_loss: 1.6391731262207032
epoch: 15 training_loss 1.6883220052719117 test_loss: 1.6277687072753906
epoch: 16 training_loss 1.67741401553154 test_loss: 1.6364105224609375
epoch: 17 training_loss 1.5963903427124024 test_loss: 1.5805967330932618
epoch: 18 training_loss 1.583580151796341 test_loss: 1.5142943382263183
epoch: 19 training_loss 1.5427725481987 test_loss: 1.5093687057495118
epoch: 20 training_loss 1.5284516394138337 test_loss: 1.4793999671936036
epoch: 21 training_loss 1.4778126621246337 test_loss: 1.4541406631469727
epoch: 22 training_loss 1.4559047830104828 test_loss: 1.4414683341979981
epoch: 23 training_loss 1.4493773078918457 test_loss: 1.4251625061035156
epoch: 24 training_loss 1.402539896965027 test_loss: 1.4199546813964843
epoch: 25 training_loss 1.392112100124359 test_loss: 1.3255497932434082
epoch: 26 training_loss 1.3940955913066864 test_loss: 1.3056299209594726
epoch: 27 training_loss 1.3945234537124633 test_loss: 1.3244370460510253
epoch: 28 training_loss 1.3573179638385773 test_loss: 1.3163960456848145
epoch: 29 training_loss 1.3129227566719055 test_loss: 1.3060256004333497
epoch: 30 training_loss 1.3221303534507751 test_loss: 1.3122194290161133
epoch: 31 training_loss 1.2843781125545501 test_loss: 1.304739284515381
epoch: 32 training_loss 1.2903178536891937 test_loss: 1.324397659301758
epoch: 33 training_loss 1.2483793354034425 test_loss: 1.3007505416870118
epoch: 34 training_loss 1.275637537240982 test_loss: 1.2116422653198242
epoch: 35 training_loss 1.2529906487464906 test_loss: 1.232833480834961
epoch: 36 training_loss 1.227293267250061 test_loss: 1.1969558715820312
epoch: 37 training_loss 1.2094694143533706 test_loss: 1.249516201019287
epoch: 38 training_loss 1.2029715609550475 test_loss: 1.2021896362304687
epoch: 39 training_loss 1.1962238866090775 test_loss: 1.2829049110412598
epoch: 40 training_loss 1.1862976348400116 test_loss: 1.1897336006164552
epoch: 41 training_loss 1.1651891785860062 test_loss: 1.174990463256836
epoch: 42 training_loss 1.1556079173088074 test_loss: 1.1609490394592286
epoch: 43 training_loss 1.154835416674614 test_loss: 1.1646539688110351
epoch: 44 training_loss 1.1589404845237732 test_loss: 1.1563387870788575
epoch: 45 training_loss 1.136973946094513 test_loss: 1.1794151306152343
epoch: 46 training_loss 1.125100969672203 test_loss: 1.1064675331115723
epoch: 47 training_loss 1.12203832924366 test_loss: 1.1193350791931151
epoch: 48 training_loss 1.096434257030487 test_loss: 1.096859073638916
epoch: 49 training_loss 1.0887638306617737 test_loss: 1.0836363792419434
epoch: 50 training_loss 1.109168637394905 test_loss: 1.0526021003723145
epoch: 51 training_loss 1.1009948873519897 test_loss: 1.0856451034545898
epoch: 52 training_loss 1.0989922112226487 test_loss: 1.1142192840576173
epoch: 53 training_loss 1.0738326638936997 test_loss: 1.0979256629943848
epoch: 54 training_loss 1.0569479978084564 test_loss: 1.1016334533691405
epoch: 55 training_loss 1.0606424659490585 test_loss: 1.0564863204956054
epoch: 56 training_loss 1.0557011491060257 test_loss: 1.0862536430358887
epoch: 57 training_loss 1.053187210559845 test_loss: 1.0819047927856444
epoch: 58 training_loss 1.0277566307783126 test_loss: 1.0208627700805664
epoch: 59 training_loss 1.0407627046108245 test_loss: 0.9816513061523438
epoch: 60 training_loss 1.041751333475113 test_loss: 1.0584000587463378
epoch: 61 training_loss 1.0015466570854188 test_loss: 0.9991978645324707
epoch: 62 training_loss 1.033703287243843 test_loss: 0.9439202308654785
epoch: 63 training_loss 1.01284250497818 test_loss: 0.9918569564819336
epoch: 64 training_loss 1.0096399730443955 test_loss: 1.0503036499023437
epoch: 65 training_loss 1.0384913337230683 test_loss: 0.9788769721984864
epoch: 66 training_loss 0.9939864659309388 test_loss: 1.0067108154296875
epoch: 67 training_loss 1.002871693968773 test_loss: 1.05696439743042
epoch: 68 training_loss 1.0100220596790315 test_loss: 1.0087624549865724
epoch: 69 training_loss 0.9837715113162995 test_loss: 1.0003817558288575
epoch: 70 training_loss 0.9606131339073181 test_loss: 0.9457156181335449
epoch: 71 training_loss 0.9801090240478516 test_loss: 0.9818941116333008
epoch: 72 training_loss 0.9700302064418793 test_loss: 0.9762284278869628
epoch: 73 training_loss 0.9826079684495926 test_loss: 0.9661710739135743
epoch: 74 training_loss 0.9602522403001785 test_loss: 0.9775503158569336
epoch: 75 training_loss 0.9554455310106278 test_loss: 0.9538920402526856
epoch: 76 training_loss 0.9494427859783172 test_loss: 0.974089241027832
epoch: 77 training_loss 0.9480197882652283 test_loss: 0.9106261253356933
epoch: 78 training_loss 0.9552449244260788 test_loss: 0.9484232902526856
epoch: 79 training_loss 0.9548605453968048 test_loss: 0.9456966400146485
epoch: 80 training_loss 0.9378323769569397 test_loss: 0.9538372993469239
epoch: 81 training_loss 0.948472004532814 test_loss: 0.9395846366882324
epoch: 82 training_loss 0.9454586583375931 test_loss: 0.9156266212463379
epoch: 83 training_loss 0.9382987260818482 test_loss: 0.9095518112182617
epoch: 84 training_loss 0.9184884667396546 test_loss: 0.9524288177490234
epoch: 85 training_loss 0.9415918022394181 test_loss: 0.9088061332702637
epoch: 86 training_loss 0.9331314253807068 test_loss: 1.0280534744262695
epoch: 87 training_loss 0.9313966941833496 test_loss: 0.9699416160583496
epoch: 88 training_loss 0.9129147154092788 test_loss: 0.9295841217041015
epoch: 89 training_loss 0.9010389035940171 test_loss: 0.8733551025390625
epoch: 90 training_loss 0.9034974503517151 test_loss: 0.8790836334228516
epoch: 91 training_loss 0.9134984004497528 test_loss: 0.9300343513488769
epoch: 92 training_loss 0.9026353138685227 test_loss: 0.9057208061218261
epoch: 93 training_loss 0.9012469166517257 test_loss: 0.9000526428222656
epoch: 94 training_loss 0.9180918860435486 test_loss: 0.9011161804199219
epoch: 95 training_loss 0.8957267373800277 test_loss: 0.8836944580078125
epoch: 96 training_loss 0.9045739036798477 test_loss: 0.8753166198730469
epoch: 97 training_loss 0.8812982851266861 test_loss: 0.9289841651916504
epoch: 98 training_loss 0.8889153277873993 test_loss: 0.918852424621582
epoch: 99 training_loss 0.8923125678300857 test_loss: 0.9174559593200684
epoch: 100 training_loss 0.9058863490819931 test_loss: 0.8951198577880859
epoch: 101 training_loss 0.8838747811317443 test_loss: 0.8635765075683594
epoch: 102 training_loss 0.8941770249605179 test_loss: 0.8498340606689453
epoch: 103 training_loss 0.8786191987991333 test_loss: 0.8468791007995605
epoch: 104 training_loss 0.8696893757581711 test_loss: 0.8637197494506836
epoch: 105 training_loss 0.8551831287145615 test_loss: 0.8719735145568848
epoch: 106 training_loss 0.8543010634183884 test_loss: 0.8476834297180176
epoch: 107 training_loss 0.8731870770454406 test_loss: 0.9267054557800293
epoch: 108 training_loss 0.861304714679718 test_loss: 0.8715041160583497
epoch: 109 training_loss 0.8621246606111527 test_loss: 0.8363665580749512
epoch: 110 training_loss 0.8628033500909805 test_loss: 0.8869433403015137
epoch: 111 training_loss 0.8641734433174133 test_loss: 0.8237674713134766
epoch: 112 training_loss 0.8640755081176758 test_loss: 0.8506911277770997
epoch: 113 training_loss 0.8574182921648026 test_loss: 0.8271625518798829
epoch: 114 training_loss 0.8485789120197296 test_loss: 0.8679110527038574
epoch: 115 training_loss 0.8599231123924256 test_loss: 0.83697509765625
epoch: 116 training_loss 0.8577598935365677 test_loss: 0.8269396781921386
epoch: 117 training_loss 0.8505010330677032 test_loss: 0.8555972099304199
epoch: 118 training_loss 0.8394464933872223 test_loss: 0.8821170806884766
epoch: 119 training_loss 0.8601836973428726 test_loss: 0.8270919799804688
epoch: 120 training_loss 0.8295098584890366 test_loss: 0.8744982719421387
epoch: 121 training_loss 0.8434748637676239 test_loss: 0.8160722732543946
epoch: 122 training_loss 0.8503231149911881 test_loss: 0.8383997917175293
epoch: 123 training_loss 0.8446633392572402 test_loss: 0.863709545135498
epoch: 124 training_loss 0.82919917345047 test_loss: 0.8394271850585937
epoch: 125 training_loss 0.8308702743053437 test_loss: 0.847252082824707
epoch: 126 training_loss 0.8200954270362854 test_loss: 0.8286029815673828
epoch: 127 training_loss 0.8428121328353881 test_loss: 0.8328098297119141
epoch: 128 training_loss 0.8265905737876892 test_loss: 0.8367402076721191
epoch: 129 training_loss 0.8306873309612274 test_loss: 0.8430411338806152
epoch: 130 training_loss 0.8091991448402405 test_loss: 0.8381174087524415
epoch: 131 training_loss 0.8331224066019058 test_loss: 0.84549560546875
epoch: 132 training_loss 0.8100881648063659 test_loss: 0.7813743114471435
epoch: 133 training_loss 0.8247358334064484 test_loss: 0.8437426567077637
epoch: 134 training_loss 0.8249087828397751 test_loss: 0.7994461536407471
epoch: 135 training_loss 0.7961223262548447 test_loss: 0.8124028205871582
epoch: 136 training_loss 0.8092621314525604 test_loss: 0.85824613571167
epoch: 137 training_loss 0.8059107398986817 test_loss: 0.7804872035980225
epoch: 138 training_loss 0.8035396385192871 test_loss: 0.84787015914917
epoch: 139 training_loss 0.8109732794761658 test_loss: 0.7732592105865479
epoch: 140 training_loss 0.8021793562173843 test_loss: 0.8074069023132324
epoch: 141 training_loss 0.8121897315979004 test_loss: 0.7864920616149902
epoch: 142 training_loss 0.7915952056646347 test_loss: 0.8277385711669922
epoch: 143 training_loss 0.8018398982286453 test_loss: 0.8329583168029785
epoch: 144 training_loss 0.8113394385576248 test_loss: 0.8394015312194825
epoch: 145 training_loss 0.8105658495426178 test_loss: 0.8033556938171387
epoch: 146 training_loss 0.8013834452629089 test_loss: 0.7705723762512207
epoch: 147 training_loss 0.804396858215332 test_loss: 0.8381961822509766
epoch: 148 training_loss 0.7938689142465591 test_loss: 0.808140754699707
epoch: 149 training_loss 0.7899335658550263 test_loss: 0.8053504943847656
3116.085995717803
episode: 0 training return: tensor(-454.5059, device='cuda:0')
episode: 1 training return: tensor(165.5359, device='cuda:0')
episode: 2 training return: tensor(206.0347, device='cuda:0')
episode: 3 training return: tensor(144.5401, device='cuda:0')
epoch: 1 test_true_pfm: 3173.304985624924 sim_pfm: 189.49180151363058
episode: 4 training return: tensor(124.3409, device='cuda:0')
episode: 5 training return: tensor(128.7203, device='cuda:0')
episode: 6 training return: tensor(-464.0208, device='cuda:0')
episode: 7 training return: tensor(134.3902, device='cuda:0')
epoch: 2 test_true_pfm: 2987.066115783881 sim_pfm: 159.76511238228218
episode: 8 training return: tensor(-23.7976, device='cuda:0')
episode: 9 training return: tensor(-6.2740, device='cuda:0')
episode: 10 training return: tensor(133.5171, device='cuda:0')
episode: 11 training return: tensor(82.2185, device='cuda:0')
epoch: 3 test_true_pfm: 3197.9837575757515 sim_pfm: 180.08908221887154
episode: 12 training return: tensor(168.8013, device='cuda:0')
episode: 13 training return: tensor(-156.5533, device='cuda:0')
episode: 14 training return: tensor(215.5423, device='cuda:0')
episode: 15 training return: tensor(180.4052, device='cuda:0')
epoch: 4 test_true_pfm: 1968.8663657666057 sim_pfm: 106.2663824064269
episode: 16 training return: tensor(132.5437, device='cuda:0')
episode: 17 training return: tensor(209.9087, device='cuda:0')
episode: 18 training return: tensor(-163.1168, device='cuda:0')
episode: 19 training return: tensor(-82.8473, device='cuda:0')
epoch: 5 test_true_pfm: 1880.9084932407977 sim_pfm: -40.42645163455745
episode: 20 training return: tensor(222.2012, device='cuda:0')
episode: 21 training return: tensor(-375.1809, device='cuda:0')
episode: 22 training return: tensor(-350.3338, device='cuda:0')
episode: 23 training return: tensor(68.8133, device='cuda:0')
epoch: 6 test_true_pfm: 2862.8913515953695 sim_pfm: -50.62006479643363
episode: 24 training return: tensor(-342.8276, device='cuda:0')
episode: 25 training return: tensor(-78.4101, device='cuda:0')
episode: 26 training return: tensor(-13.8803, device='cuda:0')
episode: 27 training return: tensor(-49.7707, device='cuda:0')
epoch: 7 test_true_pfm: 3215.1289398253207 sim_pfm: 89.09651751103229
episode: 28 training return: tensor(159.4994, device='cuda:0')
episode: 29 training return: tensor(170.4578, device='cuda:0')
episode: 30 training return: tensor(155.3722, device='cuda:0')
episode: 31 training return: tensor(-139.8193, device='cuda:0')
epoch: 8 test_true_pfm: 3207.4539241147154 sim_pfm: 160.16008283102806
episode: 32 training return: tensor(-88.7862, device='cuda:0')
episode: 33 training return: tensor(153.2207, device='cuda:0')
episode: 34 training return: tensor(181.7941, device='cuda:0')
episode: 35 training return: tensor(-315.3802, device='cuda:0')
epoch: 9 test_true_pfm: 3179.827414252258 sim_pfm: 160.3910831087754
episode: 36 training return: tensor(-297.1599, device='cuda:0')
episode: 37 training return: tensor(70.3310, device='cuda:0')
episode: 38 training return: tensor(-74.6208, device='cuda:0')
episode: 39 training return: tensor(201.1796, device='cuda:0')
epoch: 10 test_true_pfm: 3214.5985245882657 sim_pfm: 171.86307663163947
episode: 40 training return: tensor(210.4137, device='cuda:0')
episode: 41 training return: tensor(161.4160, device='cuda:0')
episode: 42 training return: tensor(179.9405, device='cuda:0')
episode: 43 training return: tensor(-226.5836, device='cuda:0')
epoch: 11 test_true_pfm: 3173.3272681631497 sim_pfm: 151.72362622434352
episode: 44 training return: tensor(-260.2076, device='cuda:0')
episode: 45 training return: tensor(-376.4251, device='cuda:0')
episode: 46 training return: tensor(85.1757, device='cuda:0')
episode: 47 training return: tensor(162.7084, device='cuda:0')
epoch: 12 test_true_pfm: 3201.9296991910255 sim_pfm: 160.02494544691095
episode: 48 training return: tensor(-163.9856, device='cuda:0')
episode: 49 training return: tensor(154.1058, device='cuda:0')
episode: 50 training return: tensor(136.3953, device='cuda:0')
episode: 51 training return: tensor(-44.6003, device='cuda:0')
epoch: 13 test_true_pfm: 3156.5970258604207 sim_pfm: 166.37222479307093
episode: 52 training return: tensor(169.6303, device='cuda:0')
episode: 53 training return: tensor(-231.7793, device='cuda:0')
episode: 54 training return: tensor(-1.7227, device='cuda:0')
episode: 55 training return: tensor(200.7348, device='cuda:0')
epoch: 14 test_true_pfm: 2806.821693143782 sim_pfm: 148.7708511595459
episode: 56 training return: tensor(-4.7111, device='cuda:0')
episode: 57 training return: tensor(233.9400, device='cuda:0')
episode: 58 training return: tensor(170.0691, device='cuda:0')
episode: 59 training return: tensor(172.7340, device='cuda:0')
epoch: 15 test_true_pfm: 3189.3308732562177 sim_pfm: 195.53365272436835
episode: 60 training return: tensor(244.2084, device='cuda:0')
episode: 61 training return: tensor(172.6302, device='cuda:0')
episode: 62 training return: tensor(220.7608, device='cuda:0')
episode: 63 training return: tensor(201.7746, device='cuda:0')
epoch: 16 test_true_pfm: 3188.257100796296 sim_pfm: 153.58277581295502
episode: 64 training return: tensor(-84.6176, device='cuda:0')
episode: 65 training return: tensor(-68.2950, device='cuda:0')
episode: 66 training return: tensor(63.4368, device='cuda:0')
episode: 67 training return: tensor(188.1520, device='cuda:0')
epoch: 17 test_true_pfm: 3224.530729857641 sim_pfm: 198.52428659518287
episode: 68 training return: tensor(162.6930, device='cuda:0')
episode: 69 training return: tensor(170.0322, device='cuda:0')
episode: 70 training return: tensor(193.8150, device='cuda:0')
episode: 71 training return: tensor(161.9285, device='cuda:0')
epoch: 18 test_true_pfm: 3193.4138963994305 sim_pfm: 135.2994999580551
episode: 72 training return: tensor(171.2317, device='cuda:0')
episode: 73 training return: tensor(149.5030, device='cuda:0')
episode: 74 training return: tensor(-163.5332, device='cuda:0')
episode: 75 training return: tensor(-201.7023, device='cuda:0')
epoch: 19 test_true_pfm: 3185.855657981723 sim_pfm: 166.29285364070287
episode: 76 training return: tensor(154.4179, device='cuda:0')
episode: 77 training return: tensor(146.7890, device='cuda:0')
episode: 78 training return: tensor(5.6819, device='cuda:0')
episode: 79 training return: tensor(185.3384, device='cuda:0')
epoch: 20 test_true_pfm: 3234.733882507849 sim_pfm: 165.72823658113097
episode: 80 training return: tensor(219.7720, device='cuda:0')
episode: 81 training return: tensor(166.1467, device='cuda:0')
episode: 82 training return: tensor(128.4192, device='cuda:0')
episode: 83 training return: tensor(-430.1419, device='cuda:0')
epoch: 21 test_true_pfm: 3202.985708431242 sim_pfm: 159.81384577420735
episode: 84 training return: tensor(264.3762, device='cuda:0')
episode: 85 training return: tensor(182.7824, device='cuda:0')
episode: 86 training return: tensor(2.9228, device='cuda:0')
episode: 87 training return: tensor(211.7948, device='cuda:0')
epoch: 22 test_true_pfm: 3171.9808014002015 sim_pfm: 164.6074494404214
episode: 88 training return: tensor(164.6315, device='cuda:0')
episode: 89 training return: tensor(170.5288, device='cuda:0')
episode: 90 training return: tensor(-46.0348, device='cuda:0')
episode: 91 training return: tensor(-233.0782, device='cuda:0')
epoch: 23 test_true_pfm: 3215.921638202093 sim_pfm: 186.6558912769639
episode: 92 training return: tensor(212.4478, device='cuda:0')
episode: 93 training return: tensor(160.8348, device='cuda:0')
episode: 94 training return: tensor(215.3126, device='cuda:0')
episode: 95 training return: tensor(-110.1813, device='cuda:0')
epoch: 24 test_true_pfm: 3180.9385031456727 sim_pfm: 182.3630185238823
episode: 96 training return: tensor(162.4415, device='cuda:0')
episode: 97 training return: tensor(-221.6600, device='cuda:0')
episode: 98 training return: tensor(195.6698, device='cuda:0')
episode: 99 training return: tensor(146.1800, device='cuda:0')
epoch: 25 test_true_pfm: 3202.7146503891054 sim_pfm: 162.40623435903885
episode: 100 training return: tensor(-95.7849, device='cuda:0')
episode: 101 training return: tensor(168.5483, device='cuda:0')
episode: 102 training return: tensor(-208.4141, device='cuda:0')
episode: 103 training return: tensor(215.5739, device='cuda:0')
epoch: 26 test_true_pfm: 3215.399905274562 sim_pfm: 182.31428997026524
episode: 104 training return: tensor(180.8229, device='cuda:0')
episode: 105 training return: tensor(-391.8003, device='cuda:0')
episode: 106 training return: tensor(148.3922, device='cuda:0')
episode: 107 training return: tensor(-237.3349, device='cuda:0')
epoch: 27 test_true_pfm: 3189.733398609578 sim_pfm: 173.7781573079701
episode: 108 training return: tensor(214.7007, device='cuda:0')
episode: 109 training return: tensor(170.7982, device='cuda:0')
episode: 110 training return: tensor(-269.7128, device='cuda:0')
episode: 111 training return: tensor(233.0132, device='cuda:0')
epoch: 28 test_true_pfm: 3178.1680679923734 sim_pfm: 149.57214082304077
episode: 112 training return: tensor(127.6917, device='cuda:0')
episode: 113 training return: tensor(191.7738, device='cuda:0')
episode: 114 training return: tensor(-163.9730, device='cuda:0')
episode: 115 training return: tensor(188.9367, device='cuda:0')
epoch: 29 test_true_pfm: 3217.3047863053084 sim_pfm: 118.26474391729182
episode: 116 training return: tensor(0.4594, device='cuda:0')
episode: 117 training return: tensor(-49.2327, device='cuda:0')
episode: 118 training return: tensor(111.9605, device='cuda:0')
episode: 119 training return: tensor(233.4124, device='cuda:0')
epoch: 30 test_true_pfm: 3199.559348056602 sim_pfm: 194.01526502405372
episode: 120 training return: tensor(215.0871, device='cuda:0')
episode: 121 training return: tensor(-255.3452, device='cuda:0')
episode: 122 training return: tensor(209.7076, device='cuda:0')
episode: 123 training return: tensor(65.1281, device='cuda:0')
epoch: 31 test_true_pfm: 2972.9111283259704 sim_pfm: 217.8780690074394
episode: 124 training return: tensor(171.5529, device='cuda:0')
episode: 125 training return: tensor(192.7606, device='cuda:0')
episode: 126 training return: tensor(189.6039, device='cuda:0')
episode: 127 training return: tensor(148.1802, device='cuda:0')
epoch: 32 test_true_pfm: 3173.0257881458565 sim_pfm: 75.43730624439195
episode: 128 training return: tensor(168.2387, device='cuda:0')
episode: 129 training return: tensor(-419.9092, device='cuda:0')
episode: 130 training return: tensor(182.3175, device='cuda:0')
episode: 131 training return: tensor(-125.7466, device='cuda:0')
epoch: 33 test_true_pfm: 3120.007761058372 sim_pfm: 109.78611040841982
episode: 132 training return: tensor(-272.7022, device='cuda:0')
episode: 133 training return: tensor(-140.1291, device='cuda:0')
episode: 134 training return: tensor(-210.4484, device='cuda:0')
episode: 135 training return: tensor(186.8790, device='cuda:0')
epoch: 34 test_true_pfm: 2872.1236620640907 sim_pfm: 220.84800621216223
episode: 136 training return: tensor(-35.4692, device='cuda:0')
episode: 137 training return: tensor(171.9942, device='cuda:0')
episode: 138 training return: tensor(17.6004, device='cuda:0')
episode: 139 training return: tensor(-205.9080, device='cuda:0')
epoch: 35 test_true_pfm: 2758.1265405962527 sim_pfm: 159.2483783198501
episode: 140 training return: tensor(176.7586, device='cuda:0')
episode: 141 training return: tensor(-219.2095, device='cuda:0')
episode: 142 training return: tensor(-216.9779, device='cuda:0')
episode: 143 training return: tensor(-291.2823, device='cuda:0')
epoch: 36 test_true_pfm: 2623.5153187113783 sim_pfm: -9.509430356284915
episode: 144 training return: tensor(184.4202, device='cuda:0')
episode: 145 training return: tensor(-323.5869, device='cuda:0')
episode: 146 training return: tensor(155.0453, device='cuda:0')
episode: 147 training return: tensor(186.8741, device='cuda:0')
epoch: 37 test_true_pfm: 3214.5479796338345 sim_pfm: 177.63386253308272
episode: 148 training return: tensor(167.1273, device='cuda:0')
episode: 149 training return: tensor(169.8043, device='cuda:0')
episode: 150 training return: tensor(135.9051, device='cuda:0')
episode: 151 training return: tensor(54.7969, device='cuda:0')
epoch: 38 test_true_pfm: 2568.081690501908 sim_pfm: 205.42716017051134
episode: 152 training return: tensor(-139.7906, device='cuda:0')
episode: 153 training return: tensor(0.2221, device='cuda:0')
episode: 154 training return: tensor(195.2128, device='cuda:0')
episode: 155 training return: tensor(173.1350, device='cuda:0')
epoch: 39 test_true_pfm: 3220.859613419782 sim_pfm: 112.8653069975165
episode: 156 training return: tensor(193.4388, device='cuda:0')
episode: 157 training return: tensor(-247.5264, device='cuda:0')
episode: 158 training return: tensor(131.9023, device='cuda:0')
episode: 159 training return: tensor(93.4165, device='cuda:0')
epoch: 40 test_true_pfm: 2448.4302056146757 sim_pfm: 108.7665552823358
episode: 160 training return: tensor(-210.4880, device='cuda:0')
episode: 161 training return: tensor(180.0318, device='cuda:0')
episode: 162 training return: tensor(50.2009, device='cuda:0')
episode: 163 training return: tensor(202.9612, device='cuda:0')
epoch: 41 test_true_pfm: 3074.2425354876045 sim_pfm: 141.3604025512177
episode: 164 training return: tensor(169.9307, device='cuda:0')
episode: 165 training return: tensor(168.7718, device='cuda:0')
episode: 166 training return: tensor(231.4194, device='cuda:0')
episode: 167 training return: tensor(85.2980, device='cuda:0')
epoch: 42 test_true_pfm: 2861.3753767417925 sim_pfm: 206.7677804499593
episode: 168 training return: tensor(280.7291, device='cuda:0')
episode: 169 training return: tensor(-208.8004, device='cuda:0')
episode: 170 training return: tensor(193.7665, device='cuda:0')
episode: 171 training return: tensor(92.3899, device='cuda:0')
epoch: 43 test_true_pfm: 3236.334227512251 sim_pfm: 176.28790392377414
episode: 172 training return: tensor(175.3157, device='cuda:0')
episode: 173 training return: tensor(174.7836, device='cuda:0')
episode: 174 training return: tensor(158.7700, device='cuda:0')
episode: 175 training return: tensor(-92.3794, device='cuda:0')
epoch: 44 test_true_pfm: 2989.610170395361 sim_pfm: 1.8978124769831386
episode: 176 training return: tensor(128.7370, device='cuda:0')
episode: 177 training return: tensor(-58.6098, device='cuda:0')
episode: 178 training return: tensor(157.7468, device='cuda:0')
episode: 179 training return: tensor(35.8126, device='cuda:0')
epoch: 45 test_true_pfm: 2333.4693955561083 sim_pfm: -4.270366452741048
episode: 180 training return: tensor(-417.8726, device='cuda:0')
episode: 181 training return: tensor(193.7274, device='cuda:0')
episode: 182 training return: tensor(-112.4412, device='cuda:0')
episode: 183 training return: tensor(-47.4089, device='cuda:0')
epoch: 46 test_true_pfm: 2459.161510369692 sim_pfm: 204.3626726893514
episode: 184 training return: tensor(212.1463, device='cuda:0')
episode: 185 training return: tensor(78.3224, device='cuda:0')
episode: 186 training return: tensor(-394.6847, device='cuda:0')
episode: 187 training return: tensor(-144.2786, device='cuda:0')
epoch: 47 test_true_pfm: 3240.5050298604297 sim_pfm: 170.8302113103758
episode: 188 training return: tensor(-280.8647, device='cuda:0')
episode: 189 training return: tensor(182.9095, device='cuda:0')
episode: 190 training return: tensor(-109.9984, device='cuda:0')
episode: 191 training return: tensor(222.0836, device='cuda:0')
epoch: 48 test_true_pfm: 3252.02896274649 sim_pfm: 205.77766090084333
episode: 192 training return: tensor(236.9691, device='cuda:0')
episode: 193 training return: tensor(10.1704, device='cuda:0')
episode: 194 training return: tensor(-85.1945, device='cuda:0')
episode: 195 training return: tensor(177.2989, device='cuda:0')
epoch: 49 test_true_pfm: 3271.439142117484 sim_pfm: 7.328054441682373
episode: 196 training return: tensor(236.7287, device='cuda:0')
episode: 197 training return: tensor(-385.5181, device='cuda:0')
episode: 198 training return: tensor(-150.7459, device='cuda:0')
episode: 199 training return: tensor(-96.6858, device='cuda:0')
epoch: 50 test_true_pfm: 3232.2717368505932 sim_pfm: 87.23280339919923
episode: 200 training return: tensor(-292.5468, device='cuda:0')
episode: 201 training return: tensor(-209.8777, device='cuda:0')
episode: 202 training return: tensor(155.3922, device='cuda:0')
episode: 203 training return: tensor(241.2567, device='cuda:0')
epoch: 51 test_true_pfm: 2391.295299644033 sim_pfm: -56.519959194857314
episode: 204 training return: tensor(-134.1091, device='cuda:0')
episode: 205 training return: tensor(-164.8804, device='cuda:0')
episode: 206 training return: tensor(-337.8586, device='cuda:0')
episode: 207 training return: tensor(-114.3050, device='cuda:0')
epoch: 52 test_true_pfm: 3291.97458385432 sim_pfm: 156.5505564600268
episode: 208 training return: tensor(-38.2188, device='cuda:0')
episode: 209 training return: tensor(163.0412, device='cuda:0')
episode: 210 training return: tensor(-262.1787, device='cuda:0')
episode: 211 training return: tensor(-241.5788, device='cuda:0')
epoch: 53 test_true_pfm: 2598.288779460693 sim_pfm: 12.397778530954383
episode: 212 training return: tensor(251.7876, device='cuda:0')
episode: 213 training return: tensor(-420.6472, device='cuda:0')
episode: 214 training return: tensor(-315.5948, device='cuda:0')
episode: 215 training return: tensor(-160.2494, device='cuda:0')
epoch: 54 test_true_pfm: 2836.505408142983 sim_pfm: 203.91056062530456
episode: 216 training return: tensor(-175.7254, device='cuda:0')
episode: 217 training return: tensor(197.8587, device='cuda:0')
episode: 218 training return: tensor(-253.0307, device='cuda:0')
episode: 219 training return: tensor(172.5528, device='cuda:0')
epoch: 55 test_true_pfm: 3073.364438653276 sim_pfm: 81.70307787389417
episode: 220 training return: tensor(200.0579, device='cuda:0')
episode: 221 training return: tensor(-161.5524, device='cuda:0')
episode: 222 training return: tensor(-239.2671, device='cuda:0')
episode: 223 training return: tensor(220.6037, device='cuda:0')
epoch: 56 test_true_pfm: 3101.323158830559 sim_pfm: 165.12925972836092
episode: 224 training return: tensor(90.5965, device='cuda:0')
episode: 225 training return: tensor(-385.8292, device='cuda:0')
episode: 226 training return: tensor(-396.8319, device='cuda:0')
episode: 227 training return: tensor(191.9612, device='cuda:0')
epoch: 57 test_true_pfm: 3217.2892310711327 sim_pfm: 169.80972299792725
episode: 228 training return: tensor(265.4821, device='cuda:0')
episode: 229 training return: tensor(47.3779, device='cuda:0')
episode: 230 training return: tensor(-396.0376, device='cuda:0')
episode: 231 training return: tensor(39.0121, device='cuda:0')
epoch: 58 test_true_pfm: 3229.362123880075 sim_pfm: 90.34784334356664
episode: 232 training return: tensor(-293.0043, device='cuda:0')
episode: 233 training return: tensor(-236.6801, device='cuda:0')
episode: 234 training return: tensor(-429.7372, device='cuda:0')
episode: 235 training return: tensor(6.7640, device='cuda:0')
epoch: 59 test_true_pfm: 3045.0695013020536 sim_pfm: 38.20806859825583
episode: 236 training return: tensor(-188.6287, device='cuda:0')
episode: 237 training return: tensor(240.4031, device='cuda:0')
episode: 238 training return: tensor(-246.1588, device='cuda:0')
episode: 239 training return: tensor(217.5543, device='cuda:0')
epoch: 60 test_true_pfm: 2150.0998380391443 sim_pfm: -80.15329352700307
episode: 240 training return: tensor(183.4649, device='cuda:0')
episode: 241 training return: tensor(121.4138, device='cuda:0')
episode: 242 training return: tensor(-214.8870, device='cuda:0')
episode: 243 training return: tensor(263.7314, device='cuda:0')
epoch: 61 test_true_pfm: 1801.703696337702 sim_pfm: -272.94818304640165
episode: 244 training return: tensor(192.3993, device='cuda:0')
episode: 245 training return: tensor(-274.8268, device='cuda:0')
episode: 246 training return: tensor(200.3982, device='cuda:0')
episode: 247 training return: tensor(-17.1626, device='cuda:0')
epoch: 62 test_true_pfm: 2926.3164465467603 sim_pfm: -171.86962301761378
episode: 248 training return: tensor(-360.7693, device='cuda:0')
episode: 249 training return: tensor(202.8307, device='cuda:0')
episode: 250 training return: tensor(-416.1847, device='cuda:0')
episode: 251 training return: tensor(26.1635, device='cuda:0')
epoch: 63 test_true_pfm: 2732.4604154223166 sim_pfm: -32.266978679903936
episode: 252 training return: tensor(-17.1552, device='cuda:0')
episode: 253 training return: tensor(-228.0612, device='cuda:0')
episode: 254 training return: tensor(8.7118, device='cuda:0')
episode: 255 training return: tensor(-225.5144, device='cuda:0')
epoch: 64 test_true_pfm: 2532.1000827779335 sim_pfm: -38.8126442678913
episode: 256 training return: tensor(215.9751, device='cuda:0')
episode: 257 training return: tensor(-269.1273, device='cuda:0')
episode: 258 training return: tensor(139.3654, device='cuda:0')
episode: 259 training return: tensor(-125.8275, device='cuda:0')
epoch: 65 test_true_pfm: 2616.81561699079 sim_pfm: -174.91129890142474
episode: 260 training return: tensor(-289.7902, device='cuda:0')
episode: 261 training return: tensor(-319.6608, device='cuda:0')
episode: 262 training return: tensor(-220.9747, device='cuda:0')
episode: 263 training return: tensor(-132.9688, device='cuda:0')
epoch: 66 test_true_pfm: 1542.9142440330843 sim_pfm: -287.4156133309395
episode: 264 training return: tensor(204.8128, device='cuda:0')
episode: 265 training return: tensor(-370.7612, device='cuda:0')
episode: 266 training return: tensor(-32.1002, device='cuda:0')
episode: 267 training return: tensor(-211.5602, device='cuda:0')
epoch: 67 test_true_pfm: 1910.9191104429183 sim_pfm: -87.7838925945883
episode: 268 training return: tensor(147.8999, device='cuda:0')
episode: 269 training return: tensor(107.5475, device='cuda:0')
episode: 270 training return: tensor(-387.1598, device='cuda:0')
episode: 271 training return: tensor(-254.3592, device='cuda:0')
epoch: 68 test_true_pfm: 2957.265956743829 sim_pfm: 83.8584712681477
episode: 272 training return: tensor(239.9299, device='cuda:0')
episode: 273 training return: tensor(-228.6714, device='cuda:0')
episode: 274 training return: tensor(-426.0282, device='cuda:0')
episode: 275 training return: tensor(-269.0693, device='cuda:0')
epoch: 69 test_true_pfm: 2405.5095424694246 sim_pfm: -114.59591704493505
episode: 276 training return: tensor(-115.5789, device='cuda:0')
episode: 277 training return: tensor(-385.2830, device='cuda:0')
episode: 278 training return: tensor(-290.0517, device='cuda:0')
episode: 279 training return: tensor(-337.3163, device='cuda:0')
epoch: 70 test_true_pfm: 1802.0217429599063 sim_pfm: -68.41508656098934
episode: 280 training return: tensor(88.6016, device='cuda:0')
episode: 281 training return: tensor(-402.9201, device='cuda:0')
episode: 282 training return: tensor(-254.4055, device='cuda:0')
episode: 283 training return: tensor(-268.8270, device='cuda:0')
epoch: 71 test_true_pfm: 1669.3538962978243 sim_pfm: -172.01321763285281
episode: 284 training return: tensor(-295.8802, device='cuda:0')
episode: 285 training return: tensor(-245.7011, device='cuda:0')
episode: 286 training return: tensor(-299.0515, device='cuda:0')
episode: 287 training return: tensor(280.0204, device='cuda:0')
epoch: 72 test_true_pfm: 2539.7262358185526 sim_pfm: 143.44898602482863
episode: 288 training return: tensor(-176.2938, device='cuda:0')
episode: 289 training return: tensor(-299.6631, device='cuda:0')
episode: 290 training return: tensor(-155.0073, device='cuda:0')
episode: 291 training return: tensor(-203.0336, device='cuda:0')
epoch: 73 test_true_pfm: 2708.1976735885205 sim_pfm: 119.4177170508483
episode: 292 training return: tensor(-25.7123, device='cuda:0')
episode: 293 training return: tensor(-96.2673, device='cuda:0')
episode: 294 training return: tensor(-430.8934, device='cuda:0')
episode: 295 training return: tensor(-393.1078, device='cuda:0')
epoch: 74 test_true_pfm: 1668.739392949704 sim_pfm: -164.93390665443926
episode: 296 training return: tensor(214.6837, device='cuda:0')
episode: 297 training return: tensor(-420.6317, device='cuda:0')
episode: 298 training return: tensor(-147.9400, device='cuda:0')
episode: 299 training return: tensor(225.0288, device='cuda:0')
epoch: 75 test_true_pfm: 1629.3041616982134 sim_pfm: -219.72704695855887
episode: 300 training return: tensor(-69.9599, device='cuda:0')
episode: 301 training return: tensor(58.3274, device='cuda:0')
episode: 302 training return: tensor(-93.0695, device='cuda:0')
episode: 303 training return: tensor(-190.6627, device='cuda:0')
epoch: 76 test_true_pfm: 1904.5044827608115 sim_pfm: 56.50844037656983
episode: 304 training return: tensor(-402.9444, device='cuda:0')
episode: 305 training return: tensor(-210.7709, device='cuda:0')
episode: 306 training return: tensor(37.9368, device='cuda:0')
episode: 307 training return: tensor(-93.0043, device='cuda:0')
epoch: 77 test_true_pfm: 1854.9041962945132 sim_pfm: -231.68676738786357
episode: 308 training return: tensor(37.4496, device='cuda:0')
episode: 309 training return: tensor(176.1334, device='cuda:0')
episode: 310 training return: tensor(-251.6377, device='cuda:0')
episode: 311 training return: tensor(148.7052, device='cuda:0')
epoch: 78 test_true_pfm: 2995.996770658987 sim_pfm: 125.73407585661819
episode: 312 training return: tensor(-54.4072, device='cuda:0')
episode: 313 training return: tensor(-284.8079, device='cuda:0')
episode: 314 training return: tensor(-125.2968, device='cuda:0')
episode: 315 training return: tensor(-77.2788, device='cuda:0')
epoch: 79 test_true_pfm: 1675.989326524764 sim_pfm: -115.58814852374296
episode: 316 training return: tensor(-214.8812, device='cuda:0')
episode: 317 training return: tensor(-356.6624, device='cuda:0')
episode: 318 training return: tensor(-160.4812, device='cuda:0')
episode: 319 training return: tensor(-260.4249, device='cuda:0')
epoch: 80 test_true_pfm: 1873.3492091399676 sim_pfm: -204.66575125568002
episode: 320 training return: tensor(-127.4655, device='cuda:0')
episode: 321 training return: tensor(-342.2688, device='cuda:0')
episode: 322 training return: tensor(-224.0433, device='cuda:0')
episode: 323 training return: tensor(-384.4820, device='cuda:0')
epoch: 81 test_true_pfm: 1655.2704924700963 sim_pfm: -260.0859410787816
episode: 324 training return: tensor(-289.6482, device='cuda:0')
episode: 325 training return: tensor(-383.1068, device='cuda:0')
episode: 326 training return: tensor(-51.3087, device='cuda:0')
episode: 327 training return: tensor(162.4840, device='cuda:0')
epoch: 82 test_true_pfm: 2027.6412021229262 sim_pfm: -110.84380430002541
episode: 328 training return: tensor(-224.4391, device='cuda:0')
episode: 329 training return: tensor(-258.4478, device='cuda:0')
episode: 330 training return: tensor(29.0939, device='cuda:0')
episode: 331 training return: tensor(-267.5703, device='cuda:0')
epoch: 83 test_true_pfm: 1725.3107910215524 sim_pfm: -292.4132456685281
episode: 332 training return: tensor(-290.9565, device='cuda:0')
episode: 333 training return: tensor(-5.5145, device='cuda:0')
episode: 334 training return: tensor(114.0910, device='cuda:0')
episode: 335 training return: tensor(-80.1207, device='cuda:0')
epoch: 84 test_true_pfm: 1656.9195183908857 sim_pfm: -292.2965927910215
episode: 336 training return: tensor(-358.6352, device='cuda:0')
episode: 337 training return: tensor(-50.3062, device='cuda:0')
episode: 338 training return: tensor(-221.9518, device='cuda:0')
episode: 339 training return: tensor(-177.7354, device='cuda:0')
epoch: 85 test_true_pfm: 1548.2505771320602 sim_pfm: -279.38540047795203
episode: 340 training return: tensor(-323.6929, device='cuda:0')
episode: 341 training return: tensor(-301.1718, device='cuda:0')
episode: 342 training return: tensor(-442.8708, device='cuda:0')
episode: 343 training return: tensor(-288.3744, device='cuda:0')
epoch: 86 test_true_pfm: 1714.281263914733 sim_pfm: -316.66471067940194
episode: 344 training return: tensor(174.6114, device='cuda:0')
episode: 345 training return: tensor(-52.0166, device='cuda:0')
episode: 346 training return: tensor(-319.0719, device='cuda:0')
episode: 347 training return: tensor(-258.8171, device='cuda:0')
epoch: 87 test_true_pfm: 1896.0310362766852 sim_pfm: -75.20074132657221
episode: 348 training return: tensor(-338.4869, device='cuda:0')
episode: 349 training return: tensor(-122.2815, device='cuda:0')
episode: 350 training return: tensor(-207.7185, device='cuda:0')
episode: 351 training return: tensor(33.9571, device='cuda:0')
epoch: 88 test_true_pfm: 2595.258948683793 sim_pfm: -76.5785639815925
episode: 352 training return: tensor(-72.4053, device='cuda:0')
episode: 353 training return: tensor(-267.7156, device='cuda:0')
episode: 354 training return: tensor(-290.7357, device='cuda:0')
episode: 355 training return: tensor(-432.9531, device='cuda:0')
epoch: 89 test_true_pfm: 1580.2342935568195 sim_pfm: -214.02602679041834
episode: 356 training return: tensor(-293.7681, device='cuda:0')
episode: 357 training return: tensor(-61.2088, device='cuda:0')
episode: 358 training return: tensor(7.2223, device='cuda:0')
episode: 359 training return: tensor(39.9479, device='cuda:0')
epoch: 90 test_true_pfm: 1824.7992468764553 sim_pfm: -246.2862702176984
episode: 360 training return: tensor(-300.2565, device='cuda:0')
episode: 361 training return: tensor(-129.1507, device='cuda:0')
episode: 362 training return: tensor(-140.9299, device='cuda:0')
episode: 363 training return: tensor(-381.8045, device='cuda:0')
epoch: 91 test_true_pfm: 1906.3291044873674 sim_pfm: -102.73105514525862
episode: 364 training return: tensor(16.7884, device='cuda:0')
episode: 365 training return: tensor(-195.4702, device='cuda:0')
episode: 366 training return: tensor(-315.8454, device='cuda:0')
episode: 367 training return: tensor(-202.3668, device='cuda:0')
epoch: 92 test_true_pfm: 1989.822442310915 sim_pfm: -292.080466185386
episode: 368 training return: tensor(-218.9127, device='cuda:0')
episode: 369 training return: tensor(-313.9326, device='cuda:0')
episode: 370 training return: tensor(-210.6210, device='cuda:0')
episode: 371 training return: tensor(-380.0522, device='cuda:0')
epoch: 93 test_true_pfm: 1634.8933223230085 sim_pfm: -236.40894436836243
episode: 372 training return: tensor(290.7008, device='cuda:0')
episode: 373 training return: tensor(-397.3789, device='cuda:0')
episode: 374 training return: tensor(-252.4241, device='cuda:0')
episode: 375 training return: tensor(191.6074, device='cuda:0')
epoch: 94 test_true_pfm: 1625.7676250840311 sim_pfm: -288.746356069576
episode: 376 training return: tensor(-423.1272, device='cuda:0')
episode: 377 training return: tensor(-231.2884, device='cuda:0')
episode: 378 training return: tensor(-25.8315, device='cuda:0')
episode: 379 training return: tensor(128.1778, device='cuda:0')
epoch: 95 test_true_pfm: 1602.4934831729468 sim_pfm: -232.78659119251338
episode: 380 training return: tensor(183.7202, device='cuda:0')
episode: 381 training return: tensor(-267.4791, device='cuda:0')
episode: 382 training return: tensor(-172.7042, device='cuda:0')
episode: 383 training return: tensor(-401.5992, device='cuda:0')
epoch: 96 test_true_pfm: 1742.4051330395207 sim_pfm: -121.22864823161702
episode: 384 training return: tensor(-395.9926, device='cuda:0')
episode: 385 training return: tensor(-4.5977, device='cuda:0')
episode: 386 training return: tensor(-151.3273, device='cuda:0')
episode: 387 training return: tensor(-308.4733, device='cuda:0')
epoch: 97 test_true_pfm: 1595.51484574206 sim_pfm: -307.43099538062233
episode: 388 training return: tensor(-351.0349, device='cuda:0')
episode: 389 training return: tensor(116.9892, device='cuda:0')
episode: 390 training return: tensor(152.6282, device='cuda:0')
episode: 391 training return: tensor(-259.9522, device='cuda:0')
epoch: 98 test_true_pfm: 1639.2846061046614 sim_pfm: -248.46115444398797
episode: 392 training return: tensor(-312.8215, device='cuda:0')
episode: 393 training return: tensor(-114.6598, device='cuda:0')
episode: 394 training return: tensor(-321.9935, device='cuda:0')
episode: 395 training return: tensor(-157.4178, device='cuda:0')
epoch: 99 test_true_pfm: 1622.374901016775 sim_pfm: -265.52465133516426
episode: 396 training return: tensor(-356.1645, device='cuda:0')
episode: 397 training return: tensor(-219.5208, device='cuda:0')
episode: 398 training return: tensor(-373.0025, device='cuda:0')
episode: 399 training return: tensor(-152.3403, device='cuda:0')
epoch: 100 test_true_pfm: 1616.840009103805 sim_pfm: -305.06883224565536
episode: 400 training return: tensor(-249.9991, device='cuda:0')
episode: 401 training return: tensor(-246.8761, device='cuda:0')
episode: 402 training return: tensor(-147.4922, device='cuda:0')
episode: 403 training return: tensor(-267.3433, device='cuda:0')
epoch: 101 test_true_pfm: 2077.0950407267264 sim_pfm: -72.07917086706341
episode: 404 training return: tensor(255.6187, device='cuda:0')
episode: 405 training return: tensor(-76.9750, device='cuda:0')
episode: 406 training return: tensor(-430.7253, device='cuda:0')
episode: 407 training return: tensor(-82.4753, device='cuda:0')
epoch: 102 test_true_pfm: 1612.7107339730744 sim_pfm: -264.89344962727046
episode: 408 training return: tensor(-339.6984, device='cuda:0')
episode: 409 training return: tensor(-250.5556, device='cuda:0')
episode: 410 training return: tensor(-408.7982, device='cuda:0')
episode: 411 training return: tensor(-225.4874, device='cuda:0')
epoch: 103 test_true_pfm: 1691.490702776977 sim_pfm: -236.21879815065768
episode: 412 training return: tensor(-227.7433, device='cuda:0')
episode: 413 training return: tensor(-380.5792, device='cuda:0')
episode: 414 training return: tensor(-92.4765, device='cuda:0')
episode: 415 training return: tensor(-300.1001, device='cuda:0')
epoch: 104 test_true_pfm: 2141.2837178259374 sim_pfm: -181.0819384623804
episode: 416 training return: tensor(-338.7015, device='cuda:0')
episode: 417 training return: tensor(-164.0265, device='cuda:0')
episode: 418 training return: tensor(-241.5733, device='cuda:0')
episode: 419 training return: tensor(-332.4684, device='cuda:0')
epoch: 105 test_true_pfm: 1780.9507134323367 sim_pfm: -304.5043922030988
episode: 420 training return: tensor(-259.6768, device='cuda:0')
episode: 421 training return: tensor(-429.7057, device='cuda:0')
episode: 422 training return: tensor(-291.4292, device='cuda:0')
episode: 423 training return: tensor(-393.0913, device='cuda:0')
epoch: 106 test_true_pfm: 1923.3357486662346 sim_pfm: -271.35719883069396
episode: 424 training return: tensor(-381.4270, device='cuda:0')
episode: 425 training return: tensor(-183.1568, device='cuda:0')
episode: 426 training return: tensor(215.2488, device='cuda:0')
episode: 427 training return: tensor(-298.8286, device='cuda:0')
epoch: 107 test_true_pfm: 1910.2313963475935 sim_pfm: -309.079875726117
episode: 428 training return: tensor(-116.5746, device='cuda:0')
episode: 429 training return: tensor(-217.0196, device='cuda:0')
episode: 430 training return: tensor(-295.6659, device='cuda:0')
episode: 431 training return: tensor(-3.4246, device='cuda:0')
epoch: 108 test_true_pfm: 1949.3791876402208 sim_pfm: -298.7398902565862
episode: 432 training return: tensor(-217.0851, device='cuda:0')
episode: 433 training return: tensor(-24.0929, device='cuda:0')
episode: 434 training return: tensor(185.8775, device='cuda:0')
episode: 435 training return: tensor(-229.1484, device='cuda:0')
epoch: 109 test_true_pfm: 1588.8938509884576 sim_pfm: -316.12153256797075
episode: 436 training return: tensor(-319.7062, device='cuda:0')
episode: 437 training return: tensor(-382.7646, device='cuda:0')
episode: 438 training return: tensor(-243.5646, device='cuda:0')
episode: 439 training return: tensor(-61.0156, device='cuda:0')
epoch: 110 test_true_pfm: 1987.5880974241506 sim_pfm: -345.9650151198148
episode: 440 training return: tensor(-262.8492, device='cuda:0')
episode: 441 training return: tensor(-289.2130, device='cuda:0')
episode: 442 training return: tensor(-390.3433, device='cuda:0')
episode: 443 training return: tensor(-312.2003, device='cuda:0')
epoch: 111 test_true_pfm: 1951.3569195258387 sim_pfm: -299.16279553082614
episode: 444 training return: tensor(-242.4967, device='cuda:0')
episode: 445 training return: tensor(-11.7395, device='cuda:0')
episode: 446 training return: tensor(2.8447, device='cuda:0')
episode: 447 training return: tensor(-280.7500, device='cuda:0')
epoch: 112 test_true_pfm: 1665.402708566916 sim_pfm: -115.60345116783476
episode: 448 training return: tensor(-177.9604, device='cuda:0')
episode: 449 training return: tensor(-401.5482, device='cuda:0')
episode: 450 training return: tensor(-62.2943, device='cuda:0')
episode: 451 training return: tensor(-290.9589, device='cuda:0')
epoch: 113 test_true_pfm: 1600.9362243154471 sim_pfm: -299.2278165986548
episode: 452 training return: tensor(-258.9079, device='cuda:0')
episode: 453 training return: tensor(-238.9261, device='cuda:0')
episode: 454 training return: tensor(79.6269, device='cuda:0')
episode: 455 training return: tensor(-313.9766, device='cuda:0')
epoch: 114 test_true_pfm: 1863.2056195707871 sim_pfm: -257.0386263859885
episode: 456 training return: tensor(-376.3978, device='cuda:0')
episode: 457 training return: tensor(-197.5214, device='cuda:0')
episode: 458 training return: tensor(-294.8078, device='cuda:0')
episode: 459 training return: tensor(-47.8582, device='cuda:0')
epoch: 115 test_true_pfm: 1577.1163740882178 sim_pfm: -274.7510362606651
episode: 460 training return: tensor(-384.6157, device='cuda:0')
episode: 461 training return: tensor(-292.0367, device='cuda:0')
episode: 462 training return: tensor(-306.5197, device='cuda:0')
episode: 463 training return: tensor(-140.8000, device='cuda:0')
epoch: 116 test_true_pfm: 1627.8089142407505 sim_pfm: -294.55366831464926
episode: 464 training return: tensor(-294.0107, device='cuda:0')
episode: 465 training return: tensor(-277.7313, device='cuda:0')
episode: 466 training return: tensor(-266.9101, device='cuda:0')
episode: 467 training return: tensor(-133.7567, device='cuda:0')
epoch: 117 test_true_pfm: 1404.7979693474488 sim_pfm: -408.51760874991305
episode: 468 training return: tensor(-322.7588, device='cuda:0')
episode: 469 training return: tensor(-320.6755, device='cuda:0')
episode: 470 training return: tensor(-190.6870, device='cuda:0')
episode: 471 training return: tensor(-318.1104, device='cuda:0')
epoch: 118 test_true_pfm: 1641.832691156663 sim_pfm: -263.4694556044803
episode: 472 training return: tensor(-366.0833, device='cuda:0')
episode: 473 training return: tensor(-251.8736, device='cuda:0')
episode: 474 training return: tensor(-282.7031, device='cuda:0')
episode: 475 training return: tensor(-264.6157, device='cuda:0')
epoch: 119 test_true_pfm: 1662.6593069886094 sim_pfm: -301.1118691663917
episode: 476 training return: tensor(-429.1139, device='cuda:0')
episode: 477 training return: tensor(-327.5344, device='cuda:0')
episode: 478 training return: tensor(-342.9841, device='cuda:0')
episode: 479 training return: tensor(275.1933, device='cuda:0')
epoch: 120 test_true_pfm: 1910.0395710135954 sim_pfm: -189.20329011049276
episode: 480 training return: tensor(-145.1186, device='cuda:0')
episode: 481 training return: tensor(-231.2995, device='cuda:0')
episode: 482 training return: tensor(-230.7982, device='cuda:0')
episode: 483 training return: tensor(-328.8898, device='cuda:0')
epoch: 121 test_true_pfm: 1728.593790691083 sim_pfm: -272.66609582013916
episode: 484 training return: tensor(-336.4023, device='cuda:0')
episode: 485 training return: tensor(-335.7962, device='cuda:0')
episode: 486 training return: tensor(-130.8554, device='cuda:0')
episode: 487 training return: tensor(-85.4663, device='cuda:0')
epoch: 122 test_true_pfm: 1599.0279712330566 sim_pfm: -301.1783794471606
episode: 488 training return: tensor(-285.1754, device='cuda:0')
episode: 489 training return: tensor(-289.8065, device='cuda:0')
episode: 490 training return: tensor(-284.5311, device='cuda:0')
episode: 491 training return: tensor(-417.3848, device='cuda:0')
epoch: 123 test_true_pfm: 1718.9642927349644 sim_pfm: -306.60160945247236
episode: 492 training return: tensor(-67.5499, device='cuda:0')
episode: 493 training return: tensor(-305.7888, device='cuda:0')
episode: 494 training return: tensor(-207.5671, device='cuda:0')
episode: 495 training return: tensor(-299.6998, device='cuda:0')
epoch: 124 test_true_pfm: 1669.1546226491682 sim_pfm: -288.2127585007499
episode: 496 training return: tensor(-282.6213, device='cuda:0')
episode: 497 training return: tensor(-306.3708, device='cuda:0')
episode: 498 training return: tensor(105.9556, device='cuda:0')
episode: 499 training return: tensor(-119.0922, device='cuda:0')
epoch: 125 test_true_pfm: 1526.0029255090128 sim_pfm: -115.60785874142312
episode: 500 training return: tensor(-221.0666, device='cuda:0')
episode: 501 training return: tensor(-35.1008, device='cuda:0')
episode: 502 training return: tensor(-339.8432, device='cuda:0')
episode: 503 training return: tensor(-80.8702, device='cuda:0')
epoch: 126 test_true_pfm: 1721.949996698615 sim_pfm: -309.19855278509203
episode: 504 training return: tensor(-360.2122, device='cuda:0')
episode: 505 training return: tensor(-405.3694, device='cuda:0')
episode: 506 training return: tensor(-255.2750, device='cuda:0')
episode: 507 training return: tensor(-334.9236, device='cuda:0')
epoch: 127 test_true_pfm: 1602.5306741968059 sim_pfm: -342.6641414273278
episode: 508 training return: tensor(-298.6234, device='cuda:0')
episode: 509 training return: tensor(-296.4306, device='cuda:0')
episode: 510 training return: tensor(-307.6597, device='cuda:0')
episode: 511 training return: tensor(-403.6817, device='cuda:0')
epoch: 128 test_true_pfm: 1519.0041930197785 sim_pfm: -334.0486659073892
episode: 512 training return: tensor(-432.5076, device='cuda:0')
episode: 513 training return: tensor(-52.3394, device='cuda:0')
episode: 514 training return: tensor(-220.2749, device='cuda:0')
episode: 515 training return: tensor(-130.0480, device='cuda:0')
epoch: 129 test_true_pfm: 1766.735749404471 sim_pfm: -250.16584988010194
episode: 516 training return: tensor(-394.7698, device='cuda:0')
episode: 517 training return: tensor(-407.0235, device='cuda:0')
episode: 518 training return: tensor(-426.6275, device='cuda:0')
episode: 519 training return: tensor(-376.8972, device='cuda:0')
epoch: 130 test_true_pfm: 1598.2787027919974 sim_pfm: -233.65298227317786
episode: 520 training return: tensor(-325.6957, device='cuda:0')
episode: 521 training return: tensor(-262.2497, device='cuda:0')
episode: 522 training return: tensor(-389.3133, device='cuda:0')
episode: 523 training return: tensor(-178.7430, device='cuda:0')
epoch: 131 test_true_pfm: 1700.568898291587 sim_pfm: -302.3144792566891
episode: 524 training return: tensor(-344.1813, device='cuda:0')
episode: 525 training return: tensor(-335.4871, device='cuda:0')
episode: 526 training return: tensor(-218.1616, device='cuda:0')
episode: 527 training return: tensor(-263.6371, device='cuda:0')
epoch: 132 test_true_pfm: 1514.5208598635788 sim_pfm: -359.7875641256105
episode: 528 training return: tensor(-225.7934, device='cuda:0')
episode: 529 training return: tensor(-227.3929, device='cuda:0')
episode: 530 training return: tensor(-337.1816, device='cuda:0')
episode: 531 training return: tensor(-208.7345, device='cuda:0')
epoch: 133 test_true_pfm: 1537.770639252334 sim_pfm: -367.1567509549204
episode: 532 training return: tensor(-265.4994, device='cuda:0')
episode: 533 training return: tensor(-362.7501, device='cuda:0')
episode: 534 training return: tensor(-203.6180, device='cuda:0')
episode: 535 training return: tensor(-319.3090, device='cuda:0')
epoch: 134 test_true_pfm: 2030.1966091801787 sim_pfm: -345.4239606873016
episode: 536 training return: tensor(-399.3131, device='cuda:0')
episode: 537 training return: tensor(-423.3250, device='cuda:0')
episode: 538 training return: tensor(-338.1801, device='cuda:0')
episode: 539 training return: tensor(-311.7057, device='cuda:0')
epoch: 135 test_true_pfm: 1689.9073152755616 sim_pfm: -193.19727308450578
episode: 540 training return: tensor(-169.1242, device='cuda:0')
episode: 541 training return: tensor(-435.5828, device='cuda:0')
episode: 542 training return: tensor(-214.3120, device='cuda:0')
episode: 543 training return: tensor(-255.4221, device='cuda:0')
epoch: 136 test_true_pfm: 1448.2686919065611 sim_pfm: -298.0508242643361
episode: 544 training return: tensor(-201.7290, device='cuda:0')
episode: 545 training return: tensor(-300.7453, device='cuda:0')
episode: 546 training return: tensor(48.3624, device='cuda:0')
episode: 547 training return: tensor(-300.9812, device='cuda:0')
epoch: 137 test_true_pfm: 1870.9020521038285 sim_pfm: -319.9462817844469
episode: 548 training return: tensor(-394.2903, device='cuda:0')
episode: 549 training return: tensor(-380.0524, device='cuda:0')
episode: 550 training return: tensor(-213.7271, device='cuda:0')
episode: 551 training return: tensor(-399.8113, device='cuda:0')
epoch: 138 test_true_pfm: 1548.9995848978017 sim_pfm: -203.224196649855
episode: 552 training return: tensor(-222.6951, device='cuda:0')
episode: 553 training return: tensor(-252.9872, device='cuda:0')
episode: 554 training return: tensor(-106.2576, device='cuda:0')
episode: 555 training return: tensor(-446.3194, device='cuda:0')
epoch: 139 test_true_pfm: 1609.3141806410697 sim_pfm: -302.2706980602816
episode: 556 training return: tensor(-294.5427, device='cuda:0')
episode: 557 training return: tensor(-338.1676, device='cuda:0')
episode: 558 training return: tensor(59.6526, device='cuda:0')
episode: 559 training return: tensor(-283.8024, device='cuda:0')
epoch: 140 test_true_pfm: 1746.2506224499568 sim_pfm: -127.61665030273919
episode: 560 training return: tensor(-358.0249, device='cuda:0')
episode: 561 training return: tensor(-126.5618, device='cuda:0')
episode: 562 training return: tensor(-272.6128, device='cuda:0')
episode: 563 training return: tensor(-348.6602, device='cuda:0')
epoch: 141 test_true_pfm: 1778.6763103544254 sim_pfm: -287.24070287598687
episode: 564 training return: tensor(-218.5361, device='cuda:0')
episode: 565 training return: tensor(-215.9594, device='cuda:0')
episode: 566 training return: tensor(-242.3763, device='cuda:0')
episode: 567 training return: tensor(-397.0635, device='cuda:0')
epoch: 142 test_true_pfm: 1774.2518921164494 sim_pfm: -241.18479133944493
episode: 568 training return: tensor(-231.1691, device='cuda:0')
episode: 569 training return: tensor(-345.7502, device='cuda:0')
episode: 570 training return: tensor(-194.6905, device='cuda:0')
episode: 571 training return: tensor(-252.8084, device='cuda:0')
epoch: 143 test_true_pfm: 1648.375792650201 sim_pfm: -283.9978813006698
episode: 572 training return: tensor(-394.2977, device='cuda:0')
episode: 573 training return: tensor(-320.6505, device='cuda:0')
episode: 574 training return: tensor(-339.2121, device='cuda:0')
episode: 575 training return: tensor(-376.9283, device='cuda:0')
epoch: 144 test_true_pfm: 1369.6557958798785 sim_pfm: -382.7596899862401
episode: 576 training return: tensor(-422.3172, device='cuda:0')
episode: 577 training return: tensor(-171.0506, device='cuda:0')
episode: 578 training return: tensor(-342.6454, device='cuda:0')
episode: 579 training return: tensor(-304.6214, device='cuda:0')
epoch: 145 test_true_pfm: 1725.1148167282718 sim_pfm: -153.77559031920586
episode: 580 training return: tensor(-302.3282, device='cuda:0')
episode: 581 training return: tensor(107.5945, device='cuda:0')
episode: 582 training return: tensor(-280.2926, device='cuda:0')
episode: 583 training return: tensor(-67.5126, device='cuda:0')
epoch: 146 test_true_pfm: 3008.4240069160714 sim_pfm: 203.77299649086004
episode: 584 training return: tensor(-372.9939, device='cuda:0')
episode: 585 training return: tensor(-354.7395, device='cuda:0')
episode: 586 training return: tensor(-179.1179, device='cuda:0')
episode: 587 training return: tensor(-332.1761, device='cuda:0')
epoch: 147 test_true_pfm: 1725.6743157128178 sim_pfm: -258.7507884031899
episode: 588 training return: tensor(-305.7973, device='cuda:0')
episode: 589 training return: tensor(-281.9318, device='cuda:0')
episode: 590 training return: tensor(-229.7399, device='cuda:0')
episode: 591 training return: tensor(168.7659, device='cuda:0')
epoch: 148 test_true_pfm: 1911.0146866990344 sim_pfm: -135.65542859694688
episode: 592 training return: tensor(-308.4821, device='cuda:0')
episode: 593 training return: tensor(-295.5609, device='cuda:0')
episode: 594 training return: tensor(-337.5320, device='cuda:0')
episode: 595 training return: tensor(-275.1866, device='cuda:0')
epoch: 149 test_true_pfm: 1691.109609803104 sim_pfm: -313.31872672527487
episode: 596 training return: tensor(-387.6111, device='cuda:0')
episode: 597 training return: tensor(-270.2504, device='cuda:0')
episode: 598 training return: tensor(-276.2831, device='cuda:0')
episode: 599 training return: tensor(-398.7170, device='cuda:0')
epoch: 150 test_true_pfm: 1495.3656836922974 sim_pfm: -333.97562199807726
