['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '3']
epoch: 0 training_loss 0.2604070802032947 test_loss: 0.20256106853485106
epoch: 1 training_loss 0.20976195767521857 test_loss: 0.21375563144683837
epoch: 2 training_loss 0.1962805211544037 test_loss: 0.19751250743865967
epoch: 3 training_loss 0.20191917508840562 test_loss: 0.2073657274246216
epoch: 4 training_loss 0.18144822493195534 test_loss: 0.1702317476272583
epoch: 5 training_loss 0.19417253769934179 test_loss: 0.19153783321380616
epoch: 6 training_loss 0.1999525236338377 test_loss: 0.18435274362564086
epoch: 7 training_loss 0.1987706012278795 test_loss: 0.1930209517478943
epoch: 8 training_loss 0.18942459538578987 test_loss: 0.185731840133667
epoch: 9 training_loss 0.19451950155198575 test_loss: 0.1652733325958252
epoch: 10 training_loss 0.19035071291029454 test_loss: 0.18562827110290528
epoch: 11 training_loss 0.18665744192898273 test_loss: 0.17143757343292237
epoch: 12 training_loss 0.18527883730828762 test_loss: 0.17654027938842773
epoch: 13 training_loss 0.18568987935781478 test_loss: 0.1843896269798279
epoch: 14 training_loss 0.18785570740699767 test_loss: 0.1761934518814087
epoch: 15 training_loss 0.18158042304217814 test_loss: 0.1841785192489624
epoch: 16 training_loss 0.18318231225013734 test_loss: 0.1896886944770813
epoch: 17 training_loss 0.1860720045119524 test_loss: 0.1906351327896118
epoch: 18 training_loss 0.1857835789769888 test_loss: 0.16884803771972656
epoch: 19 training_loss 0.18634419806301594 test_loss: 0.19047436714172364
epoch: 20 training_loss 0.18418848522007467 test_loss: 0.1806779146194458
epoch: 21 training_loss 0.184634872302413 test_loss: 0.1825162649154663
epoch: 22 training_loss 0.18304135292768478 test_loss: 0.1902190089225769
epoch: 23 training_loss 0.18618670642375945 test_loss: 0.18670203685760497
epoch: 24 training_loss 0.17943809650838374 test_loss: 0.18709182739257812
epoch: 25 training_loss 0.18294710487127305 test_loss: 0.17585750818252563
epoch: 26 training_loss 0.19239710934460164 test_loss: 0.17724258899688722
epoch: 27 training_loss 0.18777600310742856 test_loss: 0.1847884774208069
epoch: 28 training_loss 0.18373860612511636 test_loss: 0.17040146589279176
epoch: 29 training_loss 0.17370095185935497 test_loss: 0.1891258955001831
epoch: 30 training_loss 0.17713699579238892 test_loss: 0.17761849164962767
epoch: 31 training_loss 0.17804933041334153 test_loss: 0.16427279710769654
epoch: 32 training_loss 0.18581110566854478 test_loss: 0.14189603328704833
epoch: 33 training_loss 0.19009396225214004 test_loss: 0.1801559805870056
epoch: 34 training_loss 0.17816046476364136 test_loss: 0.16775506734848022
epoch: 35 training_loss 0.18475056409835816 test_loss: 0.18984419107437134
epoch: 36 training_loss 0.18520724959671497 test_loss: 0.1774176001548767
epoch: 37 training_loss 0.1779745937883854 test_loss: 0.18726221323013306
epoch: 38 training_loss 0.18186103798449038 test_loss: 0.17674562931060792
epoch: 39 training_loss 0.18002132311463356 test_loss: 0.20628297328948975
epoch: 40 training_loss 0.17947377353906632 test_loss: 0.17009594440460205
epoch: 41 training_loss 0.18573965057730674 test_loss: 0.18531787395477295
epoch: 42 training_loss 0.17694223716855048 test_loss: 0.17714643478393555
epoch: 43 training_loss 0.18290637470781804 test_loss: 0.19033325910568238
epoch: 44 training_loss 0.18522919565439225 test_loss: 0.18069958686828613
epoch: 45 training_loss 0.1786342776566744 test_loss: 0.17538996934890747
epoch: 46 training_loss 0.18565525725483895 test_loss: 0.18658431768417358
epoch: 47 training_loss 0.17650304719805718 test_loss: 0.18526453971862794
epoch: 48 training_loss 0.17922449491918088 test_loss: 0.17619189023971557
epoch: 49 training_loss 0.17832793466746807 test_loss: 0.17405177354812623
epoch: 50 training_loss 0.18124007254838945 test_loss: 0.1628397822380066
epoch: 51 training_loss 0.18184360884130002 test_loss: 0.17355839014053345
epoch: 52 training_loss 0.1808874562382698 test_loss: 0.17546087503433228
epoch: 53 training_loss 0.1810901776701212 test_loss: 0.19663629531860352
epoch: 54 training_loss 0.18350799225270747 test_loss: 0.17725566625595093
epoch: 55 training_loss 0.17646964244544505 test_loss: 0.17232153415679932
epoch: 56 training_loss 0.17911364443600178 test_loss: 0.17425522804260254
epoch: 57 training_loss 0.18180188417434692 test_loss: 0.17648030519485475
epoch: 58 training_loss 0.1787769088894129 test_loss: 0.17294578552246093
epoch: 59 training_loss 0.17370716955512763 test_loss: 0.1538249135017395
epoch: 60 training_loss 0.18948422215878963 test_loss: 0.1906906008720398
epoch: 61 training_loss 0.18216597370803356 test_loss: 0.18557460308074952
epoch: 62 training_loss 0.1788188573718071 test_loss: 0.18428782224655152
epoch: 63 training_loss 0.18484319560229778 test_loss: 0.18996381759643555
epoch: 64 training_loss 0.1767576280236244 test_loss: 0.18629218339920045
epoch: 65 training_loss 0.18369937427341937 test_loss: 0.19298954010009767
epoch: 66 training_loss 0.1826943852007389 test_loss: 0.19214500188827516
epoch: 67 training_loss 0.17959393493831158 test_loss: 0.1715625762939453
epoch: 68 training_loss 0.17522419586777688 test_loss: 0.18275370597839355
epoch: 69 training_loss 0.18266925558447838 test_loss: 0.1890238642692566
epoch: 70 training_loss 0.17804598391056062 test_loss: 0.18277348279953004
epoch: 71 training_loss 0.17873147383332252 test_loss: 0.19175329208374023
epoch: 72 training_loss 0.18375084079802037 test_loss: 0.18338896036148072
epoch: 73 training_loss 0.183583477512002 test_loss: 0.1845311403274536
epoch: 74 training_loss 0.17575568310916423 test_loss: 0.1742016553878784
epoch: 75 training_loss 0.17992356948554517 test_loss: 0.17563982009887696
epoch: 76 training_loss 0.17923498041927816 test_loss: 0.17706288099288942
epoch: 77 training_loss 0.178443803191185 test_loss: 0.1764958143234253
epoch: 78 training_loss 0.19012882068753242 test_loss: 0.1892177104949951
epoch: 79 training_loss 0.18327294945716857 test_loss: 0.1790834426879883
epoch: 80 training_loss 0.1831515295058489 test_loss: 0.17343000173568726
epoch: 81 training_loss 0.1817520895600319 test_loss: 0.17217987775802612
epoch: 82 training_loss 0.18106119439005852 test_loss: 0.1724933862686157
epoch: 83 training_loss 0.180146182179451 test_loss: 0.1808064103126526
epoch: 84 training_loss 0.18539018154144288 test_loss: 0.18577518463134765
epoch: 85 training_loss 0.17832700312137603 test_loss: 0.1784569501876831
epoch: 86 training_loss 0.17832609444856642 test_loss: 0.19610167741775514
epoch: 87 training_loss 0.17692285507917405 test_loss: 0.1749552845954895
epoch: 88 training_loss 0.173319421634078 test_loss: 0.14688123464584352
epoch: 89 training_loss 0.17940883949398995 test_loss: 0.167100727558136
epoch: 90 training_loss 0.18634330324828624 test_loss: 0.1817389965057373
epoch: 91 training_loss 0.17682111732661723 test_loss: 0.16675504446029663
epoch: 92 training_loss 0.1760142780840397 test_loss: 0.1859049081802368
epoch: 93 training_loss 0.176526111215353 test_loss: 0.18511751890182496
epoch: 94 training_loss 0.1805880396068096 test_loss: 0.17932218313217163
epoch: 95 training_loss 0.17723853968083858 test_loss: 0.17816561460494995
epoch: 96 training_loss 0.17713786564767361 test_loss: 0.18195044994354248
epoch: 97 training_loss 0.17118670992553234 test_loss: 0.18353904485702516
epoch: 98 training_loss 0.17954797692596913 test_loss: 0.17356503009796143
epoch: 99 training_loss 0.17569603942334652 test_loss: 0.1677319049835205
epoch: 100 training_loss 0.1793766400963068 test_loss: 0.18739331960678102
epoch: 101 training_loss 0.18124610804021357 test_loss: 0.1943470597267151
epoch: 102 training_loss 0.17541533075273036 test_loss: 0.18815349340438842
epoch: 103 training_loss 0.1745771971344948 test_loss: 0.19151591062545775
epoch: 104 training_loss 0.17283413842320441 test_loss: 0.18285154104232787
epoch: 105 training_loss 0.180121603384614 test_loss: 0.16759264469146729
epoch: 106 training_loss 0.1714299139380455 test_loss: 0.1765123724937439
epoch: 107 training_loss 0.1796286753937602 test_loss: 0.18447195291519164
epoch: 108 training_loss 0.17918238386511803 test_loss: 0.18326336145401
epoch: 109 training_loss 0.181974730566144 test_loss: 0.19033018350601197
epoch: 110 training_loss 0.18195719499140978 test_loss: 0.1790242075920105
epoch: 111 training_loss 0.1738666959106922 test_loss: 0.17290338277816772
epoch: 112 training_loss 0.17700931176543236 test_loss: 0.1891070246696472
epoch: 113 training_loss 0.1816036570072174 test_loss: 0.1840360641479492
epoch: 114 training_loss 0.18349370516836644 test_loss: 0.18296260833740235
epoch: 115 training_loss 0.17964979372918605 test_loss: 0.16665232181549072
epoch: 116 training_loss 0.17502995938062668 test_loss: 0.18124284744262695
epoch: 117 training_loss 0.18210433766245843 test_loss: 0.17309292554855346
epoch: 118 training_loss 0.17233394980430602 test_loss: 0.1665268659591675
epoch: 119 training_loss 0.18389185309410094 test_loss: 0.1866026520729065
epoch: 120 training_loss 0.17861199691891672 test_loss: 0.17524163722991942
epoch: 121 training_loss 0.18132127590477468 test_loss: 0.1774966835975647
epoch: 122 training_loss 0.17667335249483584 test_loss: 0.17806625366210938
epoch: 123 training_loss 0.18204427793622016 test_loss: 0.1812973737716675
epoch: 124 training_loss 0.1834171176329255 test_loss: 0.18253544569015503
epoch: 125 training_loss 0.18241505369544028 test_loss: 0.1680211305618286
epoch: 126 training_loss 0.17810101978480816 test_loss: 0.1827385663986206
epoch: 127 training_loss 0.17369192637503147 test_loss: 0.18206881284713744
epoch: 128 training_loss 0.18076181411743164 test_loss: 0.15942604541778566
epoch: 129 training_loss 0.17049740761518478 test_loss: 0.170106840133667
epoch: 130 training_loss 0.18458960957825185 test_loss: 0.1966509222984314
epoch: 131 training_loss 0.17351936779916285 test_loss: 0.1849688172340393
epoch: 132 training_loss 0.17434880919754506 test_loss: 0.18594714403152465
epoch: 133 training_loss 0.17379873409867286 test_loss: 0.19733827114105223
epoch: 134 training_loss 0.16915529258549214 test_loss: 0.17677538394927977
epoch: 135 training_loss 0.17297609739005565 test_loss: 0.16714398860931395
epoch: 136 training_loss 0.16723682552576066 test_loss: 0.19127869606018066
epoch: 137 training_loss 0.17530942663550378 test_loss: 0.18714554309844972
epoch: 138 training_loss 0.18006898187100887 test_loss: 0.17548599243164062
epoch: 139 training_loss 0.18349141336977481 test_loss: 0.1849300503730774
epoch: 140 training_loss 0.17869332328438758 test_loss: 0.1999226689338684
epoch: 141 training_loss 0.1736307117342949 test_loss: 0.1700724720954895
epoch: 142 training_loss 0.1745086481422186 test_loss: 0.1802685022354126
epoch: 143 training_loss 0.18190635219216347 test_loss: 0.1852232336997986
epoch: 144 training_loss 0.17115837819874286 test_loss: 0.1725056767463684
epoch: 145 training_loss 0.17830387234687806 test_loss: 0.17889106273651123
epoch: 146 training_loss 0.1793117706477642 test_loss: 0.1829846143722534
epoch: 147 training_loss 0.17432568557560443 test_loss: 0.17453690767288207
epoch: 148 training_loss 0.18703970164060593 test_loss: 0.18468129634857178
epoch: 149 training_loss 0.17586818136274815 test_loss: 0.1827736973762512
epoch: 0 training_loss 9.052299251556397 test_loss: 5.579217529296875
epoch: 1 training_loss 4.326279366016388 test_loss: 3.4180221557617188
epoch: 2 training_loss 3.030923342704773 test_loss: 2.6113842010498045
epoch: 3 training_loss 2.415013666152954 test_loss: 2.1766597747802736
epoch: 4 training_loss 2.051452285051346 test_loss: 1.8612335205078125
epoch: 5 training_loss 1.7518116545677185 test_loss: 1.6438819885253906
epoch: 6 training_loss 1.5759196174144745 test_loss: 1.4687028884887696
epoch: 7 training_loss 1.4544537198543548 test_loss: 1.4043522834777833
epoch: 8 training_loss 1.3463174641132354 test_loss: 1.2921154975891114
epoch: 9 training_loss 1.266000646352768 test_loss: 1.2570637702941894
epoch: 10 training_loss 1.2019341397285461 test_loss: 1.163963222503662
epoch: 11 training_loss 1.1424225741624832 test_loss: 1.1030437469482421
epoch: 12 training_loss 1.096002191901207 test_loss: 1.099335289001465
epoch: 13 training_loss 1.0678157359361649 test_loss: 1.0431097030639649
epoch: 14 training_loss 1.0432740956544877 test_loss: 1.050257968902588
epoch: 15 training_loss 1.0089361137151718 test_loss: 0.9845026969909668
epoch: 16 training_loss 0.9737330257892609 test_loss: 0.967099380493164
epoch: 17 training_loss 0.9508434045314789 test_loss: 0.9030679702758789
epoch: 18 training_loss 0.9213449329137802 test_loss: 0.91427583694458
epoch: 19 training_loss 0.9197694432735443 test_loss: 0.8935462951660156
epoch: 20 training_loss 0.882271671295166 test_loss: 0.8526603698730468
epoch: 21 training_loss 0.881480360031128 test_loss: 0.8494805335998535
epoch: 22 training_loss 0.8670412665605545 test_loss: 0.8391169548034668
epoch: 23 training_loss 0.8436263537406922 test_loss: 0.8204213142395019
epoch: 24 training_loss 0.8369492405653 test_loss: 0.8292737007141113
epoch: 25 training_loss 0.8219870859384537 test_loss: 0.8114042282104492
epoch: 26 training_loss 0.8195271027088166 test_loss: 0.8377233505249023
epoch: 27 training_loss 0.8002372324466706 test_loss: 0.7811513423919678
epoch: 28 training_loss 0.7828186535835266 test_loss: 0.7624226570129394
epoch: 29 training_loss 0.7731652283668518 test_loss: 0.7620957374572754
epoch: 30 training_loss 0.7640425783395767 test_loss: 0.7905906200408935
epoch: 31 training_loss 0.7602545040845871 test_loss: 0.762214994430542
epoch: 32 training_loss 0.7584474223852158 test_loss: 0.7327552318572998
epoch: 33 training_loss 0.7356519675254822 test_loss: 0.7232267379760742
epoch: 34 training_loss 0.7412298822402954 test_loss: 0.7239180088043213
epoch: 35 training_loss 0.7250563645362854 test_loss: 0.7581166744232177
epoch: 36 training_loss 0.7170559591054917 test_loss: 0.7316619396209717
epoch: 37 training_loss 0.7185968661308288 test_loss: 0.7321435928344726
epoch: 38 training_loss 0.713651859164238 test_loss: 0.699713134765625
epoch: 39 training_loss 0.6921061372756958 test_loss: 0.6947021961212159
epoch: 40 training_loss 0.7183788061141968 test_loss: 0.7078881740570069
epoch: 41 training_loss 0.6845669996738434 test_loss: 0.6771793842315674
epoch: 42 training_loss 0.6852535378932952 test_loss: 0.6948304653167725
epoch: 43 training_loss 0.6880151557922364 test_loss: 0.6657886981964112
epoch: 44 training_loss 0.6821582055091858 test_loss: 0.6801546573638916
epoch: 45 training_loss 0.678535082936287 test_loss: 0.7293810844421387
epoch: 46 training_loss 0.6796104943752289 test_loss: 0.6571245670318604
epoch: 47 training_loss 0.6785098677873611 test_loss: 0.6629647731781005
epoch: 48 training_loss 0.6688739687204361 test_loss: 0.636122465133667
epoch: 49 training_loss 0.66222875893116 test_loss: 0.6580214023590087
epoch: 50 training_loss 0.6543334704637528 test_loss: 0.6528172969818116
epoch: 51 training_loss 0.6580403453111648 test_loss: 0.6267760276794434
epoch: 52 training_loss 0.6529783648252487 test_loss: 0.6259479999542237
epoch: 53 training_loss 0.6449667412042618 test_loss: 0.6449158191680908
epoch: 54 training_loss 0.6406893986463547 test_loss: 0.641309118270874
epoch: 55 training_loss 0.6463919365406037 test_loss: 0.634656572341919
epoch: 56 training_loss 0.6346741998195649 test_loss: 0.6450853824615479
epoch: 57 training_loss 0.6404061949253083 test_loss: 0.6629672527313233
epoch: 58 training_loss 0.6384429144859314 test_loss: 0.6259626388549805
epoch: 59 training_loss 0.6222214019298553 test_loss: 0.6275898456573487
epoch: 60 training_loss 0.6213481420278549 test_loss: 0.6274785995483398
epoch: 61 training_loss 0.628485637307167 test_loss: 0.6171289443969726
epoch: 62 training_loss 0.6150802212953568 test_loss: 0.5941890239715576
epoch: 63 training_loss 0.6213930153846741 test_loss: 0.5934641838073731
epoch: 64 training_loss 0.6104091596603394 test_loss: 0.6305132865905761
epoch: 65 training_loss 0.6174593287706375 test_loss: 0.6031916618347168
epoch: 66 training_loss 0.6104307126998901 test_loss: 0.5889339447021484
epoch: 67 training_loss 0.5942254793643952 test_loss: 0.5843788146972656
epoch: 68 training_loss 0.5962125313282013 test_loss: 0.614498233795166
epoch: 69 training_loss 0.5977885717153549 test_loss: 0.5809741020202637
epoch: 70 training_loss 0.5897032886743545 test_loss: 0.5977338790893555
epoch: 71 training_loss 0.5930945575237274 test_loss: 0.5864119052886962
epoch: 72 training_loss 0.5944059789180756 test_loss: 0.6026824474334717
epoch: 73 training_loss 0.589275985956192 test_loss: 0.5755277156829834
epoch: 74 training_loss 0.5837338626384735 test_loss: 0.5866235256195068
epoch: 75 training_loss 0.5849436902999878 test_loss: 0.5960734844207763
epoch: 76 training_loss 0.5824680185317993 test_loss: 0.566460371017456
epoch: 77 training_loss 0.5747065490484238 test_loss: 0.5903480529785157
epoch: 78 training_loss 0.5775982481241226 test_loss: 0.5831123352050781
epoch: 79 training_loss 0.5836901849508286 test_loss: 0.576273775100708
epoch: 80 training_loss 0.5656239157915115 test_loss: 0.599143648147583
epoch: 81 training_loss 0.57899582862854 test_loss: 0.5731254577636719
epoch: 82 training_loss 0.5744632330536842 test_loss: 0.5985094547271729
epoch: 83 training_loss 0.5736268639564515 test_loss: 0.5771146297454834
epoch: 84 training_loss 0.5648538613319397 test_loss: 0.5589186668395996
epoch: 85 training_loss 0.561972186267376 test_loss: 0.5558457374572754
epoch: 86 training_loss 0.5646878582239151 test_loss: 0.555881118774414
epoch: 87 training_loss 0.573153050839901 test_loss: 0.55060715675354
epoch: 88 training_loss 0.5698543122410774 test_loss: 0.5684499740600586
epoch: 89 training_loss 0.5566838473081589 test_loss: 0.5474194526672364
epoch: 90 training_loss 0.55437135130167 test_loss: 0.5737545490264893
epoch: 91 training_loss 0.5579451718926429 test_loss: 0.5623622417449952
epoch: 92 training_loss 0.551361888051033 test_loss: 0.5742050170898437
epoch: 93 training_loss 0.5570264986157417 test_loss: 0.5871297359466553
epoch: 94 training_loss 0.5592526957392693 test_loss: 0.5604434013366699
epoch: 95 training_loss 0.5483071583509446 test_loss: 0.5474402427673339
epoch: 96 training_loss 0.5524811506271362 test_loss: 0.560956335067749
epoch: 97 training_loss 0.5671686580777169 test_loss: 0.5521751403808594
epoch: 98 training_loss 0.5571811389923096 test_loss: 0.5518661499023437
epoch: 99 training_loss 0.5370617461204529 test_loss: 0.5347284793853759
epoch: 100 training_loss 0.5360320219397545 test_loss: 0.5660546302795411
epoch: 101 training_loss 0.5381215560436249 test_loss: 0.5570030689239502
epoch: 102 training_loss 0.5326589500904083 test_loss: 0.5285509586334228
epoch: 103 training_loss 0.5498411500453949 test_loss: 0.5277987957000733
epoch: 104 training_loss 0.5502703428268433 test_loss: 0.5591644763946533
epoch: 105 training_loss 0.5390813872218132 test_loss: 0.5374088764190674
epoch: 106 training_loss 0.5303098574280739 test_loss: 0.5417671203613281
epoch: 107 training_loss 0.5589500617980957 test_loss: 0.5890779018402099
epoch: 108 training_loss 0.5387225061655044 test_loss: 0.5369759559631347
epoch: 109 training_loss 0.5329366365075111 test_loss: 0.5347133636474609
epoch: 110 training_loss 0.5434016880393028 test_loss: 0.5557603359222412
epoch: 111 training_loss 0.5253731003403663 test_loss: 0.5205615043640137
epoch: 112 training_loss 0.5242563539743423 test_loss: 0.51981520652771
epoch: 113 training_loss 0.5272455874085427 test_loss: 0.5279018402099609
epoch: 114 training_loss 0.535909064412117 test_loss: 0.5244412422180176
epoch: 115 training_loss 0.5317668434977532 test_loss: 0.5256710529327393
epoch: 116 training_loss 0.5250902023911476 test_loss: 0.5132538795471191
epoch: 117 training_loss 0.5130576685070991 test_loss: 0.5170678615570068
epoch: 118 training_loss 0.5335981187224388 test_loss: 0.5240589141845703
epoch: 119 training_loss 0.5213865715265275 test_loss: 0.5429415702819824
epoch: 120 training_loss 0.5263845846056938 test_loss: 0.5538390636444092
epoch: 121 training_loss 0.5242106214165687 test_loss: 0.512791633605957
epoch: 122 training_loss 0.5185689377784729 test_loss: 0.5185307502746582
epoch: 123 training_loss 0.511660789847374 test_loss: 0.5089389324188233
epoch: 124 training_loss 0.5197753483057022 test_loss: 0.5100318431854248
epoch: 125 training_loss 0.5134045699238777 test_loss: 0.5338140010833741
epoch: 126 training_loss 0.5210792255401612 test_loss: 0.5372813224792481
epoch: 127 training_loss 0.5182296922802925 test_loss: 0.5191826820373535
epoch: 128 training_loss 0.5175396451354026 test_loss: 0.5119598388671875
epoch: 129 training_loss 0.5171694305539131 test_loss: 0.4984781265258789
epoch: 130 training_loss 0.5033458340167999 test_loss: 0.49596257209777833
epoch: 131 training_loss 0.5121919828653335 test_loss: 0.5164764881134033
epoch: 132 training_loss 0.507176139652729 test_loss: 0.507810640335083
epoch: 133 training_loss 0.5138404589891433 test_loss: 0.5301123142242432
epoch: 134 training_loss 0.5094877797365188 test_loss: 0.5147107124328614
epoch: 135 training_loss 0.5011201751232147 test_loss: 0.4944193363189697
epoch: 136 training_loss 0.5110603028535843 test_loss: 0.5033326148986816
epoch: 137 training_loss 0.5093797639012336 test_loss: 0.5109508991241455
epoch: 138 training_loss 0.5042263633012771 test_loss: 0.4942723274230957
epoch: 139 training_loss 0.49764974892139435 test_loss: 0.5338887691497802
epoch: 140 training_loss 0.5075149500370025 test_loss: 0.5017505645751953
epoch: 141 training_loss 0.49869422942399977 test_loss: 0.5130480766296387
epoch: 142 training_loss 0.4980715587735176 test_loss: 0.5059461116790771
epoch: 143 training_loss 0.5029478842020034 test_loss: 0.4970651626586914
epoch: 144 training_loss 0.4955323413014412 test_loss: 0.49223036766052247
epoch: 145 training_loss 0.5012348482012748 test_loss: 0.4872906684875488
epoch: 146 training_loss 0.4920575085282326 test_loss: 0.485899543762207
epoch: 147 training_loss 0.49771706491708756 test_loss: 0.49478778839111326
epoch: 148 training_loss 0.4919742232561111 test_loss: 0.5054372787475586
epoch: 149 training_loss 0.49955542862415314 test_loss: 0.49866228103637694
1762.898943109914
episode: 0 training return: tensor(-322.2612, device='cuda:0')
episode: 1 training return: tensor(-295.5165, device='cuda:0')
episode: 2 training return: tensor(-262.8996, device='cuda:0')
episode: 3 training return: tensor(226.0524, device='cuda:0')
epoch: 1 test_true_pfm: 1463.0519970055345 sim_pfm: -175.0383493786309
episode: 4 training return: tensor(-75.7334, device='cuda:0')
episode: 5 training return: tensor(-307.0699, device='cuda:0')
episode: 6 training return: tensor(-369.2119, device='cuda:0')
episode: 7 training return: tensor(-300.0380, device='cuda:0')
epoch: 2 test_true_pfm: 1395.1825390525719 sim_pfm: -210.08537215401884
episode: 8 training return: tensor(-321.0372, device='cuda:0')
episode: 9 training return: tensor(-387.2208, device='cuda:0')
episode: 10 training return: tensor(257.0329, device='cuda:0')
episode: 11 training return: tensor(-415.1880, device='cuda:0')
epoch: 3 test_true_pfm: 1867.7426176737638 sim_pfm: -334.7374752720352
episode: 12 training return: tensor(73.7834, device='cuda:0')
episode: 13 training return: tensor(-276.6171, device='cuda:0')
episode: 14 training return: tensor(-321.7385, device='cuda:0')
episode: 15 training return: tensor(241.9293, device='cuda:0')
epoch: 4 test_true_pfm: 1525.006522274911 sim_pfm: -372.4248039803545
episode: 16 training return: tensor(-342.1795, device='cuda:0')
episode: 17 training return: tensor(-347.9435, device='cuda:0')
episode: 18 training return: tensor(-30.3773, device='cuda:0')
episode: 19 training return: tensor(125.9461, device='cuda:0')
epoch: 5 test_true_pfm: 2404.575831013383 sim_pfm: -9.088373144322153
episode: 20 training return: tensor(-365.6919, device='cuda:0')
episode: 21 training return: tensor(-383.0388, device='cuda:0')
episode: 22 training return: tensor(-61.4772, device='cuda:0')
episode: 23 training return: tensor(-321.9165, device='cuda:0')
epoch: 6 test_true_pfm: 2699.9289209893327 sim_pfm: -145.55052135675214
episode: 24 training return: tensor(-354.5754, device='cuda:0')
episode: 25 training return: tensor(-110.1999, device='cuda:0')
episode: 26 training return: tensor(-96.7412, device='cuda:0')
episode: 27 training return: tensor(-306.2164, device='cuda:0')
epoch: 7 test_true_pfm: 1950.608670676651 sim_pfm: -135.5983712815602
episode: 28 training return: tensor(-173.2158, device='cuda:0')
episode: 29 training return: tensor(-302.0042, device='cuda:0')
episode: 30 training return: tensor(-85.0328, device='cuda:0')
episode: 31 training return: tensor(-372.6335, device='cuda:0')
epoch: 8 test_true_pfm: 1545.0237615552248 sim_pfm: -242.86472781340126
episode: 32 training return: tensor(41.8838, device='cuda:0')
episode: 33 training return: tensor(302.0902, device='cuda:0')
episode: 34 training return: tensor(-405.0645, device='cuda:0')
episode: 35 training return: tensor(-61.0243, device='cuda:0')
epoch: 9 test_true_pfm: 1815.747531973074 sim_pfm: -205.95577790644407
episode: 36 training return: tensor(36.1221, device='cuda:0')
episode: 37 training return: tensor(-373.3628, device='cuda:0')
episode: 38 training return: tensor(-331.4401, device='cuda:0')
episode: 39 training return: tensor(-312.4926, device='cuda:0')
epoch: 10 test_true_pfm: 3226.485294713766 sim_pfm: -131.86538978343984
episode: 40 training return: tensor(-112.4469, device='cuda:0')
episode: 41 training return: tensor(-167.1739, device='cuda:0')
episode: 42 training return: tensor(-109.3082, device='cuda:0')
episode: 43 training return: tensor(-380.7600, device='cuda:0')
epoch: 11 test_true_pfm: 1981.0683538106712 sim_pfm: -367.6884184538115
episode: 44 training return: tensor(-361.4328, device='cuda:0')
episode: 45 training return: tensor(262.9295, device='cuda:0')
episode: 46 training return: tensor(-295.7068, device='cuda:0')
episode: 47 training return: tensor(-25.3791, device='cuda:0')
epoch: 12 test_true_pfm: 2049.5437263830495 sim_pfm: 11.609768272028305
episode: 48 training return: tensor(-361.7577, device='cuda:0')
episode: 49 training return: tensor(-420.0629, device='cuda:0')
episode: 50 training return: tensor(-343.0609, device='cuda:0')
episode: 51 training return: tensor(87.7218, device='cuda:0')
epoch: 13 test_true_pfm: 1585.2109371685754 sim_pfm: -188.63030310491254
episode: 52 training return: tensor(-378.6398, device='cuda:0')
episode: 53 training return: tensor(-389.7711, device='cuda:0')
episode: 54 training return: tensor(110.4389, device='cuda:0')
episode: 55 training return: tensor(-411.8699, device='cuda:0')
epoch: 14 test_true_pfm: 1967.8769109134203 sim_pfm: -303.6570553695783
episode: 56 training return: tensor(-302.6928, device='cuda:0')
episode: 57 training return: tensor(-316.1855, device='cuda:0')
episode: 58 training return: tensor(-387.3071, device='cuda:0')
episode: 59 training return: tensor(-388.9309, device='cuda:0')
epoch: 15 test_true_pfm: 1835.9719579713953 sim_pfm: -363.0389290638171
episode: 60 training return: tensor(-370.3182, device='cuda:0')
episode: 61 training return: tensor(-99.2297, device='cuda:0')
episode: 62 training return: tensor(141.4322, device='cuda:0')
episode: 63 training return: tensor(-28.8058, device='cuda:0')
epoch: 16 test_true_pfm: 2134.4651321420856 sim_pfm: 13.618371375564797
episode: 64 training return: tensor(-376.9452, device='cuda:0')
episode: 65 training return: tensor(-32.8644, device='cuda:0')
episode: 66 training return: tensor(-183.0401, device='cuda:0')
episode: 67 training return: tensor(-299.6035, device='cuda:0')
epoch: 17 test_true_pfm: 2149.2991015529283 sim_pfm: -287.85696284030564
episode: 68 training return: tensor(-308.6064, device='cuda:0')
episode: 69 training return: tensor(120.3754, device='cuda:0')
episode: 70 training return: tensor(-396.8076, device='cuda:0')
episode: 71 training return: tensor(35.0742, device='cuda:0')
epoch: 18 test_true_pfm: 1700.8475676616429 sim_pfm: -73.94562498478142
episode: 72 training return: tensor(-243.6803, device='cuda:0')
episode: 73 training return: tensor(-294.8095, device='cuda:0')
episode: 74 training return: tensor(-299.1004, device='cuda:0')
episode: 75 training return: tensor(-270.8610, device='cuda:0')
epoch: 19 test_true_pfm: 2182.906752782683 sim_pfm: -111.10600636790817
episode: 76 training return: tensor(-165.6359, device='cuda:0')
episode: 77 training return: tensor(-304.4771, device='cuda:0')
episode: 78 training return: tensor(46.4885, device='cuda:0')
episode: 79 training return: tensor(50.4444, device='cuda:0')
epoch: 20 test_true_pfm: 1622.2030601591644 sim_pfm: -165.3223036627363
episode: 80 training return: tensor(233.1094, device='cuda:0')
episode: 81 training return: tensor(-307.1006, device='cuda:0')
episode: 82 training return: tensor(213.2524, device='cuda:0')
episode: 83 training return: tensor(239.3316, device='cuda:0')
epoch: 21 test_true_pfm: 2316.528920713225 sim_pfm: -57.979678089623725
episode: 84 training return: tensor(243.2855, device='cuda:0')
episode: 85 training return: tensor(-289.1321, device='cuda:0')
episode: 86 training return: tensor(-34.7831, device='cuda:0')
episode: 87 training return: tensor(-292.1869, device='cuda:0')
epoch: 22 test_true_pfm: 2926.6225490040692 sim_pfm: 228.03980607865378
episode: 88 training return: tensor(-305.0473, device='cuda:0')
episode: 89 training return: tensor(-302.3633, device='cuda:0')
episode: 90 training return: tensor(-305.3371, device='cuda:0')
episode: 91 training return: tensor(-306.5771, device='cuda:0')
epoch: 23 test_true_pfm: 2700.251144631989 sim_pfm: 204.9221430797479
episode: 92 training return: tensor(278.9332, device='cuda:0')
episode: 93 training return: tensor(0.8009, device='cuda:0')
episode: 94 training return: tensor(207.1485, device='cuda:0')
episode: 95 training return: tensor(-270.7001, device='cuda:0')
epoch: 24 test_true_pfm: 1890.0721582939802 sim_pfm: -27.238388020098984
episode: 96 training return: tensor(-252.3936, device='cuda:0')
episode: 97 training return: tensor(161.1592, device='cuda:0')
episode: 98 training return: tensor(-299.3912, device='cuda:0')
episode: 99 training return: tensor(-225.6464, device='cuda:0')
epoch: 25 test_true_pfm: 3080.0079669469656 sim_pfm: 126.22253744245002
episode: 100 training return: tensor(95.9127, device='cuda:0')
episode: 101 training return: tensor(218.9032, device='cuda:0')
episode: 102 training return: tensor(-133.7284, device='cuda:0')
episode: 103 training return: tensor(200.6369, device='cuda:0')
epoch: 26 test_true_pfm: 2768.727961367071 sim_pfm: 169.97144272533478
episode: 104 training return: tensor(-198.9223, device='cuda:0')
episode: 105 training return: tensor(232.3234, device='cuda:0')
episode: 106 training return: tensor(-299.9604, device='cuda:0')
episode: 107 training return: tensor(-304.9727, device='cuda:0')
epoch: 27 test_true_pfm: 2716.7177088662747 sim_pfm: 69.37298546302675
episode: 108 training return: tensor(242.3398, device='cuda:0')
episode: 109 training return: tensor(-51.5711, device='cuda:0')
episode: 110 training return: tensor(3.8267, device='cuda:0')
episode: 111 training return: tensor(63.5563, device='cuda:0')
epoch: 28 test_true_pfm: 3192.324767042071 sim_pfm: 53.8860365372578
episode: 112 training return: tensor(73.8071, device='cuda:0')
episode: 113 training return: tensor(255.0709, device='cuda:0')
episode: 114 training return: tensor(271.0898, device='cuda:0')
episode: 115 training return: tensor(-218.9838, device='cuda:0')
epoch: 29 test_true_pfm: 2683.2893949245554 sim_pfm: 28.937517317923874
episode: 116 training return: tensor(225.9298, device='cuda:0')
episode: 117 training return: tensor(185.7472, device='cuda:0')
episode: 118 training return: tensor(-302.0167, device='cuda:0')
episode: 119 training return: tensor(-212.5513, device='cuda:0')
epoch: 30 test_true_pfm: 2838.7094004292994 sim_pfm: 116.74946763162734
episode: 120 training return: tensor(103.5064, device='cuda:0')
episode: 121 training return: tensor(203.6066, device='cuda:0')
episode: 122 training return: tensor(-313.9579, device='cuda:0')
episode: 123 training return: tensor(-298.2536, device='cuda:0')
epoch: 31 test_true_pfm: 2533.151464493427 sim_pfm: 49.02274806442438
episode: 124 training return: tensor(-54.2454, device='cuda:0')
episode: 125 training return: tensor(-234.3244, device='cuda:0')
episode: 126 training return: tensor(210.7407, device='cuda:0')
episode: 127 training return: tensor(-307.7566, device='cuda:0')
epoch: 32 test_true_pfm: 2696.822106567082 sim_pfm: 148.8864357424318
episode: 128 training return: tensor(188.4250, device='cuda:0')
episode: 129 training return: tensor(241.3621, device='cuda:0')
episode: 130 training return: tensor(-295.9329, device='cuda:0')
episode: 131 training return: tensor(-210.9408, device='cuda:0')
epoch: 33 test_true_pfm: 2602.611047513792 sim_pfm: -270.5782180276777
episode: 132 training return: tensor(-308.2755, device='cuda:0')
episode: 133 training return: tensor(210.7825, device='cuda:0')
episode: 134 training return: tensor(233.4761, device='cuda:0')
episode: 135 training return: tensor(-293.9442, device='cuda:0')
epoch: 34 test_true_pfm: 2698.7374617596274 sim_pfm: -84.91543328504001
episode: 136 training return: tensor(-285.7326, device='cuda:0')
episode: 137 training return: tensor(-249.7951, device='cuda:0')
episode: 138 training return: tensor(-141.6053, device='cuda:0')
episode: 139 training return: tensor(-141.4462, device='cuda:0')
epoch: 35 test_true_pfm: 2607.334351750789 sim_pfm: 3.199450395642392
episode: 140 training return: tensor(-294.5313, device='cuda:0')
episode: 141 training return: tensor(141.9279, device='cuda:0')
episode: 142 training return: tensor(225.8366, device='cuda:0')
episode: 143 training return: tensor(-311.2867, device='cuda:0')
epoch: 36 test_true_pfm: 2138.573009737329 sim_pfm: 96.04581811044288
episode: 144 training return: tensor(-268.0578, device='cuda:0')
episode: 145 training return: tensor(-279.7646, device='cuda:0')
episode: 146 training return: tensor(-277.6569, device='cuda:0')
episode: 147 training return: tensor(-256.3226, device='cuda:0')
epoch: 37 test_true_pfm: 2306.5313070097914 sim_pfm: -1.0931721685725886
episode: 148 training return: tensor(236.9638, device='cuda:0')
episode: 149 training return: tensor(-294.7548, device='cuda:0')
episode: 150 training return: tensor(76.0637, device='cuda:0')
episode: 151 training return: tensor(-85.3869, device='cuda:0')
epoch: 38 test_true_pfm: 2257.2068824385 sim_pfm: 57.10137617461927
episode: 152 training return: tensor(23.7902, device='cuda:0')
episode: 153 training return: tensor(-94.2339, device='cuda:0')
episode: 154 training return: tensor(-299.6106, device='cuda:0')
episode: 155 training return: tensor(-289.0215, device='cuda:0')
epoch: 39 test_true_pfm: 2956.7635653418615 sim_pfm: -12.569136095388481
episode: 156 training return: tensor(86.1061, device='cuda:0')
episode: 157 training return: tensor(-40.1526, device='cuda:0')
episode: 158 training return: tensor(-297.7464, device='cuda:0')
episode: 159 training return: tensor(-302.4221, device='cuda:0')
epoch: 40 test_true_pfm: 1756.390470361757 sim_pfm: 60.596332610817626
episode: 160 training return: tensor(-160.0202, device='cuda:0')
episode: 161 training return: tensor(166.5595, device='cuda:0')
episode: 162 training return: tensor(-114.4635, device='cuda:0')
episode: 163 training return: tensor(256.0215, device='cuda:0')
epoch: 41 test_true_pfm: 3217.477949904977 sim_pfm: 82.46712601138279
episode: 164 training return: tensor(248.4307, device='cuda:0')
episode: 165 training return: tensor(-321.4121, device='cuda:0')
episode: 166 training return: tensor(286.0488, device='cuda:0')
episode: 167 training return: tensor(-50.9873, device='cuda:0')
epoch: 42 test_true_pfm: 2614.9338153320227 sim_pfm: 227.12520360358758
episode: 168 training return: tensor(-276.9976, device='cuda:0')
episode: 169 training return: tensor(138.7326, device='cuda:0')
episode: 170 training return: tensor(247.7899, device='cuda:0')
episode: 171 training return: tensor(-300.6231, device='cuda:0')
epoch: 43 test_true_pfm: 3103.4606974664543 sim_pfm: 155.99890298952232
episode: 172 training return: tensor(240.7627, device='cuda:0')
episode: 173 training return: tensor(190.1979, device='cuda:0')
episode: 174 training return: tensor(-232.8648, device='cuda:0')
episode: 175 training return: tensor(272.4684, device='cuda:0')
epoch: 44 test_true_pfm: 2996.8092370331633 sim_pfm: 83.45418824953958
episode: 176 training return: tensor(-213.0236, device='cuda:0')
episode: 177 training return: tensor(-293.3426, device='cuda:0')
episode: 178 training return: tensor(236.7013, device='cuda:0')
episode: 179 training return: tensor(-300.0197, device='cuda:0')
epoch: 45 test_true_pfm: 1871.6345171932016 sim_pfm: -109.79580804889945
episode: 180 training return: tensor(207.8881, device='cuda:0')
episode: 181 training return: tensor(-202.6754, device='cuda:0')
episode: 182 training return: tensor(287.4635, device='cuda:0')
episode: 183 training return: tensor(-234.8178, device='cuda:0')
epoch: 46 test_true_pfm: 2425.284400411065 sim_pfm: 67.20623802775906
episode: 184 training return: tensor(-265.2376, device='cuda:0')
episode: 185 training return: tensor(-287.4207, device='cuda:0')
episode: 186 training return: tensor(183.1701, device='cuda:0')
episode: 187 training return: tensor(-355.8323, device='cuda:0')
epoch: 47 test_true_pfm: 2312.2653954201483 sim_pfm: 174.44564165987927
episode: 188 training return: tensor(198.7260, device='cuda:0')
episode: 189 training return: tensor(208.1940, device='cuda:0')
episode: 190 training return: tensor(1.8270, device='cuda:0')
episode: 191 training return: tensor(-295.8614, device='cuda:0')
epoch: 48 test_true_pfm: 3211.1092661328275 sim_pfm: 195.45782620386066
episode: 192 training return: tensor(-281.0852, device='cuda:0')
episode: 193 training return: tensor(-304.1923, device='cuda:0')
episode: 194 training return: tensor(-306.5706, device='cuda:0')
episode: 195 training return: tensor(-61.0189, device='cuda:0')
epoch: 49 test_true_pfm: 2574.1349312771385 sim_pfm: -181.12605151336174
episode: 196 training return: tensor(-217.8210, device='cuda:0')
episode: 197 training return: tensor(-200.7167, device='cuda:0')
episode: 198 training return: tensor(-310.7250, device='cuda:0')
episode: 199 training return: tensor(-303.0507, device='cuda:0')
epoch: 50 test_true_pfm: 2763.746167454232 sim_pfm: -4.562644224963151
episode: 200 training return: tensor(-233.8650, device='cuda:0')
episode: 201 training return: tensor(-284.5768, device='cuda:0')
episode: 202 training return: tensor(-303.0813, device='cuda:0')
episode: 203 training return: tensor(-257.2018, device='cuda:0')
epoch: 51 test_true_pfm: 2843.3226833521 sim_pfm: 56.99940774894397
episode: 204 training return: tensor(119.0782, device='cuda:0')
episode: 205 training return: tensor(-156.8429, device='cuda:0')
episode: 206 training return: tensor(230.5551, device='cuda:0')
episode: 207 training return: tensor(-310.9626, device='cuda:0')
epoch: 52 test_true_pfm: 2878.753959268294 sim_pfm: 9.808764266568081
episode: 208 training return: tensor(63.3076, device='cuda:0')
episode: 209 training return: tensor(225.7534, device='cuda:0')
episode: 210 training return: tensor(-150.9297, device='cuda:0')
episode: 211 training return: tensor(-222.5414, device='cuda:0')
epoch: 53 test_true_pfm: 1587.225603762882 sim_pfm: -144.00376084644813
episode: 212 training return: tensor(283.1828, device='cuda:0')
episode: 213 training return: tensor(-3.2706, device='cuda:0')
episode: 214 training return: tensor(237.5984, device='cuda:0')
episode: 215 training return: tensor(-311.8460, device='cuda:0')
epoch: 54 test_true_pfm: 1688.1048399777799 sim_pfm: 30.208793769192805
episode: 216 training return: tensor(-311.8538, device='cuda:0')
episode: 217 training return: tensor(-7.7713, device='cuda:0')
episode: 218 training return: tensor(-284.7299, device='cuda:0')
episode: 219 training return: tensor(-276.4105, device='cuda:0')
epoch: 55 test_true_pfm: 1702.1522526803253 sim_pfm: -255.99274391793492
episode: 220 training return: tensor(163.0932, device='cuda:0')
episode: 221 training return: tensor(96.3316, device='cuda:0')
episode: 222 training return: tensor(-360.3749, device='cuda:0')
episode: 223 training return: tensor(-306.4735, device='cuda:0')
epoch: 56 test_true_pfm: 2741.095900326347 sim_pfm: -111.54167083262776
episode: 224 training return: tensor(-286.0037, device='cuda:0')
episode: 225 training return: tensor(248.8793, device='cuda:0')
episode: 226 training return: tensor(211.4053, device='cuda:0')
episode: 227 training return: tensor(-205.4655, device='cuda:0')
epoch: 57 test_true_pfm: 2320.905922908127 sim_pfm: 79.63944093248574
episode: 228 training return: tensor(-254.8144, device='cuda:0')
episode: 229 training return: tensor(-299.2990, device='cuda:0')
episode: 230 training return: tensor(218.3708, device='cuda:0')
episode: 231 training return: tensor(239.3783, device='cuda:0')
epoch: 58 test_true_pfm: 2682.4482477904 sim_pfm: -78.27847354125697
episode: 232 training return: tensor(238.8248, device='cuda:0')
episode: 233 training return: tensor(-212.6488, device='cuda:0')
episode: 234 training return: tensor(-265.5469, device='cuda:0')
episode: 235 training return: tensor(-261.0966, device='cuda:0')
epoch: 59 test_true_pfm: 1705.3906120906329 sim_pfm: 17.868930592909845
episode: 236 training return: tensor(-308.7481, device='cuda:0')
episode: 237 training return: tensor(-263.7878, device='cuda:0')
episode: 238 training return: tensor(-195.0929, device='cuda:0')
episode: 239 training return: tensor(-308.4584, device='cuda:0')
epoch: 60 test_true_pfm: 2132.8224510510217 sim_pfm: 45.96574376351782
episode: 240 training return: tensor(-209.8033, device='cuda:0')
episode: 241 training return: tensor(286.6321, device='cuda:0')
episode: 242 training return: tensor(206.4618, device='cuda:0')
episode: 243 training return: tensor(-209.6301, device='cuda:0')
epoch: 61 test_true_pfm: 2644.233251197396 sim_pfm: -45.83103408028061
episode: 244 training return: tensor(58.8818, device='cuda:0')
episode: 245 training return: tensor(232.8432, device='cuda:0')
episode: 246 training return: tensor(-308.6364, device='cuda:0')
episode: 247 training return: tensor(-125.2091, device='cuda:0')
epoch: 62 test_true_pfm: 2650.332715505691 sim_pfm: -89.22814904289164
episode: 248 training return: tensor(238.3287, device='cuda:0')
episode: 249 training return: tensor(236.1029, device='cuda:0')
episode: 250 training return: tensor(-299.0095, device='cuda:0')
episode: 251 training return: tensor(-314.4778, device='cuda:0')
epoch: 63 test_true_pfm: 1718.173390707286 sim_pfm: -133.77061820250432
episode: 252 training return: tensor(-282.6877, device='cuda:0')
episode: 253 training return: tensor(-68.6555, device='cuda:0')
episode: 254 training return: tensor(-407.8073, device='cuda:0')
episode: 255 training return: tensor(-300.8640, device='cuda:0')
epoch: 64 test_true_pfm: 2247.493328939617 sim_pfm: 43.32330118938504
episode: 256 training return: tensor(-35.8755, device='cuda:0')
episode: 257 training return: tensor(-316.2957, device='cuda:0')
episode: 258 training return: tensor(-35.9909, device='cuda:0')
episode: 259 training return: tensor(187.4086, device='cuda:0')
epoch: 65 test_true_pfm: 2825.3461425924397 sim_pfm: -57.58334179908464
episode: 260 training return: tensor(-110.3217, device='cuda:0')
episode: 261 training return: tensor(240.8651, device='cuda:0')
episode: 262 training return: tensor(-359.2152, device='cuda:0')
episode: 263 training return: tensor(-245.3168, device='cuda:0')
epoch: 66 test_true_pfm: 2302.0829521962164 sim_pfm: -104.9582664888682
episode: 264 training return: tensor(-309.5007, device='cuda:0')
episode: 265 training return: tensor(-295.9525, device='cuda:0')
episode: 266 training return: tensor(-168.9796, device='cuda:0')
episode: 267 training return: tensor(-302.3733, device='cuda:0')
epoch: 67 test_true_pfm: 2456.2397442294846 sim_pfm: -77.1276825760142
episode: 268 training return: tensor(-287.2881, device='cuda:0')
episode: 269 training return: tensor(-310.4435, device='cuda:0')
episode: 270 training return: tensor(97.7227, device='cuda:0')
episode: 271 training return: tensor(-215.3302, device='cuda:0')
epoch: 68 test_true_pfm: 2743.145582620835 sim_pfm: 76.1275697874371
episode: 272 training return: tensor(-310.1019, device='cuda:0')
episode: 273 training return: tensor(-48.6941, device='cuda:0')
episode: 274 training return: tensor(247.9035, device='cuda:0')
episode: 275 training return: tensor(250.5758, device='cuda:0')
epoch: 69 test_true_pfm: 2253.2126849429374 sim_pfm: -76.54253637953661
episode: 276 training return: tensor(-305.6925, device='cuda:0')
episode: 277 training return: tensor(-139.4001, device='cuda:0')
episode: 278 training return: tensor(238.5073, device='cuda:0')
episode: 279 training return: tensor(221.5068, device='cuda:0')
epoch: 70 test_true_pfm: 2287.5451154946404 sim_pfm: -50.53275582566857
episode: 280 training return: tensor(229.9872, device='cuda:0')
episode: 281 training return: tensor(-206.9303, device='cuda:0')
episode: 282 training return: tensor(85.8174, device='cuda:0')
episode: 283 training return: tensor(134.5870, device='cuda:0')
epoch: 71 test_true_pfm: 3262.3529793531893 sim_pfm: 75.24175104552221
episode: 284 training return: tensor(-312.1179, device='cuda:0')
episode: 285 training return: tensor(-304.5142, device='cuda:0')
episode: 286 training return: tensor(-69.8097, device='cuda:0')
episode: 287 training return: tensor(-166.6613, device='cuda:0')
epoch: 72 test_true_pfm: 2392.51755385365 sim_pfm: -237.2574377610969
episode: 288 training return: tensor(-248.5750, device='cuda:0')
episode: 289 training return: tensor(-306.8799, device='cuda:0')
episode: 290 training return: tensor(213.1329, device='cuda:0')
episode: 291 training return: tensor(207.2361, device='cuda:0')
epoch: 73 test_true_pfm: 2414.7177872037787 sim_pfm: -255.417936356583
episode: 292 training return: tensor(-147.0271, device='cuda:0')
episode: 293 training return: tensor(-385.1262, device='cuda:0')
episode: 294 training return: tensor(-371.0492, device='cuda:0')
episode: 295 training return: tensor(-250.2570, device='cuda:0')
epoch: 74 test_true_pfm: 2605.0026843443775 sim_pfm: 29.661867106566206
episode: 296 training return: tensor(237.6366, device='cuda:0')
episode: 297 training return: tensor(-122.5786, device='cuda:0')
episode: 298 training return: tensor(-287.6326, device='cuda:0')
episode: 299 training return: tensor(-312.7528, device='cuda:0')
epoch: 75 test_true_pfm: 2623.0298469172 sim_pfm: -69.0871710627495
episode: 300 training return: tensor(48.1107, device='cuda:0')
episode: 301 training return: tensor(-308.7670, device='cuda:0')
episode: 302 training return: tensor(-146.7302, device='cuda:0')
episode: 303 training return: tensor(-126.2563, device='cuda:0')
epoch: 76 test_true_pfm: 1970.3410453625581 sim_pfm: 63.098302830088265
episode: 304 training return: tensor(238.7245, device='cuda:0')
episode: 305 training return: tensor(92.5459, device='cuda:0')
episode: 306 training return: tensor(-160.8754, device='cuda:0')
episode: 307 training return: tensor(-287.6595, device='cuda:0')
epoch: 77 test_true_pfm: 2197.2813351194113 sim_pfm: -237.17847601277754
episode: 308 training return: tensor(246.1369, device='cuda:0')
episode: 309 training return: tensor(-404.9106, device='cuda:0')
episode: 310 training return: tensor(-92.0009, device='cuda:0')
episode: 311 training return: tensor(278.0286, device='cuda:0')
epoch: 78 test_true_pfm: 2763.2251367716212 sim_pfm: -95.70984762860462
episode: 312 training return: tensor(-303.6010, device='cuda:0')
episode: 313 training return: tensor(-209.1425, device='cuda:0')
episode: 314 training return: tensor(-310.3356, device='cuda:0')
episode: 315 training return: tensor(-276.2784, device='cuda:0')
epoch: 79 test_true_pfm: 1994.9894944315436 sim_pfm: 12.219680409994908
episode: 316 training return: tensor(190.3965, device='cuda:0')
episode: 317 training return: tensor(204.7940, device='cuda:0')
episode: 318 training return: tensor(221.6683, device='cuda:0')
episode: 319 training return: tensor(225.1122, device='cuda:0')
epoch: 80 test_true_pfm: 3200.8529268919733 sim_pfm: 100.77155678666895
episode: 320 training return: tensor(-239.2824, device='cuda:0')
episode: 321 training return: tensor(-129.1756, device='cuda:0')
episode: 322 training return: tensor(128.6857, device='cuda:0')
episode: 323 training return: tensor(35.8868, device='cuda:0')
epoch: 81 test_true_pfm: 2966.490688170034 sim_pfm: -90.67896758244994
episode: 324 training return: tensor(-215.0405, device='cuda:0')
episode: 325 training return: tensor(44.8297, device='cuda:0')
episode: 326 training return: tensor(-310.1953, device='cuda:0')
episode: 327 training return: tensor(-231.1693, device='cuda:0')
epoch: 82 test_true_pfm: 1762.7913406932446 sim_pfm: -93.37453590061826
episode: 328 training return: tensor(286.4308, device='cuda:0')
episode: 329 training return: tensor(210.6341, device='cuda:0')
episode: 330 training return: tensor(-219.9798, device='cuda:0')
episode: 331 training return: tensor(-301.7781, device='cuda:0')
epoch: 83 test_true_pfm: 2175.222204640457 sim_pfm: 74.99080401933558
episode: 332 training return: tensor(114.1349, device='cuda:0')
episode: 333 training return: tensor(-250.8867, device='cuda:0')
episode: 334 training return: tensor(-146.3766, device='cuda:0')
episode: 335 training return: tensor(-203.9026, device='cuda:0')
epoch: 84 test_true_pfm: 2229.109639367902 sim_pfm: -84.22040720415923
episode: 336 training return: tensor(231.7848, device='cuda:0')
episode: 337 training return: tensor(-306.2023, device='cuda:0')
episode: 338 training return: tensor(-364.6552, device='cuda:0')
episode: 339 training return: tensor(152.7115, device='cuda:0')
epoch: 85 test_true_pfm: 1781.4871433317223 sim_pfm: -30.669711709342664
episode: 340 training return: tensor(-301.8079, device='cuda:0')
episode: 341 training return: tensor(-312.4318, device='cuda:0')
episode: 342 training return: tensor(-280.8596, device='cuda:0')
episode: 343 training return: tensor(-385.6038, device='cuda:0')
epoch: 86 test_true_pfm: 2175.1400465393513 sim_pfm: -271.66431247857207
episode: 344 training return: tensor(161.9543, device='cuda:0')
episode: 345 training return: tensor(81.0034, device='cuda:0')
episode: 346 training return: tensor(234.6004, device='cuda:0')
episode: 347 training return: tensor(197.2085, device='cuda:0')
epoch: 87 test_true_pfm: 1898.6178244724308 sim_pfm: 62.77247412845221
episode: 348 training return: tensor(-49.3712, device='cuda:0')
episode: 349 training return: tensor(-305.9077, device='cuda:0')
episode: 350 training return: tensor(254.0304, device='cuda:0')
episode: 351 training return: tensor(235.5246, device='cuda:0')
epoch: 88 test_true_pfm: 2704.6740314553467 sim_pfm: -0.8927185469462225
episode: 352 training return: tensor(-247.8258, device='cuda:0')
episode: 353 training return: tensor(-2.5173, device='cuda:0')
episode: 354 training return: tensor(-269.1677, device='cuda:0')
episode: 355 training return: tensor(-314.1111, device='cuda:0')
epoch: 89 test_true_pfm: 3279.7340234834887 sim_pfm: -108.04749624584413
episode: 356 training return: tensor(140.1116, device='cuda:0')
episode: 357 training return: tensor(-302.1570, device='cuda:0')
episode: 358 training return: tensor(59.8710, device='cuda:0')
episode: 359 training return: tensor(116.1670, device='cuda:0')
epoch: 90 test_true_pfm: 2388.6607945096316 sim_pfm: -233.772525383858
episode: 360 training return: tensor(265.6079, device='cuda:0')
episode: 361 training return: tensor(105.3057, device='cuda:0')
episode: 362 training return: tensor(-293.7041, device='cuda:0')
episode: 363 training return: tensor(-306.4070, device='cuda:0')
epoch: 91 test_true_pfm: 2360.9046377514915 sim_pfm: 81.2609211296076
episode: 364 training return: tensor(-277.0716, device='cuda:0')
episode: 365 training return: tensor(-306.7438, device='cuda:0')
episode: 366 training return: tensor(-312.0856, device='cuda:0')
episode: 367 training return: tensor(-263.9137, device='cuda:0')
epoch: 92 test_true_pfm: 2315.0145279797 sim_pfm: -76.90416815498611
episode: 368 training return: tensor(-415.9456, device='cuda:0')
episode: 369 training return: tensor(236.8338, device='cuda:0')
episode: 370 training return: tensor(248.5662, device='cuda:0')
episode: 371 training return: tensor(-274.7154, device='cuda:0')
epoch: 93 test_true_pfm: 1722.2844943343061 sim_pfm: 56.87904145407568
episode: 372 training return: tensor(-286.2560, device='cuda:0')
episode: 373 training return: tensor(-291.5499, device='cuda:0')
episode: 374 training return: tensor(-243.7335, device='cuda:0')
episode: 375 training return: tensor(-268.9262, device='cuda:0')
epoch: 94 test_true_pfm: 2751.075961213485 sim_pfm: -190.82487411036468
episode: 376 training return: tensor(247.0094, device='cuda:0')
episode: 377 training return: tensor(210.1617, device='cuda:0')
episode: 378 training return: tensor(-269.6259, device='cuda:0')
episode: 379 training return: tensor(-301.2014, device='cuda:0')
epoch: 95 test_true_pfm: 2590.3262048899755 sim_pfm: -260.24665647213504
episode: 380 training return: tensor(-265.4609, device='cuda:0')
episode: 381 training return: tensor(-294.9050, device='cuda:0')
episode: 382 training return: tensor(227.7030, device='cuda:0')
episode: 383 training return: tensor(-215.8749, device='cuda:0')
epoch: 96 test_true_pfm: 1640.1903890630945 sim_pfm: -130.9817120662192
episode: 384 training return: tensor(20.9745, device='cuda:0')
episode: 385 training return: tensor(250.0713, device='cuda:0')
episode: 386 training return: tensor(138.4745, device='cuda:0')
episode: 387 training return: tensor(-310.0830, device='cuda:0')
epoch: 97 test_true_pfm: 2326.9307008924034 sim_pfm: 171.74965011578752
episode: 388 training return: tensor(-278.3840, device='cuda:0')
episode: 389 training return: tensor(223.7117, device='cuda:0')
episode: 390 training return: tensor(10.8629, device='cuda:0')
episode: 391 training return: tensor(-360.2070, device='cuda:0')
epoch: 98 test_true_pfm: 2871.8188686520443 sim_pfm: 113.72506811421287
episode: 392 training return: tensor(-144.3214, device='cuda:0')
episode: 393 training return: tensor(254.9134, device='cuda:0')
episode: 394 training return: tensor(-304.4235, device='cuda:0')
episode: 395 training return: tensor(-133.1959, device='cuda:0')
epoch: 99 test_true_pfm: 2309.1674012185126 sim_pfm: -231.18946214280245
episode: 396 training return: tensor(225.7064, device='cuda:0')
episode: 397 training return: tensor(-199.7424, device='cuda:0')
episode: 398 training return: tensor(203.6261, device='cuda:0')
episode: 399 training return: tensor(-223.7070, device='cuda:0')
epoch: 100 test_true_pfm: 2782.0795310114227 sim_pfm: 76.83330239797942
episode: 400 training return: tensor(-68.4829, device='cuda:0')
episode: 401 training return: tensor(-377.4328, device='cuda:0')
episode: 402 training return: tensor(247.1794, device='cuda:0')
episode: 403 training return: tensor(203.3284, device='cuda:0')
epoch: 101 test_true_pfm: 2735.655498323629 sim_pfm: 69.98088501719758
episode: 404 training return: tensor(-227.5270, device='cuda:0')
episode: 405 training return: tensor(-262.7621, device='cuda:0')
episode: 406 training return: tensor(-279.8596, device='cuda:0')
episode: 407 training return: tensor(-87.8335, device='cuda:0')
epoch: 102 test_true_pfm: 2189.3570457984356 sim_pfm: -57.547864319446184
episode: 408 training return: tensor(168.9258, device='cuda:0')
episode: 409 training return: tensor(-34.0839, device='cuda:0')
episode: 410 training return: tensor(31.6107, device='cuda:0')
episode: 411 training return: tensor(233.8972, device='cuda:0')
epoch: 103 test_true_pfm: 2413.4629732790686 sim_pfm: 44.739476018585265
episode: 412 training return: tensor(-296.7947, device='cuda:0')
episode: 413 training return: tensor(-231.6169, device='cuda:0')
episode: 414 training return: tensor(76.9998, device='cuda:0')
episode: 415 training return: tensor(233.1929, device='cuda:0')
epoch: 104 test_true_pfm: 2248.3814800894966 sim_pfm: -117.73356661262612
episode: 416 training return: tensor(-309.4126, device='cuda:0')
episode: 417 training return: tensor(-276.3845, device='cuda:0')
episode: 418 training return: tensor(245.8669, device='cuda:0')
episode: 419 training return: tensor(210.0802, device='cuda:0')
epoch: 105 test_true_pfm: 2688.0255029599116 sim_pfm: -208.36836727198292
episode: 420 training return: tensor(199.8193, device='cuda:0')
episode: 421 training return: tensor(220.2745, device='cuda:0')
episode: 422 training return: tensor(-128.5004, device='cuda:0')
episode: 423 training return: tensor(-56.6750, device='cuda:0')
epoch: 106 test_true_pfm: 3075.676565094224 sim_pfm: -146.44900450960267
episode: 424 training return: tensor(-217.3483, device='cuda:0')
episode: 425 training return: tensor(-85.2162, device='cuda:0')
episode: 426 training return: tensor(-233.9046, device='cuda:0')
episode: 427 training return: tensor(222.8347, device='cuda:0')
epoch: 107 test_true_pfm: 3246.274715628761 sim_pfm: -79.43285377915406
episode: 428 training return: tensor(99.0629, device='cuda:0')
episode: 429 training return: tensor(135.2952, device='cuda:0')
episode: 430 training return: tensor(45.7154, device='cuda:0')
episode: 431 training return: tensor(220.7650, device='cuda:0')
epoch: 108 test_true_pfm: 2318.1525673932197 sim_pfm: 92.97880442911992
episode: 432 training return: tensor(-256.1385, device='cuda:0')
episode: 433 training return: tensor(195.1206, device='cuda:0')
episode: 434 training return: tensor(249.6322, device='cuda:0')
episode: 435 training return: tensor(-25.8272, device='cuda:0')
epoch: 109 test_true_pfm: 2423.2813573342332 sim_pfm: -234.21304935516673
episode: 436 training return: tensor(-310.2668, device='cuda:0')
episode: 437 training return: tensor(236.7284, device='cuda:0')
episode: 438 training return: tensor(-236.2147, device='cuda:0')
episode: 439 training return: tensor(-217.4619, device='cuda:0')
epoch: 110 test_true_pfm: 2245.0943700169787 sim_pfm: -248.1684283198944
episode: 440 training return: tensor(212.1421, device='cuda:0')
episode: 441 training return: tensor(-272.5919, device='cuda:0')
episode: 442 training return: tensor(-219.4716, device='cuda:0')
episode: 443 training return: tensor(208.1222, device='cuda:0')
epoch: 111 test_true_pfm: 1916.10748534536 sim_pfm: -78.34598325640157
episode: 444 training return: tensor(53.4094, device='cuda:0')
episode: 445 training return: tensor(-305.3297, device='cuda:0')
episode: 446 training return: tensor(-175.5721, device='cuda:0')
episode: 447 training return: tensor(-330.3253, device='cuda:0')
epoch: 112 test_true_pfm: 2747.529763992441 sim_pfm: -196.29209608297484
episode: 448 training return: tensor(226.3683, device='cuda:0')
episode: 449 training return: tensor(-212.6983, device='cuda:0')
episode: 450 training return: tensor(35.8053, device='cuda:0')
episode: 451 training return: tensor(-111.0324, device='cuda:0')
epoch: 113 test_true_pfm: 2559.8869986506656 sim_pfm: 55.47649895553089
episode: 452 training return: tensor(-232.2739, device='cuda:0')
episode: 453 training return: tensor(-249.0694, device='cuda:0')
episode: 454 training return: tensor(234.4220, device='cuda:0')
episode: 455 training return: tensor(-308.8923, device='cuda:0')
epoch: 114 test_true_pfm: 2419.2174514654275 sim_pfm: 15.155988477403298
episode: 456 training return: tensor(-133.2027, device='cuda:0')
episode: 457 training return: tensor(-304.5199, device='cuda:0')
episode: 458 training return: tensor(-311.5028, device='cuda:0')
episode: 459 training return: tensor(-302.4911, device='cuda:0')
epoch: 115 test_true_pfm: 2314.1852674698584 sim_pfm: 69.29741646628827
episode: 460 training return: tensor(-301.8891, device='cuda:0')
episode: 461 training return: tensor(130.3379, device='cuda:0')
episode: 462 training return: tensor(234.0916, device='cuda:0')
episode: 463 training return: tensor(-264.9266, device='cuda:0')
epoch: 116 test_true_pfm: 1768.3211355681062 sim_pfm: -126.48376128952562
episode: 464 training return: tensor(241.3096, device='cuda:0')
episode: 465 training return: tensor(238.8291, device='cuda:0')
episode: 466 training return: tensor(-235.5882, device='cuda:0')
episode: 467 training return: tensor(-4.3396, device='cuda:0')
epoch: 117 test_true_pfm: 1730.5070596714068 sim_pfm: 78.68745877511294
episode: 468 training return: tensor(-400.5950, device='cuda:0')
episode: 469 training return: tensor(-311.0434, device='cuda:0')
episode: 470 training return: tensor(-141.9994, device='cuda:0')
episode: 471 training return: tensor(116.8452, device='cuda:0')
epoch: 118 test_true_pfm: 2661.094218105973 sim_pfm: -135.85268453198174
episode: 472 training return: tensor(-218.2710, device='cuda:0')
episode: 473 training return: tensor(292.2148, device='cuda:0')
episode: 474 training return: tensor(-133.4430, device='cuda:0')
episode: 475 training return: tensor(82.3990, device='cuda:0')
epoch: 119 test_true_pfm: 3095.3121019271334 sim_pfm: -259.0338755190217
episode: 476 training return: tensor(-283.4900, device='cuda:0')
episode: 477 training return: tensor(-131.1460, device='cuda:0')
episode: 478 training return: tensor(-237.0489, device='cuda:0')
episode: 479 training return: tensor(229.1533, device='cuda:0')
epoch: 120 test_true_pfm: 2606.729676904423 sim_pfm: -58.14848218064677
episode: 480 training return: tensor(237.9883, device='cuda:0')
episode: 481 training return: tensor(-308.4940, device='cuda:0')
episode: 482 training return: tensor(287.5006, device='cuda:0')
episode: 483 training return: tensor(11.8586, device='cuda:0')
epoch: 121 test_true_pfm: 1763.952022051094 sim_pfm: -231.52556266536703
episode: 484 training return: tensor(236.1624, device='cuda:0')
episode: 485 training return: tensor(227.3450, device='cuda:0')
episode: 486 training return: tensor(56.0247, device='cuda:0')
episode: 487 training return: tensor(10.7655, device='cuda:0')
epoch: 122 test_true_pfm: 1859.5717011777726 sim_pfm: 96.60905938433523
episode: 488 training return: tensor(-279.5622, device='cuda:0')
episode: 489 training return: tensor(-307.1884, device='cuda:0')
episode: 490 training return: tensor(211.0995, device='cuda:0')
episode: 491 training return: tensor(218.8323, device='cuda:0')
epoch: 123 test_true_pfm: 1873.7048670097436 sim_pfm: 20.102699253737228
episode: 492 training return: tensor(-252.8447, device='cuda:0')
episode: 493 training return: tensor(-251.4818, device='cuda:0')
episode: 494 training return: tensor(-266.0414, device='cuda:0')
episode: 495 training return: tensor(-42.7991, device='cuda:0')
epoch: 124 test_true_pfm: 2238.0702518298967 sim_pfm: -154.36588413099525
episode: 496 training return: tensor(-332.7301, device='cuda:0')
episode: 497 training return: tensor(-215.2616, device='cuda:0')
episode: 498 training return: tensor(37.6829, device='cuda:0')
episode: 499 training return: tensor(-267.8282, device='cuda:0')
epoch: 125 test_true_pfm: 1718.5094505009374 sim_pfm: 75.98335786684765
episode: 500 training return: tensor(251.4713, device='cuda:0')
episode: 501 training return: tensor(-232.9390, device='cuda:0')
episode: 502 training return: tensor(-273.9489, device='cuda:0')
episode: 503 training return: tensor(234.2170, device='cuda:0')
epoch: 126 test_true_pfm: 1959.1915219951582 sim_pfm: -191.97964298771694
episode: 504 training return: tensor(-131.6948, device='cuda:0')
episode: 505 training return: tensor(-400.5104, device='cuda:0')
episode: 506 training return: tensor(-247.9357, device='cuda:0')
episode: 507 training return: tensor(238.0826, device='cuda:0')
epoch: 127 test_true_pfm: 1775.4448008946338 sim_pfm: -194.90329113268913
episode: 508 training return: tensor(268.5074, device='cuda:0')
episode: 509 training return: tensor(-311.0500, device='cuda:0')
episode: 510 training return: tensor(-83.0909, device='cuda:0')
episode: 511 training return: tensor(-272.2264, device='cuda:0')
epoch: 128 test_true_pfm: 1697.5043568564306 sim_pfm: -33.14229520619847
episode: 512 training return: tensor(185.7929, device='cuda:0')
episode: 513 training return: tensor(-135.8619, device='cuda:0')
episode: 514 training return: tensor(-217.1853, device='cuda:0')
episode: 515 training return: tensor(-256.1613, device='cuda:0')
epoch: 129 test_true_pfm: 2596.2833544525383 sim_pfm: -260.00672134093475
episode: 516 training return: tensor(243.2147, device='cuda:0')
episode: 517 training return: tensor(211.6183, device='cuda:0')
episode: 518 training return: tensor(249.0003, device='cuda:0')
episode: 519 training return: tensor(-256.9296, device='cuda:0')
epoch: 130 test_true_pfm: 2362.6353871660144 sim_pfm: 137.2865718168808
episode: 520 training return: tensor(-258.3922, device='cuda:0')
episode: 521 training return: tensor(244.2339, device='cuda:0')
episode: 522 training return: tensor(227.6640, device='cuda:0')
episode: 523 training return: tensor(108.9341, device='cuda:0')
epoch: 131 test_true_pfm: 2676.42149762186 sim_pfm: 121.47730374893096
episode: 524 training return: tensor(-307.3162, device='cuda:0')
episode: 525 training return: tensor(-312.0921, device='cuda:0')
episode: 526 training return: tensor(-225.8358, device='cuda:0')
episode: 527 training return: tensor(-349.9904, device='cuda:0')
epoch: 132 test_true_pfm: 2898.1342850425194 sim_pfm: -71.90487160366804
episode: 528 training return: tensor(-138.3220, device='cuda:0')
episode: 529 training return: tensor(-259.9721, device='cuda:0')
episode: 530 training return: tensor(296.4951, device='cuda:0')
episode: 531 training return: tensor(244.9019, device='cuda:0')
epoch: 133 test_true_pfm: 2557.8850292695156 sim_pfm: -43.60042182711186
episode: 532 training return: tensor(-196.6897, device='cuda:0')
episode: 533 training return: tensor(84.7308, device='cuda:0')
episode: 534 training return: tensor(-222.3568, device='cuda:0')
episode: 535 training return: tensor(-36.4693, device='cuda:0')
epoch: 134 test_true_pfm: 2292.671993517945 sim_pfm: 77.07570655849607
episode: 536 training return: tensor(-383.0741, device='cuda:0')
episode: 537 training return: tensor(201.3197, device='cuda:0')
episode: 538 training return: tensor(-254.1461, device='cuda:0')
episode: 539 training return: tensor(-287.7274, device='cuda:0')
epoch: 135 test_true_pfm: 1979.4599648658987 sim_pfm: -83.48826939278904
episode: 540 training return: tensor(-256.4866, device='cuda:0')
episode: 541 training return: tensor(61.5017, device='cuda:0')
episode: 542 training return: tensor(-283.4111, device='cuda:0')
episode: 543 training return: tensor(265.9777, device='cuda:0')
epoch: 136 test_true_pfm: 1885.87679080888 sim_pfm: -215.16616033234945
episode: 544 training return: tensor(-296.6959, device='cuda:0')
episode: 545 training return: tensor(265.0575, device='cuda:0')
episode: 546 training return: tensor(206.9932, device='cuda:0')
episode: 547 training return: tensor(-130.2288, device='cuda:0')
epoch: 137 test_true_pfm: 2268.018885049152 sim_pfm: -202.11023588616322
episode: 548 training return: tensor(-110.1108, device='cuda:0')
episode: 549 training return: tensor(235.2332, device='cuda:0')
episode: 550 training return: tensor(256.3630, device='cuda:0')
episode: 551 training return: tensor(114.4778, device='cuda:0')
epoch: 138 test_true_pfm: 2697.3902235823093 sim_pfm: -83.3545736495095
episode: 552 training return: tensor(-136.0410, device='cuda:0')
episode: 553 training return: tensor(-307.6991, device='cuda:0')
episode: 554 training return: tensor(226.0907, device='cuda:0')
episode: 555 training return: tensor(1.5920, device='cuda:0')
epoch: 139 test_true_pfm: 2764.892556627665 sim_pfm: 269.49966471650015
episode: 556 training return: tensor(-67.3072, device='cuda:0')
episode: 557 training return: tensor(-311.2668, device='cuda:0')
episode: 558 training return: tensor(-207.1571, device='cuda:0')
episode: 559 training return: tensor(-227.6390, device='cuda:0')
epoch: 140 test_true_pfm: 2648.3652313823536 sim_pfm: -73.93085012394779
episode: 560 training return: tensor(219.3464, device='cuda:0')
episode: 561 training return: tensor(220.6180, device='cuda:0')
episode: 562 training return: tensor(-215.6747, device='cuda:0')
episode: 563 training return: tensor(-84.6375, device='cuda:0')
epoch: 141 test_true_pfm: 2248.907119705771 sim_pfm: 82.49446924380027
episode: 564 training return: tensor(-175.2929, device='cuda:0')
episode: 565 training return: tensor(-354.7428, device='cuda:0')
episode: 566 training return: tensor(-311.5394, device='cuda:0')
episode: 567 training return: tensor(-138.7960, device='cuda:0')
epoch: 142 test_true_pfm: 2158.3083394862297 sim_pfm: 97.78642716076381
episode: 568 training return: tensor(-370.8278, device='cuda:0')
episode: 569 training return: tensor(231.8121, device='cuda:0')
episode: 570 training return: tensor(-303.9590, device='cuda:0')
episode: 571 training return: tensor(-239.7213, device='cuda:0')
epoch: 143 test_true_pfm: 2180.207976711281 sim_pfm: -10.435755784409897
episode: 572 training return: tensor(-129.3679, device='cuda:0')
episode: 573 training return: tensor(44.8557, device='cuda:0')
episode: 574 training return: tensor(-253.0043, device='cuda:0')
episode: 575 training return: tensor(284.3593, device='cuda:0')
epoch: 144 test_true_pfm: 2413.8868816029744 sim_pfm: 89.35129910854933
episode: 576 training return: tensor(178.9394, device='cuda:0')
episode: 577 training return: tensor(-259.8950, device='cuda:0')
episode: 578 training return: tensor(237.1357, device='cuda:0')
episode: 579 training return: tensor(-127.2505, device='cuda:0')
epoch: 145 test_true_pfm: 2173.0642610349596 sim_pfm: -68.11612575809704
episode: 580 training return: tensor(206.4799, device='cuda:0')
episode: 581 training return: tensor(47.4329, device='cuda:0')
episode: 582 training return: tensor(-234.9458, device='cuda:0')
episode: 583 training return: tensor(-236.3527, device='cuda:0')
epoch: 146 test_true_pfm: 2318.9435247723322 sim_pfm: -185.94575938015865
episode: 584 training return: tensor(-232.6679, device='cuda:0')
episode: 585 training return: tensor(-277.9340, device='cuda:0')
episode: 586 training return: tensor(70.2419, device='cuda:0')
episode: 587 training return: tensor(-312.7508, device='cuda:0')
epoch: 147 test_true_pfm: 2886.4149829121775 sim_pfm: 85.62279541569296
episode: 588 training return: tensor(-275.2431, device='cuda:0')
episode: 589 training return: tensor(-131.1827, device='cuda:0')
episode: 590 training return: tensor(-239.3791, device='cuda:0')
episode: 591 training return: tensor(-286.6602, device='cuda:0')
epoch: 148 test_true_pfm: 2912.8636472198364 sim_pfm: 105.52555308459948
episode: 592 training return: tensor(232.2506, device='cuda:0')
episode: 593 training return: tensor(-289.7128, device='cuda:0')
episode: 594 training return: tensor(148.6819, device='cuda:0')
episode: 595 training return: tensor(52.5892, device='cuda:0')
epoch: 149 test_true_pfm: 2278.5963898425603 sim_pfm: 63.1013188107269
episode: 596 training return: tensor(-130.9933, device='cuda:0')
episode: 597 training return: tensor(241.7043, device='cuda:0')
episode: 598 training return: tensor(84.1569, device='cuda:0')
episode: 599 training return: tensor(-20.1730, device='cuda:0')
epoch: 150 test_true_pfm: 2094.728661127878 sim_pfm: -17.141859149366308
