['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '1']
epoch: 0 training_loss 0.4035611014068127 test_loss: 0.2991692543029785
epoch: 1 training_loss 0.26796757981181146 test_loss: 0.26290843486785886
epoch: 2 training_loss 0.25097267314791677 test_loss: 0.2298510789871216
epoch: 3 training_loss 0.26193311125040053 test_loss: 0.21651315689086914
epoch: 4 training_loss 0.23894909858703614 test_loss: 0.2210594892501831
epoch: 5 training_loss 0.23726559773087502 test_loss: 0.2342470645904541
epoch: 6 training_loss 0.22789764061570167 test_loss: 0.20830042362213136
epoch: 7 training_loss 0.2221799271553755 test_loss: 0.2116924047470093
epoch: 8 training_loss 0.21145476430654525 test_loss: 0.2364180564880371
epoch: 9 training_loss 0.21054404422640802 test_loss: 0.21806786060333253
epoch: 10 training_loss 0.22193682357668876 test_loss: 0.2002255916595459
epoch: 11 training_loss 0.22271913431584836 test_loss: 0.2072040319442749
epoch: 12 training_loss 0.21679938226938247 test_loss: 0.21458835601806642
epoch: 13 training_loss 0.21417389333248138 test_loss: 0.20710301399230957
epoch: 14 training_loss 0.21620904609560967 test_loss: 0.21859130859375
epoch: 15 training_loss 0.22170394010841846 test_loss: 0.20069470405578613
epoch: 16 training_loss 0.2162018133699894 test_loss: 0.2677441596984863
epoch: 17 training_loss 0.21922054708004 test_loss: 0.21418461799621583
epoch: 18 training_loss 0.20552618861198424 test_loss: 0.21651084423065187
epoch: 19 training_loss 0.1998511078953743 test_loss: 0.20084519386291505
epoch: 20 training_loss 0.21802364617586137 test_loss: 0.20554962158203124
epoch: 21 training_loss 0.2046134252101183 test_loss: 0.20765674114227295
epoch: 22 training_loss 0.20629010938107967 test_loss: 0.1944529175758362
epoch: 23 training_loss 0.1993803310394287 test_loss: 0.21448559761047364
epoch: 24 training_loss 0.21079949028789996 test_loss: 0.18431535959243775
epoch: 25 training_loss 0.20850048765540122 test_loss: 0.19757524728775025
epoch: 26 training_loss 0.19422061920166014 test_loss: 0.18850212097167968
epoch: 27 training_loss 0.1996335759013891 test_loss: 0.19929317235946656
epoch: 28 training_loss 0.20525473348796366 test_loss: 0.19707738161087035
epoch: 29 training_loss 0.1993500392138958 test_loss: 0.196694815158844
epoch: 30 training_loss 0.19907972425222398 test_loss: 0.2703716516494751
epoch: 31 training_loss 0.2014555390179157 test_loss: 0.1949794888496399
epoch: 32 training_loss 0.20217500418424605 test_loss: 0.19568647146224977
epoch: 33 training_loss 0.19797439515590667 test_loss: 0.2060274600982666
epoch: 34 training_loss 0.19372393041849137 test_loss: 0.18664369583129883
epoch: 35 training_loss 0.19721322916448117 test_loss: 0.18368111848831176
epoch: 36 training_loss 0.19575276389718055 test_loss: 0.21020188331604003
epoch: 37 training_loss 0.20948575787246226 test_loss: 0.20772137641906738
epoch: 38 training_loss 0.20167189911007882 test_loss: 0.21061415672302247
epoch: 39 training_loss 0.20069795325398446 test_loss: 0.18742252588272096
epoch: 40 training_loss 0.2034423566609621 test_loss: 0.19613567590713502
epoch: 41 training_loss 0.19810461215674877 test_loss: 0.194511342048645
epoch: 42 training_loss 0.2037606578320265 test_loss: 0.19437540769577027
epoch: 43 training_loss 0.1994804410636425 test_loss: 0.19477899074554444
epoch: 44 training_loss 0.19907492063939572 test_loss: 0.203755784034729
epoch: 45 training_loss 0.20406795360147953 test_loss: 0.18513906002044678
epoch: 46 training_loss 0.18788537815213202 test_loss: 0.19841877222061158
epoch: 47 training_loss 0.20425917208194733 test_loss: 0.1773819088935852
epoch: 48 training_loss 0.19681392788887023 test_loss: 0.18834145069122316
epoch: 49 training_loss 0.20105476908385753 test_loss: 0.20527231693267822
epoch: 50 training_loss 0.19642095655202865 test_loss: 0.18518103361129762
epoch: 51 training_loss 0.19479735039174556 test_loss: 0.19293562173843384
epoch: 52 training_loss 0.18985728606581687 test_loss: 0.18729968070983888
epoch: 53 training_loss 0.18996834859251976 test_loss: 0.18645397424697877
epoch: 54 training_loss 0.19318332470953464 test_loss: 0.1944727420806885
epoch: 55 training_loss 0.19697200305759907 test_loss: 0.1927951455116272
epoch: 56 training_loss 0.1839081633090973 test_loss: 0.18727149963378906
epoch: 57 training_loss 0.196048678830266 test_loss: 0.18767855167388917
epoch: 58 training_loss 0.19120983548462392 test_loss: 0.1826770782470703
epoch: 59 training_loss 0.1928314659744501 test_loss: 0.1755608081817627
epoch: 60 training_loss 0.19795376665890216 test_loss: 0.18482958078384398
epoch: 61 training_loss 0.19258955389261245 test_loss: 0.18495973348617553
epoch: 62 training_loss 0.18848683953285217 test_loss: 0.16302843093872071
epoch: 63 training_loss 0.19851360224187375 test_loss: 0.17036740779876708
epoch: 64 training_loss 0.1909382026642561 test_loss: 0.18460216522216796
epoch: 65 training_loss 0.19457565143704414 test_loss: 0.1845456600189209
epoch: 66 training_loss 0.1953348021954298 test_loss: 0.18453692197799682
epoch: 67 training_loss 0.1922974029928446 test_loss: 0.18840850591659547
epoch: 68 training_loss 0.19064366810023783 test_loss: 0.17412983179092406
epoch: 69 training_loss 0.19420344479382037 test_loss: 0.18114930391311646
epoch: 70 training_loss 0.1949987230449915 test_loss: 0.18262001276016235
epoch: 71 training_loss 0.18579634048044683 test_loss: 0.16765997409820557
epoch: 72 training_loss 0.18694226525723934 test_loss: 0.18386708498001098
epoch: 73 training_loss 0.1924791670590639 test_loss: 0.2082052707672119
epoch: 74 training_loss 0.19208101354539395 test_loss: 0.18438318967819214
epoch: 75 training_loss 0.18569087482988833 test_loss: 0.16840988397598267
epoch: 76 training_loss 0.185539807677269 test_loss: 0.1842537760734558
epoch: 77 training_loss 0.19565314397215844 test_loss: 0.19238042831420898
epoch: 78 training_loss 0.20034852914512158 test_loss: 0.18897614479064942
epoch: 79 training_loss 0.18819607399404048 test_loss: 0.1940186381340027
epoch: 80 training_loss 0.1864612778276205 test_loss: 0.1800464630126953
epoch: 81 training_loss 0.19293545819818975 test_loss: 0.20334117412567138
epoch: 82 training_loss 0.1837398274242878 test_loss: 0.18689532279968263
epoch: 83 training_loss 0.18914844512939452 test_loss: 0.21470942497253417
epoch: 84 training_loss 0.19881734922528266 test_loss: 0.1950251817703247
epoch: 85 training_loss 0.18625434942543506 test_loss: 0.18138542175292968
epoch: 86 training_loss 0.1868711445480585 test_loss: 0.1800468683242798
epoch: 87 training_loss 0.18531874410808086 test_loss: 0.16510916948318483
epoch: 88 training_loss 0.19549248203635217 test_loss: 0.1852054476737976
epoch: 89 training_loss 0.1805719218403101 test_loss: 0.2023559808731079
epoch: 90 training_loss 0.19261987060308455 test_loss: 0.1760173797607422
epoch: 91 training_loss 0.19351604841649533 test_loss: 0.1885419964790344
epoch: 92 training_loss 0.1863252030313015 test_loss: 0.19007904529571534
epoch: 93 training_loss 0.19102318197488785 test_loss: 0.17719544172286988
epoch: 94 training_loss 0.18222906619310378 test_loss: 0.1929535150527954
epoch: 95 training_loss 0.18327938817441464 test_loss: 0.1862092137336731
epoch: 96 training_loss 0.1824294362962246 test_loss: 0.18569502830505372
epoch: 97 training_loss 0.18485907949507235 test_loss: 0.16885011196136473
epoch: 98 training_loss 0.18494450025260448 test_loss: 0.1966275691986084
epoch: 99 training_loss 0.1924982274323702 test_loss: 0.19825674295425416
epoch: 100 training_loss 0.19183747857809066 test_loss: 0.18984608650207518
epoch: 101 training_loss 0.1878842082619667 test_loss: 0.17877031564712526
epoch: 102 training_loss 0.18575610265135764 test_loss: 0.18043117523193358
epoch: 103 training_loss 0.18724448554217815 test_loss: 0.17599354982376098
epoch: 104 training_loss 0.18685359708964824 test_loss: 0.17348984479904175
epoch: 105 training_loss 0.1907468055933714 test_loss: 0.1929013729095459
epoch: 106 training_loss 0.18304054014384746 test_loss: 0.1938284993171692
epoch: 107 training_loss 0.18387671560049057 test_loss: 0.17670484781265258
epoch: 108 training_loss 0.1739265226572752 test_loss: 0.1830580711364746
epoch: 109 training_loss 0.18728391416370868 test_loss: 0.1685614228248596
epoch: 110 training_loss 0.1892027797549963 test_loss: 0.18313370943069457
epoch: 111 training_loss 0.18669234082102776 test_loss: 0.18623678684234618
epoch: 112 training_loss 0.1822164873033762 test_loss: 0.17643293142318725
epoch: 113 training_loss 0.18806556485593318 test_loss: 0.1804302453994751
epoch: 114 training_loss 0.17739567220211028 test_loss: 0.1859504222869873
epoch: 115 training_loss 0.18599649988114833 test_loss: 0.19026509523391724
epoch: 116 training_loss 0.18905466474592686 test_loss: 0.21472830772399903
epoch: 117 training_loss 0.18052167735993863 test_loss: 0.18602629899978637
epoch: 118 training_loss 0.18632874131202698 test_loss: 0.18144235610961915
epoch: 119 training_loss 0.18172940775752067 test_loss: 0.19505891799926758
epoch: 120 training_loss 0.18158432722091675 test_loss: 0.1918701410293579
epoch: 121 training_loss 0.1868411088734865 test_loss: 0.18588327169418334
epoch: 122 training_loss 0.17893286049365997 test_loss: 0.17605228424072267
epoch: 123 training_loss 0.18847812585532664 test_loss: 0.17871009111404418
epoch: 124 training_loss 0.18424631163477898 test_loss: 0.19173817634582518
epoch: 125 training_loss 0.1852172428369522 test_loss: 0.1912410855293274
epoch: 126 training_loss 0.1801463719457388 test_loss: 0.17091963291168213
epoch: 127 training_loss 0.18675641886889935 test_loss: 0.19155256748199462
epoch: 128 training_loss 0.19012176923453808 test_loss: 0.17829318046569825
epoch: 129 training_loss 0.17625991933047772 test_loss: 0.17640668153762817
epoch: 130 training_loss 0.18688635133206843 test_loss: 0.16566425561904907
epoch: 131 training_loss 0.18265485107898713 test_loss: 0.18509162664413453
epoch: 132 training_loss 0.18222060725092887 test_loss: 0.18958921432495118
epoch: 133 training_loss 0.17768575608730316 test_loss: 0.18064392805099488
epoch: 134 training_loss 0.18435458719730377 test_loss: 0.18323321342468263
epoch: 135 training_loss 0.1889893390238285 test_loss: 0.17996325492858886
epoch: 136 training_loss 0.18375189699232578 test_loss: 0.19131962060928345
epoch: 137 training_loss 0.18436782255768777 test_loss: 0.180595862865448
epoch: 138 training_loss 0.1752268972247839 test_loss: 0.17652652263641358
epoch: 139 training_loss 0.1922155513614416 test_loss: 0.1943742513656616
epoch: 140 training_loss 0.17567065894603728 test_loss: 0.16600593328475952
epoch: 141 training_loss 0.1825924988090992 test_loss: 0.19769752025604248
epoch: 142 training_loss 0.17369393207132816 test_loss: 0.19538179636001587
epoch: 143 training_loss 0.1827295149117708 test_loss: 0.17098793983459473
epoch: 144 training_loss 0.17984648466110228 test_loss: 0.18308708667755128
epoch: 145 training_loss 0.1816623741388321 test_loss: 0.18453434705734253
epoch: 146 training_loss 0.18073106706142425 test_loss: 0.1900445580482483
epoch: 147 training_loss 0.19549903847277164 test_loss: 0.17462631464004516
epoch: 148 training_loss 0.1808926409482956 test_loss: 0.18738898038864135
epoch: 149 training_loss 0.1800722187012434 test_loss: 0.1809145450592041
epoch: 0 training_loss 45.80267288208008 test_loss: 22.985617065429686
epoch: 1 training_loss 19.020141105651856 test_loss: 16.27958526611328
epoch: 2 training_loss 15.020205583572388 test_loss: 13.571205139160156
epoch: 3 training_loss 12.887504119873046 test_loss: 12.26438217163086
epoch: 4 training_loss 11.395230321884155 test_loss: 10.843529510498048
epoch: 5 training_loss 10.441080284118652 test_loss: 10.010719299316406
epoch: 6 training_loss 9.900530281066894 test_loss: 9.665666961669922
epoch: 7 training_loss 9.252908391952515 test_loss: 8.909835052490234
epoch: 8 training_loss 8.866559205055237 test_loss: 8.372088623046874
epoch: 9 training_loss 8.191455845832825 test_loss: 8.178012084960937
epoch: 10 training_loss 7.990118999481201 test_loss: 7.982015991210938
epoch: 11 training_loss 7.727402086257935 test_loss: 7.261506652832031
epoch: 12 training_loss 7.306837143898011 test_loss: 7.30518798828125
epoch: 13 training_loss 6.922559423446655 test_loss: 6.886020660400391
epoch: 14 training_loss 6.782483606338501 test_loss: 6.512992095947266
epoch: 15 training_loss 6.334188733100891 test_loss: 6.291930770874023
epoch: 16 training_loss 6.226812257766723 test_loss: 6.369825744628907
epoch: 17 training_loss 6.034833602905273 test_loss: 5.915660095214844
epoch: 18 training_loss 5.7009801054000855 test_loss: 5.418326568603516
epoch: 19 training_loss 5.490260949134827 test_loss: 5.611762237548828
epoch: 20 training_loss 5.370162110328675 test_loss: 5.188620758056641
epoch: 21 training_loss 5.204481329917908 test_loss: 5.197899246215821
epoch: 22 training_loss 5.15174660205841 test_loss: 4.684173965454102
epoch: 23 training_loss 4.9352008867263795 test_loss: 4.870220565795899
epoch: 24 training_loss 4.949484367370605 test_loss: 4.6757556915283205
epoch: 25 training_loss 4.732210786342621 test_loss: 4.665744781494141
epoch: 26 training_loss 4.72325694322586 test_loss: 4.723812103271484
epoch: 27 training_loss 4.4200533723831175 test_loss: 4.507944107055664
epoch: 28 training_loss 4.39133695602417 test_loss: 4.396410369873047
epoch: 29 training_loss 4.373905165195465 test_loss: 4.23130989074707
epoch: 30 training_loss 4.364977216720581 test_loss: 4.207273864746094
epoch: 31 training_loss 4.211413116455078 test_loss: 4.072561264038086
epoch: 32 training_loss 4.138765573501587 test_loss: 4.217083358764649
epoch: 33 training_loss 4.056655359268189 test_loss: 4.760354614257812
epoch: 34 training_loss 4.106776466369629 test_loss: 4.185739135742187
epoch: 35 training_loss 4.013290584087372 test_loss: 4.290256500244141
epoch: 36 training_loss 4.0990216302871705 test_loss: 4.036922454833984
epoch: 37 training_loss 3.8942319083213808 test_loss: 3.8027454376220704
epoch: 38 training_loss 3.823258457183838 test_loss: 3.8368804931640623
epoch: 39 training_loss 3.872026069164276 test_loss: 3.8794181823730467
epoch: 40 training_loss 3.862906427383423 test_loss: 3.560533905029297
epoch: 41 training_loss 3.6343527030944824 test_loss: 3.7519744873046874
epoch: 42 training_loss 3.8197680616378786 test_loss: 3.58031005859375
epoch: 43 training_loss 3.682793369293213 test_loss: 3.5054927825927735
epoch: 44 training_loss 3.601930732727051 test_loss: 3.4046875
epoch: 45 training_loss 3.4832702589035036 test_loss: 3.3617862701416015
epoch: 46 training_loss 3.46217200756073 test_loss: 3.357687759399414
epoch: 47 training_loss 3.5771908736228943 test_loss: 3.691911315917969
epoch: 48 training_loss 3.609410378932953 test_loss: 3.3372875213623048
epoch: 49 training_loss 3.372906258106232 test_loss: 3.4225502014160156
epoch: 50 training_loss 3.3354662799835206 test_loss: 3.4122875213623045
epoch: 51 training_loss 3.3569506430625915 test_loss: 3.3137508392333985
epoch: 52 training_loss 3.4877513909339903 test_loss: 3.1308725357055662
epoch: 53 training_loss 3.351438195705414 test_loss: 3.159276580810547
epoch: 54 training_loss 3.2729481935501097 test_loss: 3.156114387512207
epoch: 55 training_loss 3.476438436508179 test_loss: 3.2560649871826173
epoch: 56 training_loss 3.1856982254981996 test_loss: 3.1581600189208983
epoch: 57 training_loss 3.26742249250412 test_loss: 3.524444580078125
epoch: 58 training_loss 3.115454361438751 test_loss: 3.0360235214233398
epoch: 59 training_loss 3.1224329972267153 test_loss: 2.9442272186279297
epoch: 60 training_loss 3.1008018040657044 test_loss: 2.8667346954345705
epoch: 61 training_loss 3.07301570892334 test_loss: 3.2728275299072265
epoch: 62 training_loss 3.076751616001129 test_loss: 3.021459197998047
epoch: 63 training_loss 3.1264431500434875 test_loss: 2.9100629806518556
epoch: 64 training_loss 3.09495138168335 test_loss: 3.0161197662353514
epoch: 65 training_loss 3.0196759843826295 test_loss: 3.020720291137695
epoch: 66 training_loss 3.05577712059021 test_loss: 3.025404167175293
epoch: 67 training_loss 3.0107818174362184 test_loss: 3.030216407775879
epoch: 68 training_loss 2.927960658073425 test_loss: 2.712274742126465
epoch: 69 training_loss 2.861357111930847 test_loss: 2.6666238784790037
epoch: 70 training_loss 2.8366168689727784 test_loss: 3.0225141525268553
epoch: 71 training_loss 2.873417065143585 test_loss: 3.0042720794677735
epoch: 72 training_loss 2.940639009475708 test_loss: 2.820465087890625
epoch: 73 training_loss 2.8278420400619506 test_loss: 2.6781232833862303
epoch: 74 training_loss 2.7021311151981355 test_loss: 2.9385353088378907
epoch: 75 training_loss 2.9393911814689635 test_loss: 2.815492057800293
epoch: 76 training_loss 3.039348132610321 test_loss: 2.703972816467285
epoch: 77 training_loss 2.890306417942047 test_loss: 2.7114212036132814
epoch: 78 training_loss 2.964604004621506 test_loss: 2.8178245544433596
epoch: 79 training_loss 2.7479082202911376 test_loss: 2.5712398529052733
epoch: 80 training_loss 2.7876578891277313 test_loss: 3.053336524963379
epoch: 81 training_loss 2.831421136856079 test_loss: 2.653403091430664
epoch: 82 training_loss 2.706067385673523 test_loss: 2.6639522552490233
epoch: 83 training_loss 2.8624016571044923 test_loss: 2.595438766479492
epoch: 84 training_loss 2.748665398359299 test_loss: 2.5291419982910157
epoch: 85 training_loss 2.49396719455719 test_loss: 2.432497978210449
epoch: 86 training_loss 2.7049815928936005 test_loss: 2.5935958862304687
epoch: 87 training_loss 2.5370753371715544 test_loss: 2.496670150756836
epoch: 88 training_loss 2.5881651043891907 test_loss: 2.7841560363769533
epoch: 89 training_loss 2.6356899344921114 test_loss: 2.539951515197754
epoch: 90 training_loss 2.6479012846946715 test_loss: 2.4231637954711913
epoch: 91 training_loss 2.5000626814365385 test_loss: 2.39528751373291
epoch: 92 training_loss 2.454571123123169 test_loss: 2.3966737747192384
epoch: 93 training_loss 2.6690283823013305 test_loss: 2.5196786880493165
epoch: 94 training_loss 2.4236345505714416 test_loss: 2.417911911010742
epoch: 95 training_loss 2.4609957122802735 test_loss: 2.6310756683349608
epoch: 96 training_loss 2.6567336535453796 test_loss: 2.3155519485473635
epoch: 97 training_loss 2.490801932811737 test_loss: 2.5195186614990233
epoch: 98 training_loss 2.597379004955292 test_loss: 2.6268529891967773
epoch: 99 training_loss 2.4152855014801027 test_loss: 2.2447128295898438
epoch: 100 training_loss 2.2567771685123446 test_loss: 2.233961486816406
epoch: 101 training_loss 2.3521492218971254 test_loss: 2.23658447265625
epoch: 102 training_loss 2.3426406264305113 test_loss: 2.4875938415527346
epoch: 103 training_loss 2.4149626779556272 test_loss: 2.745315170288086
epoch: 104 training_loss 2.381655720472336 test_loss: 2.3162744522094725
epoch: 105 training_loss 2.3735427927970885 test_loss: 2.4540281295776367
epoch: 106 training_loss 2.3526045203208925 test_loss: 2.1562191009521485
epoch: 107 training_loss 2.3595209515094755 test_loss: 2.409429168701172
epoch: 108 training_loss 2.3774874114990237 test_loss: 2.1869152069091795
epoch: 109 training_loss 2.2037928199768064 test_loss: 2.031351852416992
epoch: 110 training_loss 2.211659653186798 test_loss: 2.4335664749145507
epoch: 111 training_loss 2.3565821695327758 test_loss: 2.4271835327148437
epoch: 112 training_loss 2.309180746078491 test_loss: 2.1538507461547853
epoch: 113 training_loss 2.188527681827545 test_loss: 2.19830379486084
epoch: 114 training_loss 2.193410600423813 test_loss: 1.9814306259155274
epoch: 115 training_loss 2.3711571407318117 test_loss: 2.1210025787353515
epoch: 116 training_loss 2.055929048061371 test_loss: 2.0204704284667967
epoch: 117 training_loss 2.0641896712779997 test_loss: 2.151966857910156
epoch: 118 training_loss 2.1197565317153932 test_loss: 1.8839591979980468
epoch: 119 training_loss 2.1782542860507963 test_loss: 2.1853561401367188
epoch: 120 training_loss 2.091099224090576 test_loss: 2.272380828857422
epoch: 121 training_loss 2.2632947957515714 test_loss: 2.9835041046142576
epoch: 122 training_loss 2.3161171638965605 test_loss: 2.7036069869995116
epoch: 123 training_loss 2.0975134634971617 test_loss: 2.0698652267456055
epoch: 124 training_loss 2.120935769081116 test_loss: 1.8813478469848632
epoch: 125 training_loss 1.967757592201233 test_loss: 1.9942691802978516
epoch: 126 training_loss 2.030382013320923 test_loss: 2.564073944091797
epoch: 127 training_loss 2.531346197128296 test_loss: 1.9048973083496095
epoch: 128 training_loss 2.0096601819992066 test_loss: 1.8575084686279297
epoch: 129 training_loss 1.9958022785186769 test_loss: 1.9768680572509765
epoch: 130 training_loss 1.962614096403122 test_loss: 1.985633087158203
epoch: 131 training_loss 1.9401131582260132 test_loss: 1.8001791000366212
epoch: 132 training_loss 2.3305936074256897 test_loss: 2.2125329971313477
epoch: 133 training_loss 2.0694529283046723 test_loss: 1.8408180236816407
epoch: 134 training_loss 2.02540610909462 test_loss: 1.7212522506713868
epoch: 135 training_loss 2.0382626247406006 test_loss: 1.8076457977294922
epoch: 136 training_loss 1.8953464484214784 test_loss: 2.350580406188965
epoch: 137 training_loss 1.9465477061271668 test_loss: 1.972731590270996
epoch: 138 training_loss 2.1472776174545287 test_loss: 2.24362735748291
epoch: 139 training_loss 2.0641928482055665 test_loss: 1.9434289932250977
epoch: 140 training_loss 2.0111119878292083 test_loss: 1.9719179153442383
epoch: 141 training_loss 2.0461853337287903 test_loss: 1.8455280303955077
epoch: 142 training_loss 2.0484018194675446 test_loss: 1.7176803588867187
epoch: 143 training_loss 1.9494667851924896 test_loss: 1.985757064819336
epoch: 144 training_loss 2.1848230278491974 test_loss: 2.339886474609375
epoch: 145 training_loss 1.944010738134384 test_loss: 1.8909404754638672
epoch: 146 training_loss 1.8312073814868928 test_loss: 1.7469436645507812
epoch: 147 training_loss 1.9802556324005127 test_loss: 1.7908060073852539
epoch: 148 training_loss 1.9298267602920531 test_loss: 1.8111446380615235
epoch: 149 training_loss 1.9657652974128723 test_loss: 1.8940631866455078
76.97318855139598
episode: 0 training return: tensor(-212.2312, device='cuda:0')
episode: 1 training return: tensor(-99.8425, device='cuda:0')
episode: 2 training return: tensor(-107.9255, device='cuda:0')
episode: 3 training return: tensor(-105.1162, device='cuda:0')
epoch: 1 test_true_pfm: 87.21225892591265 sim_pfm: -102.62865230509779
episode: 4 training return: tensor(-179.6568, device='cuda:0')
episode: 5 training return: tensor(-74.5732, device='cuda:0')
episode: 6 training return: tensor(-83.5057, device='cuda:0')
episode: 7 training return: tensor(-102.5232, device='cuda:0')
epoch: 2 test_true_pfm: 85.92789644489626 sim_pfm: -153.8112334557809
episode: 8 training return: tensor(-155.4736, device='cuda:0')
episode: 9 training return: tensor(-115.4216, device='cuda:0')
episode: 10 training return: tensor(-114.6617, device='cuda:0')
episode: 11 training return: tensor(-146.3607, device='cuda:0')
epoch: 3 test_true_pfm: 51.67602210264911 sim_pfm: -154.4297829656047
episode: 12 training return: tensor(-210.2864, device='cuda:0')
episode: 13 training return: tensor(-210.4842, device='cuda:0')
episode: 14 training return: tensor(-205.7926, device='cuda:0')
episode: 15 training return: tensor(-183.3861, device='cuda:0')
epoch: 4 test_true_pfm: 69.40075526159463 sim_pfm: -116.24277142255451
episode: 16 training return: tensor(-145.1121, device='cuda:0')
episode: 17 training return: tensor(-163.2225, device='cuda:0')
episode: 18 training return: tensor(-166.6227, device='cuda:0')
episode: 19 training return: tensor(-122.0856, device='cuda:0')
epoch: 5 test_true_pfm: 56.39576707470385 sim_pfm: -98.67947085191263
episode: 20 training return: tensor(-216.8396, device='cuda:0')
episode: 21 training return: tensor(-191.8688, device='cuda:0')
episode: 22 training return: tensor(-118.6227, device='cuda:0')
episode: 23 training return: tensor(-226.7305, device='cuda:0')
epoch: 6 test_true_pfm: 52.89880697058144 sim_pfm: -144.39940420284984
episode: 24 training return: tensor(-172.4907, device='cuda:0')
episode: 25 training return: tensor(-188.8829, device='cuda:0')
episode: 26 training return: tensor(-100.2437, device='cuda:0')
episode: 27 training return: tensor(-88.0066, device='cuda:0')
epoch: 7 test_true_pfm: 67.46736068733125 sim_pfm: -152.07860758830793
episode: 28 training return: tensor(-119.4427, device='cuda:0')
episode: 29 training return: tensor(-86.5655, device='cuda:0')
episode: 30 training return: tensor(-148.8963, device='cuda:0')
episode: 31 training return: tensor(-114.5766, device='cuda:0')
epoch: 8 test_true_pfm: 76.78783917991682 sim_pfm: -99.37858725771657
episode: 32 training return: tensor(-158.2446, device='cuda:0')
episode: 33 training return: tensor(-87.6716, device='cuda:0')
episode: 34 training return: tensor(-84.8036, device='cuda:0')
episode: 35 training return: tensor(-198.5341, device='cuda:0')
epoch: 9 test_true_pfm: 63.985317643563256 sim_pfm: -110.65245190688874
episode: 36 training return: tensor(-118.0233, device='cuda:0')
episode: 37 training return: tensor(-199.7637, device='cuda:0')
episode: 38 training return: tensor(-125.8790, device='cuda:0')
episode: 39 training return: tensor(-187.4672, device='cuda:0')
epoch: 10 test_true_pfm: 77.9938660264842 sim_pfm: -139.5710695483198
episode: 40 training return: tensor(-122.1019, device='cuda:0')
episode: 41 training return: tensor(-163.8320, device='cuda:0')
episode: 42 training return: tensor(-80.3499, device='cuda:0')
episode: 43 training return: tensor(-186.7645, device='cuda:0')
epoch: 11 test_true_pfm: 65.98189859539609 sim_pfm: -121.48269327879534
episode: 44 training return: tensor(-150.4914, device='cuda:0')
episode: 45 training return: tensor(-112.6018, device='cuda:0')
episode: 46 training return: tensor(-90.1433, device='cuda:0')
episode: 47 training return: tensor(-101.3106, device='cuda:0')
epoch: 12 test_true_pfm: 77.6905968912649 sim_pfm: -115.15136178893154
episode: 48 training return: tensor(-145.6965, device='cuda:0')
episode: 49 training return: tensor(-160.1814, device='cuda:0')
episode: 50 training return: tensor(-102.4586, device='cuda:0')
episode: 51 training return: tensor(-141.4730, device='cuda:0')
epoch: 13 test_true_pfm: 67.60378479861525 sim_pfm: -156.74020808532367
episode: 52 training return: tensor(-139.7149, device='cuda:0')
episode: 53 training return: tensor(-135.1064, device='cuda:0')
episode: 54 training return: tensor(-170.6363, device='cuda:0')
episode: 55 training return: tensor(-180.6292, device='cuda:0')
epoch: 14 test_true_pfm: 69.62439406214276 sim_pfm: -127.9853101056884
episode: 56 training return: tensor(-83.6123, device='cuda:0')
episode: 57 training return: tensor(-76.1142, device='cuda:0')
episode: 58 training return: tensor(-165.8185, device='cuda:0')
episode: 59 training return: tensor(-173.9839, device='cuda:0')
epoch: 15 test_true_pfm: 76.13796940543348 sim_pfm: -128.3229176215129
episode: 60 training return: tensor(-101.5534, device='cuda:0')
episode: 61 training return: tensor(-107.2937, device='cuda:0')
episode: 62 training return: tensor(-95.0187, device='cuda:0')
episode: 63 training return: tensor(-208.3240, device='cuda:0')
epoch: 16 test_true_pfm: 73.28806733926703 sim_pfm: -109.84057339183055
episode: 64 training return: tensor(-208.8862, device='cuda:0')
episode: 65 training return: tensor(-93.9198, device='cuda:0')
episode: 66 training return: tensor(-134.5146, device='cuda:0')
episode: 67 training return: tensor(-99.1493, device='cuda:0')
epoch: 17 test_true_pfm: 69.87510101773054 sim_pfm: -118.00287342041266
episode: 68 training return: tensor(-152.2723, device='cuda:0')
episode: 69 training return: tensor(-142.9198, device='cuda:0')
episode: 70 training return: tensor(-104.4726, device='cuda:0')
episode: 71 training return: tensor(-167.6343, device='cuda:0')
epoch: 18 test_true_pfm: 75.62793235259666 sim_pfm: -138.4985936622601
episode: 72 training return: tensor(-121.0820, device='cuda:0')
episode: 73 training return: tensor(-151.6592, device='cuda:0')
episode: 74 training return: tensor(-96.0846, device='cuda:0')
episode: 75 training return: tensor(-91.4497, device='cuda:0')
epoch: 19 test_true_pfm: 81.04365398584498 sim_pfm: -115.17285291387233
episode: 76 training return: tensor(-197.0494, device='cuda:0')
episode: 77 training return: tensor(-84.6085, device='cuda:0')
episode: 78 training return: tensor(-91.7387, device='cuda:0')
episode: 79 training return: tensor(-160.9044, device='cuda:0')
epoch: 20 test_true_pfm: 62.94593153037594 sim_pfm: -109.32481276779436
episode: 80 training return: tensor(-100.0358, device='cuda:0')
episode: 81 training return: tensor(-87.4659, device='cuda:0')
episode: 82 training return: tensor(-173.7295, device='cuda:0')
episode: 83 training return: tensor(-130.9112, device='cuda:0')
epoch: 21 test_true_pfm: 57.53772939943216 sim_pfm: -106.99686285000061
episode: 84 training return: tensor(-168.0994, device='cuda:0')
episode: 85 training return: tensor(-173.5447, device='cuda:0')
episode: 86 training return: tensor(-177.0965, device='cuda:0')
episode: 87 training return: tensor(-166.6021, device='cuda:0')
epoch: 22 test_true_pfm: 78.79335133622918 sim_pfm: -95.05151891181595
episode: 88 training return: tensor(-175.1572, device='cuda:0')
episode: 89 training return: tensor(-72.2702, device='cuda:0')
episode: 90 training return: tensor(-174.8457, device='cuda:0')
episode: 91 training return: tensor(-76.2656, device='cuda:0')
epoch: 23 test_true_pfm: 64.65923919095782 sim_pfm: -121.23627043426968
episode: 92 training return: tensor(-132.0857, device='cuda:0')
episode: 93 training return: tensor(-197.3976, device='cuda:0')
episode: 94 training return: tensor(-109.2575, device='cuda:0')
episode: 95 training return: tensor(-72.7508, device='cuda:0')
epoch: 24 test_true_pfm: 73.38550843418635 sim_pfm: -110.41159004098736
episode: 96 training return: tensor(-151.3616, device='cuda:0')
episode: 97 training return: tensor(-117.2642, device='cuda:0')
episode: 98 training return: tensor(-83.3430, device='cuda:0')
episode: 99 training return: tensor(-120.0711, device='cuda:0')
epoch: 25 test_true_pfm: 77.57336959779809 sim_pfm: -121.50508500118158
episode: 100 training return: tensor(-92.0972, device='cuda:0')
episode: 101 training return: tensor(-90.1149, device='cuda:0')
episode: 102 training return: tensor(-82.2242, device='cuda:0')
episode: 103 training return: tensor(-97.5998, device='cuda:0')
epoch: 26 test_true_pfm: 76.29520881980815 sim_pfm: -103.4523664539447
episode: 104 training return: tensor(-169.8237, device='cuda:0')
episode: 105 training return: tensor(-99.0958, device='cuda:0')
episode: 106 training return: tensor(-72.4240, device='cuda:0')
episode: 107 training return: tensor(-167.4670, device='cuda:0')
epoch: 27 test_true_pfm: 78.74881567662844 sim_pfm: -112.18152203912614
episode: 108 training return: tensor(-93.4176, device='cuda:0')
episode: 109 training return: tensor(-106.2247, device='cuda:0')
episode: 110 training return: tensor(-97.0802, device='cuda:0')
episode: 111 training return: tensor(-83.0008, device='cuda:0')
epoch: 28 test_true_pfm: 66.26068481744751 sim_pfm: -140.97743183116546
episode: 112 training return: tensor(-105.3391, device='cuda:0')
episode: 113 training return: tensor(-125.6010, device='cuda:0')
episode: 114 training return: tensor(-85.4330, device='cuda:0')
episode: 115 training return: tensor(-174.6125, device='cuda:0')
epoch: 29 test_true_pfm: 64.27106928419926 sim_pfm: -111.94011030236143
episode: 116 training return: tensor(-77.7408, device='cuda:0')
episode: 117 training return: tensor(-174.4437, device='cuda:0')
episode: 118 training return: tensor(-172.9283, device='cuda:0')
episode: 119 training return: tensor(-130.0594, device='cuda:0')
epoch: 30 test_true_pfm: 70.90295660187591 sim_pfm: -127.7687591586844
episode: 120 training return: tensor(-108.3647, device='cuda:0')
episode: 121 training return: tensor(-115.8831, device='cuda:0')
episode: 122 training return: tensor(-149.8243, device='cuda:0')
episode: 123 training return: tensor(-150.4696, device='cuda:0')
epoch: 31 test_true_pfm: 78.65408175290723 sim_pfm: -124.38765578254242
episode: 124 training return: tensor(-72.2768, device='cuda:0')
episode: 125 training return: tensor(-199.0124, device='cuda:0')
episode: 126 training return: tensor(-145.6177, device='cuda:0')
episode: 127 training return: tensor(-194.2816, device='cuda:0')
epoch: 32 test_true_pfm: 79.0175656772396 sim_pfm: -136.5454801830114
episode: 128 training return: tensor(-116.7728, device='cuda:0')
episode: 129 training return: tensor(-93.2897, device='cuda:0')
episode: 130 training return: tensor(-204.5073, device='cuda:0')
episode: 131 training return: tensor(-204.4477, device='cuda:0')
epoch: 33 test_true_pfm: 66.18619392535867 sim_pfm: -112.41833222273854
episode: 132 training return: tensor(-143.1111, device='cuda:0')
episode: 133 training return: tensor(-131.3928, device='cuda:0')
episode: 134 training return: tensor(-99.3385, device='cuda:0')
episode: 135 training return: tensor(-111.7074, device='cuda:0')
epoch: 34 test_true_pfm: 74.3330744200271 sim_pfm: -105.46659228713834
episode: 136 training return: tensor(-200.6944, device='cuda:0')
episode: 137 training return: tensor(-96.4458, device='cuda:0')
episode: 138 training return: tensor(-120.8384, device='cuda:0')
episode: 139 training return: tensor(-104.8463, device='cuda:0')
epoch: 35 test_true_pfm: 78.2399075585441 sim_pfm: -126.78958672397421
episode: 140 training return: tensor(-144.1755, device='cuda:0')
episode: 141 training return: tensor(-97.8666, device='cuda:0')
episode: 142 training return: tensor(-177.5852, device='cuda:0')
episode: 143 training return: tensor(-114.7759, device='cuda:0')
epoch: 36 test_true_pfm: 74.27936675942217 sim_pfm: -106.48063889428741
episode: 144 training return: tensor(-104.5552, device='cuda:0')
episode: 145 training return: tensor(-83.9668, device='cuda:0')
episode: 146 training return: tensor(-153.6924, device='cuda:0')
episode: 147 training return: tensor(-166.7454, device='cuda:0')
epoch: 37 test_true_pfm: 66.67604827587255 sim_pfm: -116.25728219823796
episode: 148 training return: tensor(-193.7449, device='cuda:0')
episode: 149 training return: tensor(-162.6745, device='cuda:0')
episode: 150 training return: tensor(-112.1163, device='cuda:0')
episode: 151 training return: tensor(-172.0003, device='cuda:0')
epoch: 38 test_true_pfm: 55.14684727748808 sim_pfm: -160.28591665783898
episode: 152 training return: tensor(-146.5269, device='cuda:0')
episode: 153 training return: tensor(-159.2151, device='cuda:0')
episode: 154 training return: tensor(-120.1801, device='cuda:0')
episode: 155 training return: tensor(-99.3817, device='cuda:0')
epoch: 39 test_true_pfm: 66.56247468924028 sim_pfm: -144.86091718205716
episode: 156 training return: tensor(-117.1992, device='cuda:0')
episode: 157 training return: tensor(-110.0819, device='cuda:0')
episode: 158 training return: tensor(-174.9675, device='cuda:0')
episode: 159 training return: tensor(-113.1282, device='cuda:0')
epoch: 40 test_true_pfm: 61.47221185083906 sim_pfm: -132.76595406713895
episode: 160 training return: tensor(-128.6048, device='cuda:0')
episode: 161 training return: tensor(-205.9327, device='cuda:0')
episode: 162 training return: tensor(-95.4836, device='cuda:0')
episode: 163 training return: tensor(-153.7502, device='cuda:0')
epoch: 41 test_true_pfm: 78.7026856602569 sim_pfm: -135.01369210483972
episode: 164 training return: tensor(-147.6098, device='cuda:0')
episode: 165 training return: tensor(-98.5653, device='cuda:0')
episode: 166 training return: tensor(-159.3754, device='cuda:0')
episode: 167 training return: tensor(-101.6525, device='cuda:0')
epoch: 42 test_true_pfm: 74.89237031306358 sim_pfm: -119.142375472805
episode: 168 training return: tensor(-205.5708, device='cuda:0')
episode: 169 training return: tensor(-82.6256, device='cuda:0')
episode: 170 training return: tensor(-85.3957, device='cuda:0')
episode: 171 training return: tensor(-77.1444, device='cuda:0')
epoch: 43 test_true_pfm: 71.92302430484656 sim_pfm: -151.20212584051768
episode: 172 training return: tensor(-171.7224, device='cuda:0')
episode: 173 training return: tensor(-115.4983, device='cuda:0')
episode: 174 training return: tensor(-90.4736, device='cuda:0')
episode: 175 training return: tensor(-173.3744, device='cuda:0')
epoch: 44 test_true_pfm: 84.16739738004503 sim_pfm: -105.25137072298094
episode: 176 training return: tensor(-91.4244, device='cuda:0')
episode: 177 training return: tensor(-104.6499, device='cuda:0')
episode: 178 training return: tensor(-104.7348, device='cuda:0')
episode: 179 training return: tensor(-87.0705, device='cuda:0')
epoch: 45 test_true_pfm: 77.59243028240954 sim_pfm: -106.0092632408021
episode: 180 training return: tensor(-100.1294, device='cuda:0')
episode: 181 training return: tensor(-104.3239, device='cuda:0')
episode: 182 training return: tensor(-168.4258, device='cuda:0')
episode: 183 training return: tensor(-172.0607, device='cuda:0')
epoch: 46 test_true_pfm: 82.8197720487639 sim_pfm: -107.92665398850804
episode: 184 training return: tensor(-98.5223, device='cuda:0')
episode: 185 training return: tensor(-96.6632, device='cuda:0')
episode: 186 training return: tensor(-108.5687, device='cuda:0')
episode: 187 training return: tensor(-196.9191, device='cuda:0')
epoch: 47 test_true_pfm: 69.23371159249555 sim_pfm: -106.30989276022883
episode: 188 training return: tensor(-152.8529, device='cuda:0')
episode: 189 training return: tensor(-107.2404, device='cuda:0')
episode: 190 training return: tensor(-69.8430, device='cuda:0')
episode: 191 training return: tensor(-46.8698, device='cuda:0')
epoch: 48 test_true_pfm: 71.41878874166471 sim_pfm: -103.80897022483987
episode: 192 training return: tensor(-121.8287, device='cuda:0')
episode: 193 training return: tensor(-181.0203, device='cuda:0')
episode: 194 training return: tensor(-105.4136, device='cuda:0')
episode: 195 training return: tensor(-91.6872, device='cuda:0')
epoch: 49 test_true_pfm: 61.38516652465738 sim_pfm: -129.94061857688357
episode: 196 training return: tensor(-93.6447, device='cuda:0')
episode: 197 training return: tensor(-90.0935, device='cuda:0')
episode: 198 training return: tensor(-152.6740, device='cuda:0')
episode: 199 training return: tensor(-85.9422, device='cuda:0')
epoch: 50 test_true_pfm: 71.2792739560628 sim_pfm: -119.41394673603354
episode: 200 training return: tensor(-138.4766, device='cuda:0')
episode: 201 training return: tensor(-78.3869, device='cuda:0')
episode: 202 training return: tensor(-115.0809, device='cuda:0')
episode: 203 training return: tensor(-113.5059, device='cuda:0')
epoch: 51 test_true_pfm: 69.14906579442786 sim_pfm: -140.54218518113484
episode: 204 training return: tensor(-179.9599, device='cuda:0')
episode: 205 training return: tensor(-86.8847, device='cuda:0')
episode: 206 training return: tensor(-75.0560, device='cuda:0')
episode: 207 training return: tensor(-187.8286, device='cuda:0')
epoch: 52 test_true_pfm: 71.07110237835505 sim_pfm: -146.94835038977908
episode: 208 training return: tensor(-202.2246, device='cuda:0')
episode: 209 training return: tensor(-197.6480, device='cuda:0')
episode: 210 training return: tensor(-103.2408, device='cuda:0')
episode: 211 training return: tensor(-141.7898, device='cuda:0')
epoch: 53 test_true_pfm: 67.89006654669012 sim_pfm: -86.28684932426549
episode: 212 training return: tensor(-137.4537, device='cuda:0')
episode: 213 training return: tensor(-135.7826, device='cuda:0')
episode: 214 training return: tensor(-99.7525, device='cuda:0')
episode: 215 training return: tensor(-102.9863, device='cuda:0')
epoch: 54 test_true_pfm: 66.51687180025696 sim_pfm: -135.11592324824886
episode: 216 training return: tensor(-116.9328, device='cuda:0')
episode: 217 training return: tensor(-114.7922, device='cuda:0')
episode: 218 training return: tensor(-193.7619, device='cuda:0')
episode: 219 training return: tensor(-109.0421, device='cuda:0')
epoch: 55 test_true_pfm: 53.1709308359669 sim_pfm: -117.69134086074773
episode: 220 training return: tensor(-84.6595, device='cuda:0')
episode: 221 training return: tensor(-103.6425, device='cuda:0')
episode: 222 training return: tensor(-196.6576, device='cuda:0')
episode: 223 training return: tensor(-150.7761, device='cuda:0')
epoch: 56 test_true_pfm: 67.40299504397277 sim_pfm: -104.93429632327752
episode: 224 training return: tensor(-123.5235, device='cuda:0')
episode: 225 training return: tensor(-186.7660, device='cuda:0')
episode: 226 training return: tensor(-90.2705, device='cuda:0')
episode: 227 training return: tensor(-106.0354, device='cuda:0')
epoch: 57 test_true_pfm: 76.3514630712663 sim_pfm: -137.42883491810645
episode: 228 training return: tensor(-192.9251, device='cuda:0')
episode: 229 training return: tensor(-191.4297, device='cuda:0')
episode: 230 training return: tensor(-167.4017, device='cuda:0')
episode: 231 training return: tensor(-94.1126, device='cuda:0')
epoch: 58 test_true_pfm: 60.97262787988261 sim_pfm: -137.8996450141887
episode: 232 training return: tensor(-86.0619, device='cuda:0')
episode: 233 training return: tensor(-120.2617, device='cuda:0')
episode: 234 training return: tensor(-104.9875, device='cuda:0')
episode: 235 training return: tensor(-98.7518, device='cuda:0')
epoch: 59 test_true_pfm: 77.73839345575101 sim_pfm: -128.4743709764909
episode: 236 training return: tensor(-146.8643, device='cuda:0')
episode: 237 training return: tensor(-167.2459, device='cuda:0')
episode: 238 training return: tensor(-81.2871, device='cuda:0')
episode: 239 training return: tensor(-97.5271, device='cuda:0')
epoch: 60 test_true_pfm: 56.02630075874191 sim_pfm: -97.7197088738787
episode: 240 training return: tensor(-196.7242, device='cuda:0')
episode: 241 training return: tensor(-91.3888, device='cuda:0')
episode: 242 training return: tensor(-98.5751, device='cuda:0')
episode: 243 training return: tensor(-175.2897, device='cuda:0')
epoch: 61 test_true_pfm: 73.50405926963285 sim_pfm: -104.25559040302178
episode: 244 training return: tensor(-190.2086, device='cuda:0')
episode: 245 training return: tensor(-112.3451, device='cuda:0')
episode: 246 training return: tensor(-117.7262, device='cuda:0')
episode: 247 training return: tensor(-114.1073, device='cuda:0')
epoch: 62 test_true_pfm: 77.59182692665776 sim_pfm: -123.20958614074625
episode: 248 training return: tensor(-124.5483, device='cuda:0')
episode: 249 training return: tensor(-95.0585, device='cuda:0')
episode: 250 training return: tensor(-98.1223, device='cuda:0')
episode: 251 training return: tensor(-97.9537, device='cuda:0')
epoch: 63 test_true_pfm: 64.73413105875281 sim_pfm: -102.14224779491197
episode: 252 training return: tensor(-98.9545, device='cuda:0')
episode: 253 training return: tensor(-102.0383, device='cuda:0')
episode: 254 training return: tensor(-105.0473, device='cuda:0')
episode: 255 training return: tensor(-158.4072, device='cuda:0')
epoch: 64 test_true_pfm: 67.20698713483635 sim_pfm: -150.86448681161272
episode: 256 training return: tensor(-150.8423, device='cuda:0')
episode: 257 training return: tensor(-168.8291, device='cuda:0')
episode: 258 training return: tensor(-92.1104, device='cuda:0')
episode: 259 training return: tensor(-193.7592, device='cuda:0')
epoch: 65 test_true_pfm: 76.49999681895872 sim_pfm: -113.20885964421905
episode: 260 training return: tensor(-103.5397, device='cuda:0')
episode: 261 training return: tensor(-123.6684, device='cuda:0')
episode: 262 training return: tensor(-98.8396, device='cuda:0')
episode: 263 training return: tensor(-91.7755, device='cuda:0')
epoch: 66 test_true_pfm: 87.29411988833883 sim_pfm: -110.80979118383257
episode: 264 training return: tensor(-184.9631, device='cuda:0')
episode: 265 training return: tensor(-93.6591, device='cuda:0')
episode: 266 training return: tensor(-200.2073, device='cuda:0')
episode: 267 training return: tensor(-97.3576, device='cuda:0')
epoch: 67 test_true_pfm: 55.114654161748675 sim_pfm: -115.75803034858545
episode: 268 training return: tensor(-109.7318, device='cuda:0')
episode: 269 training return: tensor(-98.9732, device='cuda:0')
episode: 270 training return: tensor(-101.6214, device='cuda:0')
episode: 271 training return: tensor(-7.5224, device='cuda:0')
epoch: 68 test_true_pfm: 70.40523054381991 sim_pfm: -159.02718434254638
episode: 272 training return: tensor(-100.1774, device='cuda:0')
episode: 273 training return: tensor(-98.0317, device='cuda:0')
episode: 274 training return: tensor(-120.4622, device='cuda:0')
episode: 275 training return: tensor(-49.1907, device='cuda:0')
epoch: 69 test_true_pfm: 78.21959044797747 sim_pfm: -98.66498925609048
episode: 276 training return: tensor(-93.2063, device='cuda:0')
episode: 277 training return: tensor(-18.4141, device='cuda:0')
episode: 278 training return: tensor(-113.0080, device='cuda:0')
episode: 279 training return: tensor(-144.4745, device='cuda:0')
epoch: 70 test_true_pfm: 79.29110250181807 sim_pfm: -88.93492940937286
episode: 280 training return: tensor(-155.9677, device='cuda:0')
episode: 281 training return: tensor(-110.0812, device='cuda:0')
episode: 282 training return: tensor(-205.3374, device='cuda:0')
episode: 283 training return: tensor(-184.3801, device='cuda:0')
epoch: 71 test_true_pfm: 73.2901384167441 sim_pfm: -104.33764791638241
episode: 284 training return: tensor(-93.9405, device='cuda:0')
episode: 285 training return: tensor(-109.5922, device='cuda:0')
episode: 286 training return: tensor(-81.4263, device='cuda:0')
episode: 287 training return: tensor(-109.7891, device='cuda:0')
epoch: 72 test_true_pfm: 73.97972768823925 sim_pfm: -117.64509199525347
episode: 288 training return: tensor(-94.1836, device='cuda:0')
episode: 289 training return: tensor(-95.2863, device='cuda:0')
episode: 290 training return: tensor(-94.7479, device='cuda:0')
episode: 291 training return: tensor(-104.4208, device='cuda:0')
epoch: 73 test_true_pfm: 57.58416432075056 sim_pfm: -109.77198826236418
episode: 292 training return: tensor(-132.9499, device='cuda:0')
episode: 293 training return: tensor(-97.6100, device='cuda:0')
episode: 294 training return: tensor(-201.2919, device='cuda:0')
episode: 295 training return: tensor(-88.8238, device='cuda:0')
epoch: 74 test_true_pfm: 61.27721359851388 sim_pfm: -137.3944682013942
episode: 296 training return: tensor(-109.5803, device='cuda:0')
episode: 297 training return: tensor(-102.5754, device='cuda:0')
episode: 298 training return: tensor(-38.2692, device='cuda:0')
episode: 299 training return: tensor(-163.5210, device='cuda:0')
epoch: 75 test_true_pfm: 74.35248904248797 sim_pfm: -51.98151330400142
episode: 300 training return: tensor(-103.4947, device='cuda:0')
episode: 301 training return: tensor(-92.1847, device='cuda:0')
episode: 302 training return: tensor(-100.8745, device='cuda:0')
episode: 303 training return: tensor(-189.1538, device='cuda:0')
epoch: 76 test_true_pfm: 88.29243901557888 sim_pfm: -165.3559019604465
episode: 304 training return: tensor(-95.8755, device='cuda:0')
episode: 305 training return: tensor(-154.3319, device='cuda:0')
episode: 306 training return: tensor(-94.0657, device='cuda:0')
episode: 307 training return: tensor(-93.2926, device='cuda:0')
epoch: 77 test_true_pfm: 87.73973239771027 sim_pfm: -111.724596924457
episode: 308 training return: tensor(-56.6134, device='cuda:0')
episode: 309 training return: tensor(-77.7798, device='cuda:0')
episode: 310 training return: tensor(-93.5601, device='cuda:0')
episode: 311 training return: tensor(-107.4536, device='cuda:0')
epoch: 78 test_true_pfm: 75.20046121384259 sim_pfm: -90.81115959900198
episode: 312 training return: tensor(-159.9273, device='cuda:0')
episode: 313 training return: tensor(-61.2715, device='cuda:0')
episode: 314 training return: tensor(-204.0033, device='cuda:0')
episode: 315 training return: tensor(-85.2397, device='cuda:0')
epoch: 79 test_true_pfm: 71.21718352075015 sim_pfm: -95.09012755565928
episode: 316 training return: tensor(-89.0189, device='cuda:0')
episode: 317 training return: tensor(-72.8280, device='cuda:0')
episode: 318 training return: tensor(-168.4691, device='cuda:0')
episode: 319 training return: tensor(-93.3345, device='cuda:0')
epoch: 80 test_true_pfm: 61.92931357667307 sim_pfm: -149.387127832626
episode: 320 training return: tensor(-92.7793, device='cuda:0')
episode: 321 training return: tensor(-92.1595, device='cuda:0')
episode: 322 training return: tensor(-94.8195, device='cuda:0')
episode: 323 training return: tensor(-197.6365, device='cuda:0')
epoch: 81 test_true_pfm: 61.16605457938571 sim_pfm: -112.58132326932392
episode: 324 training return: tensor(-52.7104, device='cuda:0')
episode: 325 training return: tensor(-139.7512, device='cuda:0')
episode: 326 training return: tensor(-85.5653, device='cuda:0')
episode: 327 training return: tensor(-113.3439, device='cuda:0')
epoch: 82 test_true_pfm: 72.88184043700139 sim_pfm: -116.75902909986908
episode: 328 training return: tensor(-86.5461, device='cuda:0')
episode: 329 training return: tensor(-93.0378, device='cuda:0')
episode: 330 training return: tensor(-36.2671, device='cuda:0')
episode: 331 training return: tensor(-96.9861, device='cuda:0')
epoch: 83 test_true_pfm: 80.12640338445348 sim_pfm: -93.98819179726415
episode: 332 training return: tensor(21.2417, device='cuda:0')
episode: 333 training return: tensor(-50.4818, device='cuda:0')
episode: 334 training return: tensor(-143.5359, device='cuda:0')
episode: 335 training return: tensor(-200.0118, device='cuda:0')
epoch: 84 test_true_pfm: 69.91915605740833 sim_pfm: -110.07074444730533
episode: 336 training return: tensor(-122.6836, device='cuda:0')
episode: 337 training return: tensor(-87.0332, device='cuda:0')
episode: 338 training return: tensor(-91.8752, device='cuda:0')
episode: 339 training return: tensor(-95.1150, device='cuda:0')
epoch: 85 test_true_pfm: 73.10811631456768 sim_pfm: -94.15033842273988
episode: 340 training return: tensor(-60.6702, device='cuda:0')
episode: 341 training return: tensor(32.6718, device='cuda:0')
episode: 342 training return: tensor(-80.7250, device='cuda:0')
episode: 343 training return: tensor(-185.7913, device='cuda:0')
epoch: 86 test_true_pfm: 62.85110405052535 sim_pfm: -127.07632313104696
episode: 344 training return: tensor(-1.1131, device='cuda:0')
episode: 345 training return: tensor(-79.1407, device='cuda:0')
episode: 346 training return: tensor(-85.6713, device='cuda:0')
episode: 347 training return: tensor(-55.9086, device='cuda:0')
epoch: 87 test_true_pfm: 51.09643714105086 sim_pfm: -95.1656859352137
episode: 348 training return: tensor(-190.2692, device='cuda:0')
episode: 349 training return: tensor(-106.7942, device='cuda:0')
episode: 350 training return: tensor(-26.9801, device='cuda:0')
episode: 351 training return: tensor(-145.2817, device='cuda:0')
epoch: 88 test_true_pfm: 78.85748333417575 sim_pfm: -110.66221925507998
episode: 352 training return: tensor(-85.8156, device='cuda:0')
episode: 353 training return: tensor(-50.8633, device='cuda:0')
episode: 354 training return: tensor(-128.3521, device='cuda:0')
episode: 355 training return: tensor(-82.2053, device='cuda:0')
epoch: 89 test_true_pfm: 60.62842535397431 sim_pfm: -101.93933601590689
episode: 356 training return: tensor(-195.4831, device='cuda:0')
episode: 357 training return: tensor(-81.4557, device='cuda:0')
episode: 358 training return: tensor(-80.4016, device='cuda:0')
episode: 359 training return: tensor(-203.6556, device='cuda:0')
epoch: 90 test_true_pfm: 73.16020047674124 sim_pfm: -78.96336692639161
episode: 360 training return: tensor(-187.5014, device='cuda:0')
episode: 361 training return: tensor(-102.3208, device='cuda:0')
episode: 362 training return: tensor(-17.6560, device='cuda:0')
episode: 363 training return: tensor(-172.2303, device='cuda:0')
epoch: 91 test_true_pfm: 75.42874792072641 sim_pfm: -146.62527773345937
episode: 364 training return: tensor(-195.9573, device='cuda:0')
episode: 365 training return: tensor(-88.1863, device='cuda:0')
episode: 366 training return: tensor(-84.1877, device='cuda:0')
episode: 367 training return: tensor(-91.6649, device='cuda:0')
epoch: 92 test_true_pfm: 87.81995471374077 sim_pfm: -126.1651637923438
episode: 368 training return: tensor(-168.5775, device='cuda:0')
episode: 369 training return: tensor(-78.8792, device='cuda:0')
episode: 370 training return: tensor(-151.9555, device='cuda:0')
episode: 371 training return: tensor(-145.0097, device='cuda:0')
epoch: 93 test_true_pfm: 71.43708738879009 sim_pfm: -105.24715711930767
episode: 372 training return: tensor(-138.3000, device='cuda:0')
episode: 373 training return: tensor(-90.9067, device='cuda:0')
episode: 374 training return: tensor(-82.4975, device='cuda:0')
episode: 375 training return: tensor(-106.9794, device='cuda:0')
epoch: 94 test_true_pfm: 59.50546005390511 sim_pfm: -105.40210851344163
episode: 376 training return: tensor(-202.4370, device='cuda:0')
episode: 377 training return: tensor(-184.9689, device='cuda:0')
episode: 378 training return: tensor(-89.2766, device='cuda:0')
episode: 379 training return: tensor(-38.0315, device='cuda:0')
epoch: 95 test_true_pfm: 50.56212365791613 sim_pfm: -89.05321356360801
episode: 380 training return: tensor(-74.5379, device='cuda:0')
episode: 381 training return: tensor(33.6130, device='cuda:0')
episode: 382 training return: tensor(-69.5501, device='cuda:0')
episode: 383 training return: tensor(-81.6270, device='cuda:0')
epoch: 96 test_true_pfm: 73.0419551286407 sim_pfm: -86.10789993684156
episode: 384 training return: tensor(-87.7086, device='cuda:0')
episode: 385 training return: tensor(-196.7289, device='cuda:0')
episode: 386 training return: tensor(-202.3405, device='cuda:0')
episode: 387 training return: tensor(-96.6111, device='cuda:0')
epoch: 97 test_true_pfm: 86.46574426015671 sim_pfm: -67.02683753157035
episode: 388 training return: tensor(-163.7943, device='cuda:0')
episode: 389 training return: tensor(-97.1146, device='cuda:0')
episode: 390 training return: tensor(-185.6637, device='cuda:0')
episode: 391 training return: tensor(-27.4522, device='cuda:0')
epoch: 98 test_true_pfm: 84.00277197810098 sim_pfm: -58.868243454780895
episode: 392 training return: tensor(-86.7164, device='cuda:0')
episode: 393 training return: tensor(29.3106, device='cuda:0')
episode: 394 training return: tensor(-78.2382, device='cuda:0')
episode: 395 training return: tensor(-205.0686, device='cuda:0')
epoch: 99 test_true_pfm: 80.26360350163003 sim_pfm: -95.28521480928175
episode: 396 training return: tensor(8.3109, device='cuda:0')
episode: 397 training return: tensor(-49.6704, device='cuda:0')
episode: 398 training return: tensor(-138.0137, device='cuda:0')
episode: 399 training return: tensor(-87.9060, device='cuda:0')
epoch: 100 test_true_pfm: 79.63976063831285 sim_pfm: -99.04941600820749
episode: 400 training return: tensor(-105.2106, device='cuda:0')
episode: 401 training return: tensor(-0.6985, device='cuda:0')
episode: 402 training return: tensor(-188.5252, device='cuda:0')
episode: 403 training return: tensor(-204.2905, device='cuda:0')
epoch: 101 test_true_pfm: 80.73385130767046 sim_pfm: -163.29492653023334
episode: 404 training return: tensor(-102.1980, device='cuda:0')
episode: 405 training return: tensor(-208.5467, device='cuda:0')
episode: 406 training return: tensor(-84.7818, device='cuda:0')
episode: 407 training return: tensor(-172.5773, device='cuda:0')
epoch: 102 test_true_pfm: 63.61809110245072 sim_pfm: -72.38288109598216
episode: 408 training return: tensor(-29.7925, device='cuda:0')
episode: 409 training return: tensor(-125.5849, device='cuda:0')
episode: 410 training return: tensor(-125.8639, device='cuda:0')
episode: 411 training return: tensor(-49.8335, device='cuda:0')
epoch: 103 test_true_pfm: 79.75336200507715 sim_pfm: -134.28148275945568
episode: 412 training return: tensor(48.8743, device='cuda:0')
episode: 413 training return: tensor(-152.1831, device='cuda:0')
episode: 414 training return: tensor(-98.5432, device='cuda:0')
episode: 415 training return: tensor(-94.8334, device='cuda:0')
epoch: 104 test_true_pfm: 79.64117966736765 sim_pfm: -99.73477157218149
episode: 416 training return: tensor(-69.8939, device='cuda:0')
episode: 417 training return: tensor(-99.8319, device='cuda:0')
episode: 418 training return: tensor(-173.6676, device='cuda:0')
episode: 419 training return: tensor(-85.0162, device='cuda:0')
epoch: 105 test_true_pfm: 83.73271088448578 sim_pfm: -144.86379533921718
episode: 420 training return: tensor(-98.8268, device='cuda:0')
episode: 421 training return: tensor(-79.6740, device='cuda:0')
episode: 422 training return: tensor(-80.0957, device='cuda:0')
episode: 423 training return: tensor(-90.1013, device='cuda:0')
epoch: 106 test_true_pfm: 77.35856117526494 sim_pfm: -81.06396353155723
episode: 424 training return: tensor(-166.8001, device='cuda:0')
episode: 425 training return: tensor(-109.4212, device='cuda:0')
episode: 426 training return: tensor(37.1547, device='cuda:0')
episode: 427 training return: tensor(-85.4638, device='cuda:0')
epoch: 107 test_true_pfm: 72.7096602558833 sim_pfm: -119.16985813867068
episode: 428 training return: tensor(-45.7741, device='cuda:0')
episode: 429 training return: tensor(-98.0229, device='cuda:0')
episode: 430 training return: tensor(-46.8381, device='cuda:0')
episode: 431 training return: tensor(-117.8399, device='cuda:0')
epoch: 108 test_true_pfm: 80.6685040665075 sim_pfm: -118.48470538521069
episode: 432 training return: tensor(-89.4236, device='cuda:0')
episode: 433 training return: tensor(-171.6070, device='cuda:0')
episode: 434 training return: tensor(-81.5793, device='cuda:0')
episode: 435 training return: tensor(-90.8636, device='cuda:0')
epoch: 109 test_true_pfm: 78.84827882523352 sim_pfm: -69.74358274198603
episode: 436 training return: tensor(-112.2211, device='cuda:0')
episode: 437 training return: tensor(-83.4723, device='cuda:0')
episode: 438 training return: tensor(-140.8882, device='cuda:0')
episode: 439 training return: tensor(-193.3938, device='cuda:0')
epoch: 110 test_true_pfm: 75.72198934028567 sim_pfm: -89.95958584813052
episode: 440 training return: tensor(-78.4591, device='cuda:0')
episode: 441 training return: tensor(-88.8355, device='cuda:0')
episode: 442 training return: tensor(-173.1364, device='cuda:0')
episode: 443 training return: tensor(-139.1740, device='cuda:0')
epoch: 111 test_true_pfm: 72.61294162825122 sim_pfm: -81.66017253227183
episode: 444 training return: tensor(-91.0795, device='cuda:0')
episode: 445 training return: tensor(-192.0858, device='cuda:0')
episode: 446 training return: tensor(-77.4908, device='cuda:0')
episode: 447 training return: tensor(-92.9517, device='cuda:0')
epoch: 112 test_true_pfm: 71.62924659056577 sim_pfm: -91.69003748827382
episode: 448 training return: tensor(-176.8692, device='cuda:0')
episode: 449 training return: tensor(-37.4183, device='cuda:0')
episode: 450 training return: tensor(-99.6257, device='cuda:0')
episode: 451 training return: tensor(-90.5902, device='cuda:0')
epoch: 113 test_true_pfm: 72.83426473223676 sim_pfm: -62.94949652614305
episode: 452 training return: tensor(-120.3443, device='cuda:0')
episode: 453 training return: tensor(-84.5246, device='cuda:0')
episode: 454 training return: tensor(-78.7714, device='cuda:0')
episode: 455 training return: tensor(-144.4597, device='cuda:0')
epoch: 114 test_true_pfm: 71.7074013016399 sim_pfm: -66.46171331873629
episode: 456 training return: tensor(-110.5890, device='cuda:0')
episode: 457 training return: tensor(-80.0586, device='cuda:0')
episode: 458 training return: tensor(38.0261, device='cuda:0')
episode: 459 training return: tensor(-95.2159, device='cuda:0')
epoch: 115 test_true_pfm: 74.11881365734361 sim_pfm: -79.02503045575577
episode: 460 training return: tensor(50.4056, device='cuda:0')
episode: 461 training return: tensor(-64.9541, device='cuda:0')
episode: 462 training return: tensor(13.7543, device='cuda:0')
episode: 463 training return: tensor(-165.0417, device='cuda:0')
epoch: 116 test_true_pfm: 87.05588521378344 sim_pfm: -92.44270306405379
episode: 464 training return: tensor(-83.0706, device='cuda:0')
episode: 465 training return: tensor(-96.2312, device='cuda:0')
episode: 466 training return: tensor(-94.4121, device='cuda:0')
episode: 467 training return: tensor(-150.0838, device='cuda:0')
epoch: 117 test_true_pfm: 69.91487991866184 sim_pfm: -110.9601964189962
episode: 468 training return: tensor(-136.8398, device='cuda:0')
episode: 469 training return: tensor(-192.2379, device='cuda:0')
episode: 470 training return: tensor(-13.3702, device='cuda:0')
episode: 471 training return: tensor(-101.5504, device='cuda:0')
epoch: 118 test_true_pfm: 76.39977215415091 sim_pfm: -74.23580873415922
episode: 472 training return: tensor(-89.0080, device='cuda:0')
episode: 473 training return: tensor(-174.3507, device='cuda:0')
episode: 474 training return: tensor(-105.7143, device='cuda:0')
episode: 475 training return: tensor(-183.5853, device='cuda:0')
epoch: 119 test_true_pfm: 75.90612405174467 sim_pfm: -82.47970826529199
episode: 476 training return: tensor(-64.0231, device='cuda:0')
episode: 477 training return: tensor(-37.0383, device='cuda:0')
episode: 478 training return: tensor(-107.8686, device='cuda:0')
episode: 479 training return: tensor(-180.5766, device='cuda:0')
epoch: 120 test_true_pfm: 70.46251343697875 sim_pfm: -71.54784618548584
episode: 480 training return: tensor(-176.3919, device='cuda:0')
episode: 481 training return: tensor(-40.9534, device='cuda:0')
episode: 482 training return: tensor(-196.9149, device='cuda:0')
episode: 483 training return: tensor(-86.7264, device='cuda:0')
epoch: 121 test_true_pfm: 52.985063666862516 sim_pfm: -114.16741477941396
episode: 484 training return: tensor(70.8359, device='cuda:0')
episode: 485 training return: tensor(-70.6575, device='cuda:0')
episode: 486 training return: tensor(-42.0603, device='cuda:0')
episode: 487 training return: tensor(-157.7581, device='cuda:0')
epoch: 122 test_true_pfm: 69.0418357765609 sim_pfm: -120.97245344916591
episode: 488 training return: tensor(-181.0288, device='cuda:0')
episode: 489 training return: tensor(-105.3466, device='cuda:0')
episode: 490 training return: tensor(-46.5755, device='cuda:0')
episode: 491 training return: tensor(-107.6223, device='cuda:0')
epoch: 123 test_true_pfm: 90.00045710252206 sim_pfm: -110.11243459073594
episode: 492 training return: tensor(-80.9951, device='cuda:0')
episode: 493 training return: tensor(-49.4523, device='cuda:0')
episode: 494 training return: tensor(-82.7805, device='cuda:0')
episode: 495 training return: tensor(-65.5080, device='cuda:0')
epoch: 124 test_true_pfm: 74.11447408237743 sim_pfm: -106.93649776456877
episode: 496 training return: tensor(-166.4290, device='cuda:0')
episode: 497 training return: tensor(-51.3844, device='cuda:0')
episode: 498 training return: tensor(-153.5538, device='cuda:0')
episode: 499 training return: tensor(-98.3127, device='cuda:0')
epoch: 125 test_true_pfm: 70.6450867843791 sim_pfm: -138.7448665567499
episode: 500 training return: tensor(-94.3227, device='cuda:0')
episode: 501 training return: tensor(-88.2640, device='cuda:0')
episode: 502 training return: tensor(-169.7659, device='cuda:0')
episode: 503 training return: tensor(21.1648, device='cuda:0')
epoch: 126 test_true_pfm: 93.58060783320931 sim_pfm: -73.63027709409361
episode: 504 training return: tensor(-196.5049, device='cuda:0')
episode: 505 training return: tensor(-100.7799, device='cuda:0')
episode: 506 training return: tensor(-180.9232, device='cuda:0')
episode: 507 training return: tensor(-70.3653, device='cuda:0')
epoch: 127 test_true_pfm: 72.30819103097883 sim_pfm: -98.24276969147613
episode: 508 training return: tensor(6.9007, device='cuda:0')
episode: 509 training return: tensor(-90.1358, device='cuda:0')
episode: 510 training return: tensor(-159.0623, device='cuda:0')
episode: 511 training return: tensor(-75.9515, device='cuda:0')
epoch: 128 test_true_pfm: 65.53633250765202 sim_pfm: -93.20387757834396
episode: 512 training return: tensor(-142.9475, device='cuda:0')
episode: 513 training return: tensor(-148.7318, device='cuda:0')
episode: 514 training return: tensor(-90.7562, device='cuda:0')
episode: 515 training return: tensor(-173.4032, device='cuda:0')
epoch: 129 test_true_pfm: 73.7430338642309 sim_pfm: -119.60402601000969
episode: 516 training return: tensor(-103.3715, device='cuda:0')
episode: 517 training return: tensor(92.8043, device='cuda:0')
episode: 518 training return: tensor(-166.8240, device='cuda:0')
episode: 519 training return: tensor(-81.9132, device='cuda:0')
epoch: 130 test_true_pfm: 66.04406747687361 sim_pfm: -102.42261046838249
episode: 520 training return: tensor(-87.6130, device='cuda:0')
episode: 521 training return: tensor(-36.8449, device='cuda:0')
episode: 522 training return: tensor(-75.9317, device='cuda:0')
episode: 523 training return: tensor(-82.9580, device='cuda:0')
epoch: 131 test_true_pfm: 80.87665699423599 sim_pfm: -59.60646177362069
episode: 524 training return: tensor(-103.3427, device='cuda:0')
episode: 525 training return: tensor(-116.4288, device='cuda:0')
episode: 526 training return: tensor(-164.9516, device='cuda:0')
episode: 527 training return: tensor(-100.0415, device='cuda:0')
epoch: 132 test_true_pfm: 80.3469631591803 sim_pfm: -122.16940181818791
episode: 528 training return: tensor(-116.9219, device='cuda:0')
episode: 529 training return: tensor(-158.0020, device='cuda:0')
episode: 530 training return: tensor(-34.5048, device='cuda:0')
episode: 531 training return: tensor(-89.9201, device='cuda:0')
epoch: 133 test_true_pfm: 70.15357044441689 sim_pfm: -116.1060737624066
episode: 532 training return: tensor(-101.0918, device='cuda:0')
episode: 533 training return: tensor(-79.3241, device='cuda:0')
episode: 534 training return: tensor(39.3364, device='cuda:0')
episode: 535 training return: tensor(-174.3060, device='cuda:0')
epoch: 134 test_true_pfm: 59.338130833798495 sim_pfm: -66.42386710610008
episode: 536 training return: tensor(-133.4203, device='cuda:0')
episode: 537 training return: tensor(-114.4195, device='cuda:0')
episode: 538 training return: tensor(-110.6731, device='cuda:0')
episode: 539 training return: tensor(-70.9503, device='cuda:0')
epoch: 135 test_true_pfm: 78.91403647224797 sim_pfm: -99.07846628809929
episode: 540 training return: tensor(-78.0614, device='cuda:0')
episode: 541 training return: tensor(74.0584, device='cuda:0')
episode: 542 training return: tensor(-36.6051, device='cuda:0')
episode: 543 training return: tensor(-77.0075, device='cuda:0')
epoch: 136 test_true_pfm: 80.9492278787471 sim_pfm: -130.77231583763495
episode: 544 training return: tensor(-61.7171, device='cuda:0')
episode: 545 training return: tensor(-90.2551, device='cuda:0')
episode: 546 training return: tensor(-80.6449, device='cuda:0')
episode: 547 training return: tensor(-74.3233, device='cuda:0')
epoch: 137 test_true_pfm: 88.84734718837458 sim_pfm: -82.37847297516419
episode: 548 training return: tensor(-107.7806, device='cuda:0')
episode: 549 training return: tensor(-90.2317, device='cuda:0')
episode: 550 training return: tensor(-69.3332, device='cuda:0')
episode: 551 training return: tensor(-80.1664, device='cuda:0')
epoch: 138 test_true_pfm: 89.78115272701238 sim_pfm: -117.71331499224762
episode: 552 training return: tensor(-86.8799, device='cuda:0')
episode: 553 training return: tensor(-167.2719, device='cuda:0')
episode: 554 training return: tensor(-41.5277, device='cuda:0')
episode: 555 training return: tensor(-46.5494, device='cuda:0')
epoch: 139 test_true_pfm: 69.95771884483152 sim_pfm: -84.97557578971028
episode: 556 training return: tensor(-65.2588, device='cuda:0')
episode: 557 training return: tensor(-98.8507, device='cuda:0')
episode: 558 training return: tensor(-84.6226, device='cuda:0')
episode: 559 training return: tensor(-144.4344, device='cuda:0')
epoch: 140 test_true_pfm: 67.70504004215915 sim_pfm: -87.3218100719736
episode: 560 training return: tensor(9.0559, device='cuda:0')
episode: 561 training return: tensor(-30.7093, device='cuda:0')
episode: 562 training return: tensor(-11.4749, device='cuda:0')
episode: 563 training return: tensor(-75.5538, device='cuda:0')
epoch: 141 test_true_pfm: 84.83104804850156 sim_pfm: -131.18963109601754
episode: 564 training return: tensor(-168.0980, device='cuda:0')
episode: 565 training return: tensor(-79.5054, device='cuda:0')
episode: 566 training return: tensor(-104.1083, device='cuda:0')
episode: 567 training return: tensor(-83.5373, device='cuda:0')
epoch: 142 test_true_pfm: 64.46469106660959 sim_pfm: -116.32393406552146
episode: 568 training return: tensor(-174.8411, device='cuda:0')
episode: 569 training return: tensor(-83.2023, device='cuda:0')
episode: 570 training return: tensor(-53.1288, device='cuda:0')
episode: 571 training return: tensor(-98.8007, device='cuda:0')
epoch: 143 test_true_pfm: 77.4546927357122 sim_pfm: -163.8559999963094
episode: 572 training return: tensor(-107.1782, device='cuda:0')
episode: 573 training return: tensor(-83.8783, device='cuda:0')
episode: 574 training return: tensor(-171.1973, device='cuda:0')
episode: 575 training return: tensor(-92.8759, device='cuda:0')
epoch: 144 test_true_pfm: 75.53378742677876 sim_pfm: -61.27480380408233
episode: 576 training return: tensor(-98.1936, device='cuda:0')
episode: 577 training return: tensor(-161.5322, device='cuda:0')
episode: 578 training return: tensor(2.9993, device='cuda:0')
episode: 579 training return: tensor(-199.5993, device='cuda:0')
epoch: 145 test_true_pfm: 70.59010861030713 sim_pfm: -118.06790740012075
episode: 580 training return: tensor(-79.9253, device='cuda:0')
episode: 581 training return: tensor(-88.1910, device='cuda:0')
episode: 582 training return: tensor(-82.5234, device='cuda:0')
episode: 583 training return: tensor(-174.8658, device='cuda:0')
epoch: 146 test_true_pfm: 65.61344910518324 sim_pfm: -153.06403816581587
episode: 584 training return: tensor(-193.4214, device='cuda:0')
episode: 585 training return: tensor(-83.7734, device='cuda:0')
episode: 586 training return: tensor(-183.9466, device='cuda:0')
episode: 587 training return: tensor(-71.1033, device='cuda:0')
epoch: 147 test_true_pfm: 71.26234838400518 sim_pfm: -69.42030675094574
episode: 588 training return: tensor(-155.9404, device='cuda:0')
episode: 589 training return: tensor(-191.0837, device='cuda:0')
episode: 590 training return: tensor(-80.5653, device='cuda:0')
episode: 591 training return: tensor(-73.6436, device='cuda:0')
epoch: 148 test_true_pfm: 80.67909387495445 sim_pfm: -87.46855870371219
episode: 592 training return: tensor(-77.6541, device='cuda:0')
episode: 593 training return: tensor(-84.2664, device='cuda:0')
episode: 594 training return: tensor(-166.6920, device='cuda:0')
episode: 595 training return: tensor(57.4144, device='cuda:0')
epoch: 149 test_true_pfm: 78.19789530345565 sim_pfm: -61.648474898486164
episode: 596 training return: tensor(-127.6116, device='cuda:0')
episode: 597 training return: tensor(-22.6886, device='cuda:0')
episode: 598 training return: tensor(-69.3710, device='cuda:0')
episode: 599 training return: tensor(-168.0540, device='cuda:0')
epoch: 150 test_true_pfm: 73.14258542882251 sim_pfm: -70.22073080469855
