['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '4']
epoch: 0 training_loss 0.45316075831651687 test_loss: 0.3122772216796875
epoch: 1 training_loss 0.2843222209811211 test_loss: 0.24683618545532227
epoch: 2 training_loss 0.25457357421517374 test_loss: 0.24248285293579103
epoch: 3 training_loss 0.2444653382897377 test_loss: 0.22967305183410644
epoch: 4 training_loss 0.24389105170965195 test_loss: 0.23722925186157226
epoch: 5 training_loss 0.22732066348195076 test_loss: 0.19939011335372925
epoch: 6 training_loss 0.24053240224719047 test_loss: 0.22519495487213134
epoch: 7 training_loss 0.22509459741413593 test_loss: 0.22097451686859132
epoch: 8 training_loss 0.2159399273991585 test_loss: 0.21775953769683837
epoch: 9 training_loss 0.20964946806430818 test_loss: 0.25107879638671876
epoch: 10 training_loss 0.21599999763071537 test_loss: 0.21109542846679688
epoch: 11 training_loss 0.2159067054092884 test_loss: 0.20483179092407228
epoch: 12 training_loss 0.21839451156556605 test_loss: 0.22360703945159913
epoch: 13 training_loss 0.20867089189589025 test_loss: 0.2175692319869995
epoch: 14 training_loss 0.20897710382938384 test_loss: 0.2040158987045288
epoch: 15 training_loss 0.21633554890751838 test_loss: 0.2144312620162964
epoch: 16 training_loss 0.20109656147658825 test_loss: 0.20302822589874267
epoch: 17 training_loss 0.20195608295500278 test_loss: 0.2051567554473877
epoch: 18 training_loss 0.21064240969717501 test_loss: 0.2134610652923584
epoch: 19 training_loss 0.2122257925570011 test_loss: 0.19991763830184936
epoch: 20 training_loss 0.20843287788331508 test_loss: 0.19934673309326173
epoch: 21 training_loss 0.19347566559910775 test_loss: 0.19831454753875732
epoch: 22 training_loss 0.20512823775410652 test_loss: 0.19487614631652833
epoch: 23 training_loss 0.21118669614195823 test_loss: 0.20760710239410402
epoch: 24 training_loss 0.19433906704187393 test_loss: 0.2179025173187256
epoch: 25 training_loss 0.2044469714909792 test_loss: 0.21979496479034424
epoch: 26 training_loss 0.201931327059865 test_loss: 0.21289436817169188
epoch: 27 training_loss 0.20090153947472572 test_loss: 0.1940779447555542
epoch: 28 training_loss 0.1973278035968542 test_loss: 0.18849608898162842
epoch: 29 training_loss 0.20171971783041953 test_loss: 0.19058102369308472
epoch: 30 training_loss 0.2010776600241661 test_loss: 0.23638429641723632
epoch: 31 training_loss 0.20081435985863208 test_loss: 0.21011743545532227
epoch: 32 training_loss 0.1948114687204361 test_loss: 0.197995924949646
epoch: 33 training_loss 0.19382027179002762 test_loss: 0.21494815349578858
epoch: 34 training_loss 0.20108996473252774 test_loss: 0.19472911357879638
epoch: 35 training_loss 0.19167334631085395 test_loss: 0.18726744651794433
epoch: 36 training_loss 0.1905782251060009 test_loss: 0.21123046875
epoch: 37 training_loss 0.19489171288907528 test_loss: 0.190901517868042
epoch: 38 training_loss 0.19719249434769154 test_loss: 0.1995886206626892
epoch: 39 training_loss 0.19419190809130668 test_loss: 0.1977674126625061
epoch: 40 training_loss 0.20235250294208526 test_loss: 0.19065345525741578
epoch: 41 training_loss 0.19220714032649994 test_loss: 0.20810408592224122
epoch: 42 training_loss 0.19252552755177021 test_loss: 0.19072928428649902
epoch: 43 training_loss 0.1929087143391371 test_loss: 0.20632102489471435
epoch: 44 training_loss 0.20108920983970166 test_loss: 0.23360757827758788
epoch: 45 training_loss 0.1976282775402069 test_loss: 0.18167393207550048
epoch: 46 training_loss 0.189525004401803 test_loss: 0.18359682559967042
epoch: 47 training_loss 0.2035630103200674 test_loss: 0.18182357549667358
epoch: 48 training_loss 0.1915982647240162 test_loss: 0.21409411430358888
epoch: 49 training_loss 0.19598440259695052 test_loss: 0.19370144605636597
epoch: 50 training_loss 0.1928909345716238 test_loss: 0.197215473651886
epoch: 51 training_loss 0.19503267839550972 test_loss: 0.1919482946395874
epoch: 52 training_loss 0.19957993179559708 test_loss: 0.18703290224075317
epoch: 53 training_loss 0.19239489041268826 test_loss: 0.18826967477798462
epoch: 54 training_loss 0.1934461285918951 test_loss: 0.21402347087860107
epoch: 55 training_loss 0.1899541024118662 test_loss: 0.18182090520858765
epoch: 56 training_loss 0.19865145534276962 test_loss: 0.20502910614013672
epoch: 57 training_loss 0.19592328235507012 test_loss: 0.1939091444015503
epoch: 58 training_loss 0.1912158478051424 test_loss: 0.19629985094070435
epoch: 59 training_loss 0.1989379369467497 test_loss: 0.20045182704925538
epoch: 60 training_loss 0.18382157377898692 test_loss: 0.17892760038375854
epoch: 61 training_loss 0.1927332355827093 test_loss: 0.1808178663253784
epoch: 62 training_loss 0.19313082933425904 test_loss: 0.19189815521240233
epoch: 63 training_loss 0.18595354683697224 test_loss: 0.1957605242729187
epoch: 64 training_loss 0.19279220163822175 test_loss: 0.19636332988739014
epoch: 65 training_loss 0.19226229771971703 test_loss: 0.18632956743240356
epoch: 66 training_loss 0.19109980680048466 test_loss: 0.19950710535049437
epoch: 67 training_loss 0.19506104692816734 test_loss: 0.17960014343261718
epoch: 68 training_loss 0.19605957433581353 test_loss: 0.17900712490081788
epoch: 69 training_loss 0.19231971345841883 test_loss: 0.187816846370697
epoch: 70 training_loss 0.19012161016464232 test_loss: 0.19722306728363037
epoch: 71 training_loss 0.19028905220329762 test_loss: 0.1914588689804077
epoch: 72 training_loss 0.1888383736461401 test_loss: 0.17806918621063234
epoch: 73 training_loss 0.18002174176275731 test_loss: 0.17118023633956908
epoch: 74 training_loss 0.19177079640328884 test_loss: 0.18219521045684814
epoch: 75 training_loss 0.1878670220822096 test_loss: 0.20879137516021729
epoch: 76 training_loss 0.19431496731936931 test_loss: 0.21245875358581542
epoch: 77 training_loss 0.1905904170125723 test_loss: 0.18445063829421998
epoch: 78 training_loss 0.18399773351848125 test_loss: 0.18689159154891968
epoch: 79 training_loss 0.18274160109460355 test_loss: 0.1736459970474243
epoch: 80 training_loss 0.1894224690645933 test_loss: 0.1974130392074585
epoch: 81 training_loss 0.18907251164317132 test_loss: 0.19054702520370484
epoch: 82 training_loss 0.18805537573993206 test_loss: 0.1940546989440918
epoch: 83 training_loss 0.188764089345932 test_loss: 0.19065475463867188
epoch: 84 training_loss 0.18638274312019348 test_loss: 0.18728803396224974
epoch: 85 training_loss 0.1921327905356884 test_loss: 0.1979292154312134
epoch: 86 training_loss 0.182754043340683 test_loss: 0.18201900720596315
epoch: 87 training_loss 0.1797404880821705 test_loss: 0.2019287347793579
epoch: 88 training_loss 0.18487354539334774 test_loss: 0.21222963333129882
epoch: 89 training_loss 0.1858938155323267 test_loss: 0.18312710523605347
epoch: 90 training_loss 0.18710040099918843 test_loss: 0.21473653316497804
epoch: 91 training_loss 0.1870433023571968 test_loss: 0.18916721343994142
epoch: 92 training_loss 0.19245648346841335 test_loss: 0.19619352817535402
epoch: 93 training_loss 0.19115021526813508 test_loss: 0.18252735137939452
epoch: 94 training_loss 0.19586214035749436 test_loss: 0.17472296953201294
epoch: 95 training_loss 0.18556622058153152 test_loss: 0.18149197101593018
epoch: 96 training_loss 0.1876547335088253 test_loss: 0.1815904498100281
epoch: 97 training_loss 0.1861225499212742 test_loss: 0.1799811005592346
epoch: 98 training_loss 0.18938130147755147 test_loss: 0.191292142868042
epoch: 99 training_loss 0.17721012987196447 test_loss: 0.16789335012435913
epoch: 100 training_loss 0.18579104162752627 test_loss: 0.18181456327438356
epoch: 101 training_loss 0.19177592158317566 test_loss: 0.1865431547164917
epoch: 102 training_loss 0.19365925788879396 test_loss: 0.18521547317504883
epoch: 103 training_loss 0.18502576403319837 test_loss: 0.17374043464660643
epoch: 104 training_loss 0.18185202896595 test_loss: 0.19572557210922242
epoch: 105 training_loss 0.18537243485450744 test_loss: 0.17317161560058594
epoch: 106 training_loss 0.18242315731942654 test_loss: 0.1784515619277954
epoch: 107 training_loss 0.18069614261388778 test_loss: 0.1814051628112793
epoch: 108 training_loss 0.1781171213835478 test_loss: 0.18578590154647828
epoch: 109 training_loss 0.17539510563015936 test_loss: 0.18341649770736695
epoch: 110 training_loss 0.186927335485816 test_loss: 0.18811075687408446
epoch: 111 training_loss 0.18284484885632993 test_loss: 0.1897303581237793
epoch: 112 training_loss 0.18913267195224762 test_loss: 0.17314524650573732
epoch: 113 training_loss 0.17952526092529297 test_loss: 0.16958695650100708
epoch: 114 training_loss 0.18315822258591652 test_loss: 0.18919694423675537
epoch: 115 training_loss 0.18572485357522964 test_loss: 0.1899530529975891
epoch: 116 training_loss 0.1861654371023178 test_loss: 0.17678886651992798
epoch: 117 training_loss 0.18338555075228213 test_loss: 0.1848389744758606
epoch: 118 training_loss 0.18363528892397882 test_loss: 0.20314009189605714
epoch: 119 training_loss 0.18328317441046238 test_loss: 0.17672542333602906
epoch: 120 training_loss 0.1824735204130411 test_loss: 0.17854611873626708
epoch: 121 training_loss 0.1818822943419218 test_loss: 0.19087331295013427
epoch: 122 training_loss 0.18819308646023272 test_loss: 0.1779239296913147
epoch: 123 training_loss 0.19029903933405876 test_loss: 0.189150869846344
epoch: 124 training_loss 0.1825935786217451 test_loss: 0.1858532667160034
epoch: 125 training_loss 0.18882317893207073 test_loss: 0.17746808528900146
epoch: 126 training_loss 0.18775448054075242 test_loss: 0.22061028480529785
epoch: 127 training_loss 0.18301512002944947 test_loss: 0.19516915082931519
epoch: 128 training_loss 0.1786177708953619 test_loss: 0.20635530948638917
epoch: 129 training_loss 0.18673279769718648 test_loss: 0.2006147623062134
epoch: 130 training_loss 0.18550690628588198 test_loss: 0.16570554971694945
epoch: 131 training_loss 0.1841848800331354 test_loss: 0.19413725137710572
epoch: 132 training_loss 0.1810827363282442 test_loss: 0.1974175214767456
epoch: 133 training_loss 0.18934396855533123 test_loss: 0.19776110649108886
epoch: 134 training_loss 0.18240393690764903 test_loss: 0.18807101249694824
epoch: 135 training_loss 0.18238032534718512 test_loss: 0.1783855676651001
epoch: 136 training_loss 0.1882396302372217 test_loss: 0.1706269145011902
epoch: 137 training_loss 0.1867037747055292 test_loss: 0.18463374376296998
epoch: 138 training_loss 0.18026565484702586 test_loss: 0.19447101354599
epoch: 139 training_loss 0.18261566527187825 test_loss: 0.1793574571609497
epoch: 140 training_loss 0.182980547696352 test_loss: 0.19677585363388062
epoch: 141 training_loss 0.179136313572526 test_loss: 0.19515016078948974
epoch: 142 training_loss 0.1784145390987396 test_loss: 0.19962115287780763
epoch: 143 training_loss 0.18639359816908838 test_loss: 0.19573501348495484
epoch: 144 training_loss 0.17838291116058827 test_loss: 0.18500723838806152
epoch: 145 training_loss 0.17604749962687494 test_loss: 0.17882694005966188
epoch: 146 training_loss 0.18001996524631977 test_loss: 0.19532605409622192
epoch: 147 training_loss 0.18231270529329777 test_loss: 0.20014286041259766
epoch: 148 training_loss 0.18156809478998184 test_loss: 0.19673819541931153
epoch: 149 training_loss 0.1866027931123972 test_loss: 0.18752470016479492
epoch: 0 training_loss 45.17325819015503 test_loss: 26.551089477539062
epoch: 1 training_loss 21.029675912857055 test_loss: 17.496292114257812
epoch: 2 training_loss 15.683658542633056 test_loss: 14.020191955566407
epoch: 3 training_loss 13.14065029144287 test_loss: 12.501579284667969
epoch: 4 training_loss 11.856392040252686 test_loss: 10.922795867919922
epoch: 5 training_loss 10.90842453956604 test_loss: 10.617848968505859
epoch: 6 training_loss 10.390440378189087 test_loss: 10.753855133056641
epoch: 7 training_loss 10.001577920913697 test_loss: 9.564299011230469
epoch: 8 training_loss 9.346680517196654 test_loss: 9.541020965576172
epoch: 9 training_loss 9.0529212808609 test_loss: 8.97109603881836
epoch: 10 training_loss 8.590177946090698 test_loss: 8.66116714477539
epoch: 11 training_loss 8.380275225639343 test_loss: 8.17079849243164
epoch: 12 training_loss 8.12426513671875 test_loss: 7.702462005615234
epoch: 13 training_loss 7.476290049552918 test_loss: 8.006444549560547
epoch: 14 training_loss 7.428630647659301 test_loss: 7.49139175415039
epoch: 15 training_loss 7.1970734167099 test_loss: 7.314859008789062
epoch: 16 training_loss 7.0187311935424805 test_loss: 7.097880554199219
epoch: 17 training_loss 6.884081082344055 test_loss: 6.826988983154297
epoch: 18 training_loss 6.619453282356262 test_loss: 6.357558822631836
epoch: 19 training_loss 6.436615772247315 test_loss: 6.90185317993164
epoch: 20 training_loss 6.384668531417847 test_loss: 6.2254291534423825
epoch: 21 training_loss 5.965352740287781 test_loss: 5.850088119506836
epoch: 22 training_loss 6.009177222251892 test_loss: 5.597372436523438
epoch: 23 training_loss 5.684349660873413 test_loss: 5.585184860229492
epoch: 24 training_loss 5.546025905609131 test_loss: 5.396114730834961
epoch: 25 training_loss 5.516240108013153 test_loss: 5.6748512268066404
epoch: 26 training_loss 5.548293542861939 test_loss: 5.025613403320312
epoch: 27 training_loss 5.234990131855011 test_loss: 5.407014083862305
epoch: 28 training_loss 5.450332493782043 test_loss: 5.696093368530273
epoch: 29 training_loss 5.035379331111908 test_loss: 5.015251541137696
epoch: 30 training_loss 4.954693250656128 test_loss: 5.151834487915039
epoch: 31 training_loss 5.05131174325943 test_loss: 4.910005950927735
epoch: 32 training_loss 4.969206809997559 test_loss: 4.963731384277343
epoch: 33 training_loss 4.807250061035156 test_loss: 4.781456756591797
epoch: 34 training_loss 4.838040511608124 test_loss: 5.194081878662109
epoch: 35 training_loss 4.853801546096801 test_loss: 4.528926849365234
epoch: 36 training_loss 4.820337870121002 test_loss: 4.64067497253418
epoch: 37 training_loss 4.775679676532746 test_loss: 4.726261138916016
epoch: 38 training_loss 4.467788598537445 test_loss: 4.435939788818359
epoch: 39 training_loss 4.483443624973297 test_loss: 4.574921035766602
epoch: 40 training_loss 4.446381747722626 test_loss: 4.200961303710938
epoch: 41 training_loss 4.398210091590881 test_loss: 4.063299560546875
epoch: 42 training_loss 4.416103644371033 test_loss: 3.971901702880859
epoch: 43 training_loss 4.393993358612061 test_loss: 4.688692474365235
epoch: 44 training_loss 4.257211301326752 test_loss: 4.164512634277344
epoch: 45 training_loss 4.129083542823792 test_loss: 4.057880401611328
epoch: 46 training_loss 4.113831746578216 test_loss: 4.119499588012696
epoch: 47 training_loss 4.069008741378784 test_loss: 4.446348571777344
epoch: 48 training_loss 4.071601228713989 test_loss: 4.042727661132813
epoch: 49 training_loss 3.9595261311531065 test_loss: 3.9273307800292967
epoch: 50 training_loss 4.117174923419952 test_loss: 4.622638320922851
epoch: 51 training_loss 3.9704552602767946 test_loss: 3.8676116943359373
epoch: 52 training_loss 3.8480473613739012 test_loss: 3.7582733154296877
epoch: 53 training_loss 4.143375856876373 test_loss: 4.016970825195313
epoch: 54 training_loss 4.0090111017227175 test_loss: 3.8796630859375
epoch: 55 training_loss 3.859044315814972 test_loss: 3.872637176513672
epoch: 56 training_loss 3.8215032172203065 test_loss: 3.516368865966797
epoch: 57 training_loss 3.904167602062225 test_loss: 3.8971553802490235
epoch: 58 training_loss 3.706936058998108 test_loss: 3.4091522216796877
epoch: 59 training_loss 3.6467219376564026 test_loss: 3.677119827270508
epoch: 60 training_loss 3.6809352350234987 test_loss: 3.9063011169433595
epoch: 61 training_loss 3.910666038990021 test_loss: 3.6812572479248047
epoch: 62 training_loss 3.8344852542877197 test_loss: 3.64868278503418
epoch: 63 training_loss 3.7200156259536743 test_loss: 3.7971439361572266
epoch: 64 training_loss 3.560077784061432 test_loss: 3.456924057006836
epoch: 65 training_loss 3.538656053543091 test_loss: 3.4036640167236327
epoch: 66 training_loss 3.5069138383865357 test_loss: 3.6140880584716797
epoch: 67 training_loss 3.483951826095581 test_loss: 3.238325500488281
epoch: 68 training_loss 3.4375348651409148 test_loss: 3.2508949279785155
epoch: 69 training_loss 3.213548264503479 test_loss: 3.6164356231689454
epoch: 70 training_loss 3.360574195384979 test_loss: 3.1003677368164064
epoch: 71 training_loss 3.270672370195389 test_loss: 3.2239448547363283
epoch: 72 training_loss 3.231151332855225 test_loss: 3.040632438659668
epoch: 73 training_loss 3.1043845295906065 test_loss: 3.281878662109375
epoch: 74 training_loss 3.1376434075832367 test_loss: 3.064565086364746
epoch: 75 training_loss 3.118100788593292 test_loss: 2.875071907043457
epoch: 76 training_loss 3.0623872900009155 test_loss: 3.2293560028076174
epoch: 77 training_loss 3.0136220049858093 test_loss: 2.8963998794555663
epoch: 78 training_loss 3.079570084810257 test_loss: 2.789420509338379
epoch: 79 training_loss 3.0161535036563873 test_loss: 2.8107473373413088
epoch: 80 training_loss 3.143498508930206 test_loss: 2.9897844314575197
epoch: 81 training_loss 3.067530858516693 test_loss: 3.0475481033325194
epoch: 82 training_loss 3.053757665157318 test_loss: 2.934971237182617
epoch: 83 training_loss 2.8967495155334473 test_loss: 2.921379280090332
epoch: 84 training_loss 2.8158214473724366 test_loss: 2.989735984802246
epoch: 85 training_loss 3.0116538405418396 test_loss: 3.044540023803711
epoch: 86 training_loss 3.017111792564392 test_loss: 3.149995231628418
epoch: 87 training_loss 2.7511045956611633 test_loss: 2.6070219039916993
epoch: 88 training_loss 2.926594707965851 test_loss: 2.9788270950317384
epoch: 89 training_loss 2.8698879075050354 test_loss: 2.926632118225098
epoch: 90 training_loss 2.9845959043502805 test_loss: 2.767574691772461
epoch: 91 training_loss 2.8658522856235504 test_loss: 2.6230632781982424
epoch: 92 training_loss 2.8052443182468414 test_loss: 2.968128967285156
epoch: 93 training_loss 2.9379926252365114 test_loss: 2.943492126464844
epoch: 94 training_loss 3.0449677228927614 test_loss: 3.2581485748291015
epoch: 95 training_loss 3.1841647481918334 test_loss: 2.627670669555664
epoch: 96 training_loss 2.7602952802181244 test_loss: 2.774280548095703
epoch: 97 training_loss 2.8186255490779875 test_loss: 3.4624149322509767
epoch: 98 training_loss 2.820724684000015 test_loss: 2.8193971633911135
epoch: 99 training_loss 2.779176858663559 test_loss: 2.8635616302490234
epoch: 100 training_loss 2.6852076375484466 test_loss: 2.6733728408813477
epoch: 101 training_loss 2.7426031649112703 test_loss: 2.5424272537231447
epoch: 102 training_loss 2.805691546201706 test_loss: 2.80737247467041
epoch: 103 training_loss 2.765390430688858 test_loss: 2.5009765625
epoch: 104 training_loss 2.728764218091965 test_loss: 2.8024133682250976
epoch: 105 training_loss 2.7247878456115724 test_loss: 2.7256290435791017
epoch: 106 training_loss 2.8363213038444517 test_loss: 2.631549072265625
epoch: 107 training_loss 2.7541549825668334 test_loss: 2.6006011962890625
epoch: 108 training_loss 2.650949395895004 test_loss: 2.6726003646850587
epoch: 109 training_loss 2.6873823547363282 test_loss: 3.3859878540039063
epoch: 110 training_loss 2.8130944740772246 test_loss: 2.5547788619995115
epoch: 111 training_loss 2.5221974742412567 test_loss: 2.4606534957885744
epoch: 112 training_loss 2.740037044286728 test_loss: 2.749283218383789
epoch: 113 training_loss 2.6770577776432036 test_loss: 2.5223997116088865
epoch: 114 training_loss 2.519815136194229 test_loss: 2.7243972778320313
epoch: 115 training_loss 2.61238804936409 test_loss: 2.4449968338012695
epoch: 116 training_loss 2.5603426933288573 test_loss: 2.4541297912597657
epoch: 117 training_loss 2.585143481492996 test_loss: 2.5473894119262694
epoch: 118 training_loss 2.6732349336147307 test_loss: 2.6115139007568358
epoch: 119 training_loss 2.516890810728073 test_loss: 2.308125686645508
epoch: 120 training_loss 2.6630727887153625 test_loss: 2.313393783569336
epoch: 121 training_loss 2.6333097434043884 test_loss: 2.280927276611328
epoch: 122 training_loss 2.5314574885368346 test_loss: 2.411248207092285
epoch: 123 training_loss 2.426654988527298 test_loss: 2.593893814086914
epoch: 124 training_loss 2.469920130968094 test_loss: 2.9241863250732423
epoch: 125 training_loss 2.5831849253177643 test_loss: 2.4223718643188477
epoch: 126 training_loss 2.6023526906967165 test_loss: 3.2000587463378904
epoch: 127 training_loss 2.503395400047302 test_loss: 2.622633171081543
epoch: 128 training_loss 2.528144795894623 test_loss: 2.3081615447998045
epoch: 129 training_loss 2.443757425546646 test_loss: 2.3324148178100588
epoch: 130 training_loss 2.419685369729996 test_loss: 2.3969802856445312
epoch: 131 training_loss 2.5319423127174376 test_loss: 2.649452972412109
epoch: 132 training_loss 2.5236964511871336 test_loss: 2.647805404663086
epoch: 133 training_loss 2.4371604788303376 test_loss: 2.7001562118530273
epoch: 134 training_loss 2.458108937740326 test_loss: 2.6636165618896483
epoch: 135 training_loss 2.4576876747608183 test_loss: 3.2634761810302733
epoch: 136 training_loss 2.5352301919460296 test_loss: 2.1397321701049803
epoch: 137 training_loss 2.5663386678695677 test_loss: 2.3451019287109376
epoch: 138 training_loss 2.2660789346694945 test_loss: 2.223040962219238
epoch: 139 training_loss 2.6289500117301943 test_loss: 2.4516828536987303
epoch: 140 training_loss 2.333510150909424 test_loss: 2.3053028106689455
epoch: 141 training_loss 2.348294620513916 test_loss: 2.1608470916748046
epoch: 142 training_loss 2.5097102868556975 test_loss: 2.711228370666504
epoch: 143 training_loss 2.439458522796631 test_loss: 3.2185367584228515
epoch: 144 training_loss 2.4543788123130796 test_loss: 2.9326730728149415
epoch: 145 training_loss 2.5161575901508333 test_loss: 2.491115951538086
epoch: 146 training_loss 2.3733320343494415 test_loss: 2.3084163665771484
epoch: 147 training_loss 2.332156080007553 test_loss: 2.3620981216430663
epoch: 148 training_loss 2.3734186804294586 test_loss: 2.315934944152832
epoch: 149 training_loss 2.367472038269043 test_loss: 2.1275943756103515
54.57293787414801
episode: 0 training return: tensor(-157.8784, device='cuda:0')
episode: 1 training return: tensor(-175.0351, device='cuda:0')
episode: 2 training return: tensor(-207.2729, device='cuda:0')
episode: 3 training return: tensor(-112.5603, device='cuda:0')
epoch: 1 test_true_pfm: 62.89792048209747 sim_pfm: -177.63136395252076
episode: 4 training return: tensor(-206.9957, device='cuda:0')
episode: 5 training return: tensor(-126.8133, device='cuda:0')
episode: 6 training return: tensor(-181.0220, device='cuda:0')
episode: 7 training return: tensor(-76.3089, device='cuda:0')
epoch: 2 test_true_pfm: 52.760417049382454 sim_pfm: -154.8352387635794
episode: 8 training return: tensor(-164.9882, device='cuda:0')
episode: 9 training return: tensor(-66.1663, device='cuda:0')
episode: 10 training return: tensor(-164.6967, device='cuda:0')
episode: 11 training return: tensor(-193.2884, device='cuda:0')
epoch: 3 test_true_pfm: 58.882601533670126 sim_pfm: -129.7013151318184
episode: 12 training return: tensor(-187.4335, device='cuda:0')
episode: 13 training return: tensor(-124.3272, device='cuda:0')
episode: 14 training return: tensor(-70.8970, device='cuda:0')
episode: 15 training return: tensor(-131.4047, device='cuda:0')
epoch: 4 test_true_pfm: 55.94708326592952 sim_pfm: -141.17130645379657
episode: 16 training return: tensor(-93.5037, device='cuda:0')
episode: 17 training return: tensor(-211.3222, device='cuda:0')
episode: 18 training return: tensor(-203.3051, device='cuda:0')
episode: 19 training return: tensor(-195.4018, device='cuda:0')
epoch: 5 test_true_pfm: 70.02670724047766 sim_pfm: -180.00269690983697
episode: 20 training return: tensor(-49.1702, device='cuda:0')
episode: 21 training return: tensor(-203.0834, device='cuda:0')
episode: 22 training return: tensor(-90.8651, device='cuda:0')
episode: 23 training return: tensor(-115.4252, device='cuda:0')
epoch: 6 test_true_pfm: 46.88427149766402 sim_pfm: -151.96199508928694
episode: 24 training return: tensor(-80.7104, device='cuda:0')
episode: 25 training return: tensor(-164.8386, device='cuda:0')
episode: 26 training return: tensor(-125.4286, device='cuda:0')
episode: 27 training return: tensor(-173.7557, device='cuda:0')
epoch: 7 test_true_pfm: 49.88427736212546 sim_pfm: -180.0387035735417
episode: 28 training return: tensor(-182.7660, device='cuda:0')
episode: 29 training return: tensor(-163.7580, device='cuda:0')
episode: 30 training return: tensor(-116.6216, device='cuda:0')
episode: 31 training return: tensor(-62.2645, device='cuda:0')
epoch: 8 test_true_pfm: 47.31772421562033 sim_pfm: -151.32876317556946
episode: 32 training return: tensor(-191.9980, device='cuda:0')
episode: 33 training return: tensor(-116.9808, device='cuda:0')
episode: 34 training return: tensor(-200.2124, device='cuda:0')
episode: 35 training return: tensor(-201.6776, device='cuda:0')
epoch: 9 test_true_pfm: 46.60759235234326 sim_pfm: -169.4099587959121
episode: 36 training return: tensor(-202.3933, device='cuda:0')
episode: 37 training return: tensor(-200.1559, device='cuda:0')
episode: 38 training return: tensor(-83.5431, device='cuda:0')
episode: 39 training return: tensor(-74.5727, device='cuda:0')
epoch: 10 test_true_pfm: 45.05574846467923 sim_pfm: -135.61027969710412
episode: 40 training return: tensor(-193.4468, device='cuda:0')
episode: 41 training return: tensor(-122.8681, device='cuda:0')
episode: 42 training return: tensor(-193.6906, device='cuda:0')
episode: 43 training return: tensor(-127.7956, device='cuda:0')
epoch: 11 test_true_pfm: 51.518641739695376 sim_pfm: -153.9870844298217
episode: 44 training return: tensor(-196.1121, device='cuda:0')
episode: 45 training return: tensor(-103.7695, device='cuda:0')
episode: 46 training return: tensor(-205.2997, device='cuda:0')
episode: 47 training return: tensor(-197.4249, device='cuda:0')
epoch: 12 test_true_pfm: 48.33032006427602 sim_pfm: -151.52031547846272
episode: 48 training return: tensor(-111.2823, device='cuda:0')
episode: 49 training return: tensor(-201.6566, device='cuda:0')
episode: 50 training return: tensor(-169.1847, device='cuda:0')
episode: 51 training return: tensor(-197.2905, device='cuda:0')
epoch: 13 test_true_pfm: 49.734148868250756 sim_pfm: -108.75152478179662
episode: 52 training return: tensor(-191.5999, device='cuda:0')
episode: 53 training return: tensor(-150.8645, device='cuda:0')
episode: 54 training return: tensor(-120.2776, device='cuda:0')
episode: 55 training return: tensor(-125.0074, device='cuda:0')
epoch: 14 test_true_pfm: 47.99545574691583 sim_pfm: -170.03477212362924
episode: 56 training return: tensor(-117.4174, device='cuda:0')
episode: 57 training return: tensor(-149.2818, device='cuda:0')
episode: 58 training return: tensor(-170.6811, device='cuda:0')
episode: 59 training return: tensor(-86.3611, device='cuda:0')
epoch: 15 test_true_pfm: 51.1631902740326 sim_pfm: -148.61528226168593
episode: 60 training return: tensor(-152.2949, device='cuda:0')
episode: 61 training return: tensor(-121.5063, device='cuda:0')
episode: 62 training return: tensor(-77.5690, device='cuda:0')
episode: 63 training return: tensor(-204.3390, device='cuda:0')
epoch: 16 test_true_pfm: 53.0574018987174 sim_pfm: -159.3867219670792
episode: 64 training return: tensor(-128.3083, device='cuda:0')
episode: 65 training return: tensor(-197.8364, device='cuda:0')
episode: 66 training return: tensor(-83.6665, device='cuda:0')
episode: 67 training return: tensor(-164.5282, device='cuda:0')
epoch: 17 test_true_pfm: 47.76649337387419 sim_pfm: -154.4165640673309
episode: 68 training return: tensor(-190.9111, device='cuda:0')
episode: 69 training return: tensor(-69.3704, device='cuda:0')
episode: 70 training return: tensor(-170.5824, device='cuda:0')
episode: 71 training return: tensor(-155.9570, device='cuda:0')
epoch: 18 test_true_pfm: 74.27260474943924 sim_pfm: -156.57574797875714
episode: 72 training return: tensor(-129.4147, device='cuda:0')
episode: 73 training return: tensor(-190.5726, device='cuda:0')
episode: 74 training return: tensor(-141.2243, device='cuda:0')
episode: 75 training return: tensor(-131.9643, device='cuda:0')
epoch: 19 test_true_pfm: 51.30481121970613 sim_pfm: -138.44301922079177
episode: 76 training return: tensor(-172.8930, device='cuda:0')
episode: 77 training return: tensor(-125.1191, device='cuda:0')
episode: 78 training return: tensor(-129.1231, device='cuda:0')
episode: 79 training return: tensor(-157.8451, device='cuda:0')
epoch: 20 test_true_pfm: 56.59658526530518 sim_pfm: -119.32031976889702
episode: 80 training return: tensor(-118.6794, device='cuda:0')
episode: 81 training return: tensor(-190.0749, device='cuda:0')
episode: 82 training return: tensor(-169.1594, device='cuda:0')
episode: 83 training return: tensor(-174.0351, device='cuda:0')
epoch: 21 test_true_pfm: 57.09661157121186 sim_pfm: -163.90395578399185
episode: 84 training return: tensor(-197.9969, device='cuda:0')
episode: 85 training return: tensor(-128.8043, device='cuda:0')
episode: 86 training return: tensor(-181.6266, device='cuda:0')
episode: 87 training return: tensor(-128.9982, device='cuda:0')
epoch: 22 test_true_pfm: 63.40573249499015 sim_pfm: -134.8229969726177
episode: 88 training return: tensor(-120.6859, device='cuda:0')
episode: 89 training return: tensor(-133.4162, device='cuda:0')
episode: 90 training return: tensor(-153.6770, device='cuda:0')
episode: 91 training return: tensor(-137.3349, device='cuda:0')
epoch: 23 test_true_pfm: 60.054930125980874 sim_pfm: -143.77660785673652
episode: 92 training return: tensor(-188.3995, device='cuda:0')
episode: 93 training return: tensor(-136.0863, device='cuda:0')
episode: 94 training return: tensor(-201.4774, device='cuda:0')
episode: 95 training return: tensor(-144.1137, device='cuda:0')
epoch: 24 test_true_pfm: 57.39900857638263 sim_pfm: -106.68501931295032
episode: 96 training return: tensor(-150.7989, device='cuda:0')
episode: 97 training return: tensor(-121.6614, device='cuda:0')
episode: 98 training return: tensor(-182.7404, device='cuda:0')
episode: 99 training return: tensor(-134.3688, device='cuda:0')
epoch: 25 test_true_pfm: 52.01901794388997 sim_pfm: -126.82233731133165
episode: 100 training return: tensor(-135.2394, device='cuda:0')
episode: 101 training return: tensor(-184.7417, device='cuda:0')
episode: 102 training return: tensor(-173.2257, device='cuda:0')
episode: 103 training return: tensor(-176.9760, device='cuda:0')
epoch: 26 test_true_pfm: 48.96645342513219 sim_pfm: -161.13709000963135
episode: 104 training return: tensor(-144.8451, device='cuda:0')
episode: 105 training return: tensor(-135.8227, device='cuda:0')
episode: 106 training return: tensor(-169.9703, device='cuda:0')
episode: 107 training return: tensor(-130.3477, device='cuda:0')
epoch: 27 test_true_pfm: 52.01602167996322 sim_pfm: -146.16837008922593
episode: 108 training return: tensor(-166.8542, device='cuda:0')
episode: 109 training return: tensor(-166.1327, device='cuda:0')
episode: 110 training return: tensor(-165.1631, device='cuda:0')
episode: 111 training return: tensor(-173.2994, device='cuda:0')
epoch: 28 test_true_pfm: 51.688279089361934 sim_pfm: -157.0081258262333
episode: 112 training return: tensor(-185.7729, device='cuda:0')
episode: 113 training return: tensor(-147.7703, device='cuda:0')
episode: 114 training return: tensor(-119.5423, device='cuda:0')
episode: 115 training return: tensor(-169.7485, device='cuda:0')
epoch: 29 test_true_pfm: 49.602779009305685 sim_pfm: -144.4768961359514
episode: 116 training return: tensor(-165.5135, device='cuda:0')
episode: 117 training return: tensor(-195.1533, device='cuda:0')
episode: 118 training return: tensor(-193.1967, device='cuda:0')
episode: 119 training return: tensor(-142.5823, device='cuda:0')
epoch: 30 test_true_pfm: 55.0565378687278 sim_pfm: -145.364788991824
episode: 120 training return: tensor(-168.8514, device='cuda:0')
episode: 121 training return: tensor(-175.8941, device='cuda:0')
episode: 122 training return: tensor(-175.0164, device='cuda:0')
episode: 123 training return: tensor(-56.7318, device='cuda:0')
epoch: 31 test_true_pfm: 56.754153762225336 sim_pfm: -132.59876697903965
episode: 124 training return: tensor(-190.6518, device='cuda:0')
episode: 125 training return: tensor(-164.3399, device='cuda:0')
episode: 126 training return: tensor(-175.6837, device='cuda:0')
episode: 127 training return: tensor(-115.5993, device='cuda:0')
epoch: 32 test_true_pfm: 58.14398201731653 sim_pfm: -123.90372749163653
episode: 128 training return: tensor(-85.7277, device='cuda:0')
episode: 129 training return: tensor(-180.8265, device='cuda:0')
episode: 130 training return: tensor(-50.5886, device='cuda:0')
episode: 131 training return: tensor(-140.9423, device='cuda:0')
epoch: 33 test_true_pfm: 47.91476009895581 sim_pfm: -150.9770312334993
episode: 132 training return: tensor(-139.3075, device='cuda:0')
episode: 133 training return: tensor(-198.7137, device='cuda:0')
episode: 134 training return: tensor(-169.4509, device='cuda:0')
episode: 135 training return: tensor(-113.4690, device='cuda:0')
epoch: 34 test_true_pfm: 47.084403557046095 sim_pfm: -174.01806623537558
episode: 136 training return: tensor(-185.1777, device='cuda:0')
episode: 137 training return: tensor(-202.6913, device='cuda:0')
episode: 138 training return: tensor(-176.6837, device='cuda:0')
episode: 139 training return: tensor(-156.2215, device='cuda:0')
epoch: 35 test_true_pfm: 61.106055722799056 sim_pfm: -157.793776149489
episode: 140 training return: tensor(-101.5146, device='cuda:0')
episode: 141 training return: tensor(-136.4202, device='cuda:0')
episode: 142 training return: tensor(-113.3672, device='cuda:0')
episode: 143 training return: tensor(-166.9599, device='cuda:0')
epoch: 36 test_true_pfm: 66.2465504441787 sim_pfm: -144.58809178418014
episode: 144 training return: tensor(-132.9640, device='cuda:0')
episode: 145 training return: tensor(-125.2432, device='cuda:0')
episode: 146 training return: tensor(-109.0076, device='cuda:0')
episode: 147 training return: tensor(-164.9411, device='cuda:0')
epoch: 37 test_true_pfm: 50.968418933642454 sim_pfm: -160.40270883573686
episode: 148 training return: tensor(-138.9590, device='cuda:0')
episode: 149 training return: tensor(-188.1605, device='cuda:0')
episode: 150 training return: tensor(-127.9001, device='cuda:0')
episode: 151 training return: tensor(-198.6092, device='cuda:0')
epoch: 38 test_true_pfm: 55.38265830873759 sim_pfm: -166.6998191587918
episode: 152 training return: tensor(-144.0909, device='cuda:0')
episode: 153 training return: tensor(-187.7107, device='cuda:0')
episode: 154 training return: tensor(-105.9489, device='cuda:0')
episode: 155 training return: tensor(-179.4466, device='cuda:0')
epoch: 39 test_true_pfm: 48.612596297109405 sim_pfm: -172.29365646850783
episode: 156 training return: tensor(-181.5433, device='cuda:0')
episode: 157 training return: tensor(-167.5783, device='cuda:0')
episode: 158 training return: tensor(-121.5021, device='cuda:0')
episode: 159 training return: tensor(-189.8226, device='cuda:0')
epoch: 40 test_true_pfm: 61.87522847349267 sim_pfm: -154.0236500518804
episode: 160 training return: tensor(-121.8244, device='cuda:0')
episode: 161 training return: tensor(-94.0755, device='cuda:0')
episode: 162 training return: tensor(-190.1836, device='cuda:0')
episode: 163 training return: tensor(-103.5981, device='cuda:0')
epoch: 41 test_true_pfm: 56.03810521699279 sim_pfm: -184.86834332097789
episode: 164 training return: tensor(-129.8333, device='cuda:0')
episode: 165 training return: tensor(-129.8197, device='cuda:0')
episode: 166 training return: tensor(-202.2527, device='cuda:0')
episode: 167 training return: tensor(-165.9227, device='cuda:0')
epoch: 42 test_true_pfm: 55.10086722583022 sim_pfm: -149.9056497429614
episode: 168 training return: tensor(-82.4169, device='cuda:0')
episode: 169 training return: tensor(-140.7691, device='cuda:0')
episode: 170 training return: tensor(-55.0816, device='cuda:0')
episode: 171 training return: tensor(-200.1349, device='cuda:0')
epoch: 43 test_true_pfm: 51.093831580684565 sim_pfm: -143.7488879703451
episode: 172 training return: tensor(-102.1667, device='cuda:0')
episode: 173 training return: tensor(-134.0619, device='cuda:0')
episode: 174 training return: tensor(-177.2368, device='cuda:0')
episode: 175 training return: tensor(-110.3559, device='cuda:0')
epoch: 44 test_true_pfm: 46.95144632509257 sim_pfm: -159.0033947402204
episode: 176 training return: tensor(-144.6645, device='cuda:0')
episode: 177 training return: tensor(-135.6120, device='cuda:0')
episode: 178 training return: tensor(-187.6341, device='cuda:0')
episode: 179 training return: tensor(-131.8791, device='cuda:0')
epoch: 45 test_true_pfm: 50.63225037324005 sim_pfm: -120.0778514912934
episode: 180 training return: tensor(-69.9065, device='cuda:0')
episode: 181 training return: tensor(-169.2319, device='cuda:0')
episode: 182 training return: tensor(-131.9680, device='cuda:0')
episode: 183 training return: tensor(-105.6315, device='cuda:0')
epoch: 46 test_true_pfm: 50.63060678872971 sim_pfm: -142.69479391514324
episode: 184 training return: tensor(-103.5198, device='cuda:0')
episode: 185 training return: tensor(-100.5643, device='cuda:0')
episode: 186 training return: tensor(-202.5612, device='cuda:0')
episode: 187 training return: tensor(-137.1956, device='cuda:0')
epoch: 47 test_true_pfm: 54.96765025388538 sim_pfm: -159.95670062954304
episode: 188 training return: tensor(-188.6844, device='cuda:0')
episode: 189 training return: tensor(-149.8190, device='cuda:0')
episode: 190 training return: tensor(-193.6116, device='cuda:0')
episode: 191 training return: tensor(-115.7843, device='cuda:0')
epoch: 48 test_true_pfm: 48.445920055300846 sim_pfm: -116.68132586834254
episode: 192 training return: tensor(-149.5974, device='cuda:0')
episode: 193 training return: tensor(-120.9868, device='cuda:0')
episode: 194 training return: tensor(-192.5609, device='cuda:0')
episode: 195 training return: tensor(-170.1566, device='cuda:0')
epoch: 49 test_true_pfm: 54.493834567888904 sim_pfm: -156.30290966914035
episode: 196 training return: tensor(-139.9958, device='cuda:0')
episode: 197 training return: tensor(-205.6963, device='cuda:0')
episode: 198 training return: tensor(-167.8126, device='cuda:0')
episode: 199 training return: tensor(-200.9464, device='cuda:0')
epoch: 50 test_true_pfm: 55.40443448100682 sim_pfm: -120.49066437787842
episode: 200 training return: tensor(-121.1929, device='cuda:0')
episode: 201 training return: tensor(-138.4192, device='cuda:0')
episode: 202 training return: tensor(-123.5693, device='cuda:0')
episode: 203 training return: tensor(-122.9904, device='cuda:0')
epoch: 51 test_true_pfm: 49.60244409925488 sim_pfm: -158.4823340510833
episode: 204 training return: tensor(-180.4637, device='cuda:0')
episode: 205 training return: tensor(-137.8153, device='cuda:0')
episode: 206 training return: tensor(-140.8893, device='cuda:0')
episode: 207 training return: tensor(-136.7447, device='cuda:0')
epoch: 52 test_true_pfm: 47.687874665236805 sim_pfm: -113.86205970796873
episode: 208 training return: tensor(-170.6785, device='cuda:0')
episode: 209 training return: tensor(-196.8529, device='cuda:0')
episode: 210 training return: tensor(-175.2594, device='cuda:0')
episode: 211 training return: tensor(-124.9374, device='cuda:0')
epoch: 53 test_true_pfm: 48.661202131794816 sim_pfm: -123.10175925960066
episode: 212 training return: tensor(-195.7938, device='cuda:0')
episode: 213 training return: tensor(-174.4127, device='cuda:0')
episode: 214 training return: tensor(-201.0249, device='cuda:0')
episode: 215 training return: tensor(-124.6673, device='cuda:0')
epoch: 54 test_true_pfm: 50.43328443166121 sim_pfm: -166.42026178873493
episode: 216 training return: tensor(-122.9324, device='cuda:0')
episode: 217 training return: tensor(-169.1234, device='cuda:0')
episode: 218 training return: tensor(-113.2684, device='cuda:0')
episode: 219 training return: tensor(-165.0851, device='cuda:0')
epoch: 55 test_true_pfm: 56.336689362412436 sim_pfm: -164.64306063757977
episode: 220 training return: tensor(-54.5734, device='cuda:0')
episode: 221 training return: tensor(-132.9851, device='cuda:0')
episode: 222 training return: tensor(-167.7183, device='cuda:0')
episode: 223 training return: tensor(-197.5939, device='cuda:0')
epoch: 56 test_true_pfm: 47.12435285506531 sim_pfm: -122.04371579661965
episode: 224 training return: tensor(-169.7864, device='cuda:0')
episode: 225 training return: tensor(-106.5343, device='cuda:0')
episode: 226 training return: tensor(-132.2760, device='cuda:0')
episode: 227 training return: tensor(-138.8581, device='cuda:0')
epoch: 57 test_true_pfm: 71.17089885552949 sim_pfm: -143.73039010132197
episode: 228 training return: tensor(-120.6971, device='cuda:0')
episode: 229 training return: tensor(-103.3647, device='cuda:0')
episode: 230 training return: tensor(-96.0963, device='cuda:0')
episode: 231 training return: tensor(-123.1086, device='cuda:0')
epoch: 58 test_true_pfm: 58.37933169413172 sim_pfm: -117.60894773798063
episode: 232 training return: tensor(-138.0040, device='cuda:0')
episode: 233 training return: tensor(-187.5368, device='cuda:0')
episode: 234 training return: tensor(-115.9816, device='cuda:0')
episode: 235 training return: tensor(-114.8403, device='cuda:0')
epoch: 59 test_true_pfm: 56.210573254145956 sim_pfm: -111.81500432942994
episode: 236 training return: tensor(-46.6531, device='cuda:0')
episode: 237 training return: tensor(-125.2207, device='cuda:0')
episode: 238 training return: tensor(-102.6046, device='cuda:0')
episode: 239 training return: tensor(-185.4569, device='cuda:0')
epoch: 60 test_true_pfm: 57.23139121232033 sim_pfm: -139.89166890906054
episode: 240 training return: tensor(-170.7393, device='cuda:0')
episode: 241 training return: tensor(-109.5734, device='cuda:0')
episode: 242 training return: tensor(-138.1815, device='cuda:0')
episode: 243 training return: tensor(-168.3845, device='cuda:0')
epoch: 61 test_true_pfm: 46.23449270058146 sim_pfm: -125.33832564393524
episode: 244 training return: tensor(-128.9162, device='cuda:0')
episode: 245 training return: tensor(-118.9013, device='cuda:0')
episode: 246 training return: tensor(-164.5024, device='cuda:0')
episode: 247 training return: tensor(-77.8514, device='cuda:0')
epoch: 62 test_true_pfm: 60.75606720407649 sim_pfm: -122.32592985469383
episode: 248 training return: tensor(-137.5024, device='cuda:0')
episode: 249 training return: tensor(-129.8101, device='cuda:0')
episode: 250 training return: tensor(-121.3050, device='cuda:0')
episode: 251 training return: tensor(-119.2234, device='cuda:0')
epoch: 63 test_true_pfm: 57.49314652009033 sim_pfm: -134.47056797529802
episode: 252 training return: tensor(-63.5590, device='cuda:0')
episode: 253 training return: tensor(-117.9595, device='cuda:0')
episode: 254 training return: tensor(-156.3902, device='cuda:0')
episode: 255 training return: tensor(-91.9673, device='cuda:0')
epoch: 64 test_true_pfm: 51.63669653327048 sim_pfm: -137.7041038172145
episode: 256 training return: tensor(-165.0365, device='cuda:0')
episode: 257 training return: tensor(-133.0412, device='cuda:0')
episode: 258 training return: tensor(-188.4250, device='cuda:0')
episode: 259 training return: tensor(-188.6784, device='cuda:0')
epoch: 65 test_true_pfm: 49.76315230898477 sim_pfm: -103.53874995751539
episode: 260 training return: tensor(-122.8852, device='cuda:0')
episode: 261 training return: tensor(-192.6296, device='cuda:0')
episode: 262 training return: tensor(-166.3484, device='cuda:0')
episode: 263 training return: tensor(-96.6705, device='cuda:0')
epoch: 66 test_true_pfm: 60.04305243976209 sim_pfm: -132.09170050700195
episode: 264 training return: tensor(-162.1636, device='cuda:0')
episode: 265 training return: tensor(-131.0317, device='cuda:0')
episode: 266 training return: tensor(-86.5558, device='cuda:0')
episode: 267 training return: tensor(-113.7982, device='cuda:0')
epoch: 67 test_true_pfm: 49.684390584676734 sim_pfm: -137.40988706932404
episode: 268 training return: tensor(-180.5863, device='cuda:0')
episode: 269 training return: tensor(-127.7509, device='cuda:0')
episode: 270 training return: tensor(-132.9919, device='cuda:0')
episode: 271 training return: tensor(-164.9407, device='cuda:0')
epoch: 68 test_true_pfm: 58.4055697951229 sim_pfm: -126.6359564542363
episode: 272 training return: tensor(-121.0169, device='cuda:0')
episode: 273 training return: tensor(-120.0059, device='cuda:0')
episode: 274 training return: tensor(-165.0366, device='cuda:0')
episode: 275 training return: tensor(-127.9923, device='cuda:0')
epoch: 69 test_true_pfm: 57.24818231727322 sim_pfm: -122.88163916977355
episode: 276 training return: tensor(-122.7062, device='cuda:0')
episode: 277 training return: tensor(-110.6662, device='cuda:0')
episode: 278 training return: tensor(-58.2306, device='cuda:0')
episode: 279 training return: tensor(-118.3898, device='cuda:0')
epoch: 70 test_true_pfm: 47.62095349218598 sim_pfm: -167.95144587255783
episode: 280 training return: tensor(-190.8104, device='cuda:0')
episode: 281 training return: tensor(-191.5531, device='cuda:0')
episode: 282 training return: tensor(-112.3690, device='cuda:0')
episode: 283 training return: tensor(-123.7008, device='cuda:0')
epoch: 71 test_true_pfm: 53.31459834341437 sim_pfm: -121.87539133799729
episode: 284 training return: tensor(-96.1007, device='cuda:0')
episode: 285 training return: tensor(-118.7885, device='cuda:0')
episode: 286 training return: tensor(-100.7453, device='cuda:0')
episode: 287 training return: tensor(-133.1396, device='cuda:0')
epoch: 72 test_true_pfm: 61.45469139902235 sim_pfm: -115.55245505501516
episode: 288 training return: tensor(-39.9610, device='cuda:0')
episode: 289 training return: tensor(-118.1732, device='cuda:0')
episode: 290 training return: tensor(-71.7817, device='cuda:0')
episode: 291 training return: tensor(-37.4356, device='cuda:0')
epoch: 73 test_true_pfm: 52.130181571332855 sim_pfm: -128.93552778828888
episode: 292 training return: tensor(-84.1249, device='cuda:0')
episode: 293 training return: tensor(-70.0972, device='cuda:0')
episode: 294 training return: tensor(-97.1141, device='cuda:0')
episode: 295 training return: tensor(-175.5284, device='cuda:0')
epoch: 74 test_true_pfm: 58.830625286902055 sim_pfm: -151.02646232525004
episode: 296 training return: tensor(-190.2914, device='cuda:0')
episode: 297 training return: tensor(-107.2119, device='cuda:0')
episode: 298 training return: tensor(-108.1897, device='cuda:0')
episode: 299 training return: tensor(-193.7593, device='cuda:0')
epoch: 75 test_true_pfm: 47.18743968900327 sim_pfm: -149.3271672337898
episode: 300 training return: tensor(-180.6502, device='cuda:0')
episode: 301 training return: tensor(-192.4227, device='cuda:0')
episode: 302 training return: tensor(-111.0130, device='cuda:0')
episode: 303 training return: tensor(-189.1861, device='cuda:0')
epoch: 76 test_true_pfm: 49.06570510222953 sim_pfm: -130.11573701353046
episode: 304 training return: tensor(-158.1154, device='cuda:0')
episode: 305 training return: tensor(-171.8952, device='cuda:0')
episode: 306 training return: tensor(-144.6797, device='cuda:0')
episode: 307 training return: tensor(-161.8261, device='cuda:0')
epoch: 77 test_true_pfm: 49.870477057667465 sim_pfm: -142.80229820052045
episode: 308 training return: tensor(-190.7399, device='cuda:0')
episode: 309 training return: tensor(-180.3806, device='cuda:0')
episode: 310 training return: tensor(-184.0786, device='cuda:0')
episode: 311 training return: tensor(-140.4254, device='cuda:0')
epoch: 78 test_true_pfm: 60.778867632033325 sim_pfm: -127.90314114826033
episode: 312 training return: tensor(-65.1308, device='cuda:0')
episode: 313 training return: tensor(-117.8170, device='cuda:0')
episode: 314 training return: tensor(-124.6993, device='cuda:0')
episode: 315 training return: tensor(-96.5566, device='cuda:0')
epoch: 79 test_true_pfm: 57.837492644009515 sim_pfm: -111.3908634267049
episode: 316 training return: tensor(-186.1706, device='cuda:0')
episode: 317 training return: tensor(-122.0100, device='cuda:0')
episode: 318 training return: tensor(-97.8564, device='cuda:0')
episode: 319 training return: tensor(-107.8525, device='cuda:0')
epoch: 80 test_true_pfm: 65.58701693899013 sim_pfm: -119.3430495093111
episode: 320 training return: tensor(-87.9343, device='cuda:0')
episode: 321 training return: tensor(-92.3384, device='cuda:0')
episode: 322 training return: tensor(-179.2467, device='cuda:0')
episode: 323 training return: tensor(-124.1648, device='cuda:0')
epoch: 81 test_true_pfm: 49.392513285499966 sim_pfm: -136.0981271664612
episode: 324 training return: tensor(-190.6341, device='cuda:0')
episode: 325 training return: tensor(-110.3536, device='cuda:0')
episode: 326 training return: tensor(-115.5263, device='cuda:0')
episode: 327 training return: tensor(-185.8091, device='cuda:0')
epoch: 82 test_true_pfm: 49.713900653952976 sim_pfm: -132.73296762523242
episode: 328 training return: tensor(-175.8536, device='cuda:0')
episode: 329 training return: tensor(-184.7036, device='cuda:0')
episode: 330 training return: tensor(-188.5084, device='cuda:0')
episode: 331 training return: tensor(-135.6519, device='cuda:0')
epoch: 83 test_true_pfm: 57.08413230809233 sim_pfm: -156.6002949245216
episode: 332 training return: tensor(-186.4267, device='cuda:0')
episode: 333 training return: tensor(-190.1629, device='cuda:0')
episode: 334 training return: tensor(-162.2584, device='cuda:0')
episode: 335 training return: tensor(-122.4432, device='cuda:0')
epoch: 84 test_true_pfm: 49.489121591523514 sim_pfm: -117.04195534092723
episode: 336 training return: tensor(-130.6763, device='cuda:0')
episode: 337 training return: tensor(-185.2859, device='cuda:0')
episode: 338 training return: tensor(-170.5847, device='cuda:0')
episode: 339 training return: tensor(-121.5297, device='cuda:0')
epoch: 85 test_true_pfm: 54.131716179712726 sim_pfm: -129.9350681137119
episode: 340 training return: tensor(-60.3314, device='cuda:0')
episode: 341 training return: tensor(-104.4166, device='cuda:0')
episode: 342 training return: tensor(-131.6577, device='cuda:0')
episode: 343 training return: tensor(-50.4639, device='cuda:0')
epoch: 86 test_true_pfm: 62.844667347260575 sim_pfm: -134.10867322753765
episode: 344 training return: tensor(-185.9167, device='cuda:0')
episode: 345 training return: tensor(-121.0705, device='cuda:0')
episode: 346 training return: tensor(-85.9325, device='cuda:0')
episode: 347 training return: tensor(-112.3479, device='cuda:0')
epoch: 87 test_true_pfm: 47.33006112206006 sim_pfm: -143.23427311995766
episode: 348 training return: tensor(-110.0922, device='cuda:0')
episode: 349 training return: tensor(-188.0301, device='cuda:0')
episode: 350 training return: tensor(-177.8860, device='cuda:0')
episode: 351 training return: tensor(-89.3258, device='cuda:0')
epoch: 88 test_true_pfm: 52.907476109594356 sim_pfm: -128.60424508306315
episode: 352 training return: tensor(-162.5770, device='cuda:0')
episode: 353 training return: tensor(-189.3944, device='cuda:0')
episode: 354 training return: tensor(-90.3388, device='cuda:0')
episode: 355 training return: tensor(-165.5184, device='cuda:0')
epoch: 89 test_true_pfm: 58.66333262327653 sim_pfm: -134.75312057458797
episode: 356 training return: tensor(-152.9390, device='cuda:0')
episode: 357 training return: tensor(-192.3476, device='cuda:0')
episode: 358 training return: tensor(-125.8723, device='cuda:0')
episode: 359 training return: tensor(-45.2663, device='cuda:0')
epoch: 90 test_true_pfm: 48.349510134041125 sim_pfm: -116.74558539097197
episode: 360 training return: tensor(-111.4762, device='cuda:0')
episode: 361 training return: tensor(-105.5228, device='cuda:0')
episode: 362 training return: tensor(-110.8333, device='cuda:0')
episode: 363 training return: tensor(-190.2364, device='cuda:0')
epoch: 91 test_true_pfm: 49.03104098045044 sim_pfm: -137.23925358027336
episode: 364 training return: tensor(-111.6669, device='cuda:0')
episode: 365 training return: tensor(-184.5836, device='cuda:0')
episode: 366 training return: tensor(-189.5521, device='cuda:0')
episode: 367 training return: tensor(-125.6659, device='cuda:0')
epoch: 92 test_true_pfm: 57.93581997236765 sim_pfm: -65.23066123372992
episode: 368 training return: tensor(-189.5181, device='cuda:0')
episode: 369 training return: tensor(-171.5298, device='cuda:0')
episode: 370 training return: tensor(-126.8511, device='cuda:0')
episode: 371 training return: tensor(-105.4538, device='cuda:0')
epoch: 93 test_true_pfm: 45.920752112373535 sim_pfm: -129.73030085791834
episode: 372 training return: tensor(-185.7089, device='cuda:0')
episode: 373 training return: tensor(-58.5822, device='cuda:0')
episode: 374 training return: tensor(-125.7446, device='cuda:0')
episode: 375 training return: tensor(-164.8880, device='cuda:0')
epoch: 94 test_true_pfm: 51.90887453755348 sim_pfm: -101.18685894119554
episode: 376 training return: tensor(-111.2838, device='cuda:0')
episode: 377 training return: tensor(-115.4968, device='cuda:0')
episode: 378 training return: tensor(-121.9661, device='cuda:0')
episode: 379 training return: tensor(-184.7435, device='cuda:0')
epoch: 95 test_true_pfm: 48.36049638732811 sim_pfm: -82.6219138923625
episode: 380 training return: tensor(-160.2356, device='cuda:0')
episode: 381 training return: tensor(-98.1312, device='cuda:0')
episode: 382 training return: tensor(-111.0733, device='cuda:0')
episode: 383 training return: tensor(-99.8029, device='cuda:0')
epoch: 96 test_true_pfm: 64.15309391742063 sim_pfm: -144.6962771765713
episode: 384 training return: tensor(-90.4072, device='cuda:0')
episode: 385 training return: tensor(-194.6664, device='cuda:0')
episode: 386 training return: tensor(-185.7381, device='cuda:0')
episode: 387 training return: tensor(-83.5956, device='cuda:0')
epoch: 97 test_true_pfm: 58.35947699496584 sim_pfm: -149.37813422890613
episode: 388 training return: tensor(-119.5337, device='cuda:0')
episode: 389 training return: tensor(-86.7257, device='cuda:0')
episode: 390 training return: tensor(-176.4022, device='cuda:0')
episode: 391 training return: tensor(-163.1164, device='cuda:0')
epoch: 98 test_true_pfm: 50.5313836078335 sim_pfm: -102.5744932266185
episode: 392 training return: tensor(-89.6232, device='cuda:0')
episode: 393 training return: tensor(-110.6411, device='cuda:0')
episode: 394 training return: tensor(-173.7161, device='cuda:0')
episode: 395 training return: tensor(-134.8163, device='cuda:0')
epoch: 99 test_true_pfm: 48.8680798090142 sim_pfm: -103.05474492260255
episode: 396 training return: tensor(-153.6004, device='cuda:0')
episode: 397 training return: tensor(-107.7446, device='cuda:0')
episode: 398 training return: tensor(-105.3456, device='cuda:0')
episode: 399 training return: tensor(-152.1608, device='cuda:0')
epoch: 100 test_true_pfm: 57.61695348125568 sim_pfm: -124.624828734732
episode: 400 training return: tensor(-105.2171, device='cuda:0')
episode: 401 training return: tensor(-56.0346, device='cuda:0')
episode: 402 training return: tensor(-79.5679, device='cuda:0')
episode: 403 training return: tensor(-56.8808, device='cuda:0')
epoch: 101 test_true_pfm: 52.36009837124584 sim_pfm: -175.98452586147468
episode: 404 training return: tensor(-116.6222, device='cuda:0')
episode: 405 training return: tensor(-166.6554, device='cuda:0')
episode: 406 training return: tensor(-185.2925, device='cuda:0')
episode: 407 training return: tensor(-116.5473, device='cuda:0')
epoch: 102 test_true_pfm: 66.55420729478355 sim_pfm: -157.4412006228231
episode: 408 training return: tensor(-188.3075, device='cuda:0')
episode: 409 training return: tensor(-168.0452, device='cuda:0')
episode: 410 training return: tensor(-58.0067, device='cuda:0')
episode: 411 training return: tensor(-98.0577, device='cuda:0')
epoch: 103 test_true_pfm: 55.48232848594064 sim_pfm: -118.82580761598074
episode: 412 training return: tensor(-188.6337, device='cuda:0')
episode: 413 training return: tensor(-82.8971, device='cuda:0')
episode: 414 training return: tensor(-76.9866, device='cuda:0')
episode: 415 training return: tensor(-190.5186, device='cuda:0')
epoch: 104 test_true_pfm: 48.189728266112546 sim_pfm: -121.0725192707323
episode: 416 training return: tensor(-54.5248, device='cuda:0')
episode: 417 training return: tensor(-173.5436, device='cuda:0')
episode: 418 training return: tensor(-163.7521, device='cuda:0')
episode: 419 training return: tensor(-125.8817, device='cuda:0')
epoch: 105 test_true_pfm: 48.26866055360791 sim_pfm: -118.36957070742501
episode: 420 training return: tensor(-130.5157, device='cuda:0')
episode: 421 training return: tensor(-105.4036, device='cuda:0')
episode: 422 training return: tensor(-168.7778, device='cuda:0')
episode: 423 training return: tensor(-129.6971, device='cuda:0')
epoch: 106 test_true_pfm: 64.90497556537089 sim_pfm: -131.10024254337185
episode: 424 training return: tensor(-96.5404, device='cuda:0')
episode: 425 training return: tensor(-168.6845, device='cuda:0')
episode: 426 training return: tensor(-112.6674, device='cuda:0')
episode: 427 training return: tensor(-185.4662, device='cuda:0')
epoch: 107 test_true_pfm: 63.03810447956772 sim_pfm: -105.12024139352143
episode: 428 training return: tensor(-131.2804, device='cuda:0')
episode: 429 training return: tensor(-188.8565, device='cuda:0')
episode: 430 training return: tensor(-58.0507, device='cuda:0')
episode: 431 training return: tensor(-117.9085, device='cuda:0')
epoch: 108 test_true_pfm: 52.20265480477599 sim_pfm: -140.1524744825205
episode: 432 training return: tensor(-191.6990, device='cuda:0')
episode: 433 training return: tensor(-193.1505, device='cuda:0')
episode: 434 training return: tensor(-111.0137, device='cuda:0')
episode: 435 training return: tensor(-62.7511, device='cuda:0')
epoch: 109 test_true_pfm: 64.70900265487268 sim_pfm: -89.42037283748505
episode: 436 training return: tensor(-163.9188, device='cuda:0')
episode: 437 training return: tensor(-136.8521, device='cuda:0')
episode: 438 training return: tensor(-129.6859, device='cuda:0')
episode: 439 training return: tensor(-106.0592, device='cuda:0')
epoch: 110 test_true_pfm: 50.48049596432661 sim_pfm: -92.27326403598417
episode: 440 training return: tensor(-115.8266, device='cuda:0')
episode: 441 training return: tensor(-190.0908, device='cuda:0')
episode: 442 training return: tensor(-102.0270, device='cuda:0')
episode: 443 training return: tensor(-187.2597, device='cuda:0')
epoch: 111 test_true_pfm: 54.416382455783115 sim_pfm: -138.34742993599502
episode: 444 training return: tensor(-118.9009, device='cuda:0')
episode: 445 training return: tensor(-181.0499, device='cuda:0')
episode: 446 training return: tensor(-190.4365, device='cuda:0')
episode: 447 training return: tensor(-109.4577, device='cuda:0')
epoch: 112 test_true_pfm: 50.649042095199384 sim_pfm: -134.72879317353363
episode: 448 training return: tensor(-62.1251, device='cuda:0')
episode: 449 training return: tensor(-110.6496, device='cuda:0')
episode: 450 training return: tensor(-74.7770, device='cuda:0')
episode: 451 training return: tensor(-56.6637, device='cuda:0')
epoch: 113 test_true_pfm: 58.38130988721603 sim_pfm: -115.97678814862157
episode: 452 training return: tensor(-174.7016, device='cuda:0')
episode: 453 training return: tensor(-103.4902, device='cuda:0')
episode: 454 training return: tensor(-95.6227, device='cuda:0')
episode: 455 training return: tensor(-179.9958, device='cuda:0')
epoch: 114 test_true_pfm: 52.85495859259603 sim_pfm: -99.01689409371465
episode: 456 training return: tensor(-104.9336, device='cuda:0')
episode: 457 training return: tensor(-52.6843, device='cuda:0')
episode: 458 training return: tensor(-191.1378, device='cuda:0')
episode: 459 training return: tensor(-99.6922, device='cuda:0')
epoch: 115 test_true_pfm: 50.04412132019489 sim_pfm: -103.91162832531263
episode: 460 training return: tensor(-135.7479, device='cuda:0')
episode: 461 training return: tensor(-46.8876, device='cuda:0')
episode: 462 training return: tensor(-164.0932, device='cuda:0')
episode: 463 training return: tensor(-163.6622, device='cuda:0')
epoch: 116 test_true_pfm: 74.50673071097435 sim_pfm: -77.49015511844773
episode: 464 training return: tensor(-110.1798, device='cuda:0')
episode: 465 training return: tensor(-55.9398, device='cuda:0')
episode: 466 training return: tensor(-90.7615, device='cuda:0')
episode: 467 training return: tensor(-102.7053, device='cuda:0')
epoch: 117 test_true_pfm: 56.067052078538076 sim_pfm: -114.07412120068912
episode: 468 training return: tensor(-43.3007, device='cuda:0')
episode: 469 training return: tensor(-47.2863, device='cuda:0')
episode: 470 training return: tensor(-152.1643, device='cuda:0')
episode: 471 training return: tensor(-64.6387, device='cuda:0')
epoch: 118 test_true_pfm: 58.472949372570156 sim_pfm: -124.13038534825318
episode: 472 training return: tensor(-166.8090, device='cuda:0')
episode: 473 training return: tensor(-182.9467, device='cuda:0')
episode: 474 training return: tensor(-188.2680, device='cuda:0')
episode: 475 training return: tensor(-190.9422, device='cuda:0')
epoch: 119 test_true_pfm: 51.738458683497825 sim_pfm: -87.51455433982774
episode: 476 training return: tensor(-147.9978, device='cuda:0')
episode: 477 training return: tensor(-196.1048, device='cuda:0')
episode: 478 training return: tensor(-186.1250, device='cuda:0')
episode: 479 training return: tensor(-96.7961, device='cuda:0')
epoch: 120 test_true_pfm: 56.778138815390605 sim_pfm: -134.3729403576348
episode: 480 training return: tensor(-110.4937, device='cuda:0')
episode: 481 training return: tensor(-95.0451, device='cuda:0')
episode: 482 training return: tensor(-39.8744, device='cuda:0')
episode: 483 training return: tensor(-154.2577, device='cuda:0')
epoch: 121 test_true_pfm: 51.21652627721753 sim_pfm: -114.74307914067177
episode: 484 training return: tensor(-50.3197, device='cuda:0')
episode: 485 training return: tensor(-34.5989, device='cuda:0')
episode: 486 training return: tensor(-163.2990, device='cuda:0')
episode: 487 training return: tensor(-189.7623, device='cuda:0')
epoch: 122 test_true_pfm: 48.21508895913438 sim_pfm: -154.21241846067133
episode: 488 training return: tensor(-192.7089, device='cuda:0')
episode: 489 training return: tensor(-89.5194, device='cuda:0')
episode: 490 training return: tensor(-190.1843, device='cuda:0')
episode: 491 training return: tensor(-187.5386, device='cuda:0')
epoch: 123 test_true_pfm: 65.93377332597758 sim_pfm: -106.90273522287025
episode: 492 training return: tensor(-189.0095, device='cuda:0')
episode: 493 training return: tensor(-121.6649, device='cuda:0')
episode: 494 training return: tensor(-188.1080, device='cuda:0')
episode: 495 training return: tensor(-111.3651, device='cuda:0')
epoch: 124 test_true_pfm: 66.822797508212 sim_pfm: -163.0058047074941
episode: 496 training return: tensor(-177.0087, device='cuda:0')
episode: 497 training return: tensor(-111.7700, device='cuda:0')
episode: 498 training return: tensor(-181.3978, device='cuda:0')
episode: 499 training return: tensor(-87.8885, device='cuda:0')
epoch: 125 test_true_pfm: 50.051345659733705 sim_pfm: -131.94848259586723
episode: 500 training return: tensor(-179.9317, device='cuda:0')
episode: 501 training return: tensor(-164.2479, device='cuda:0')
episode: 502 training return: tensor(-73.7271, device='cuda:0')
episode: 503 training return: tensor(-118.2432, device='cuda:0')
epoch: 126 test_true_pfm: 56.34785868385745 sim_pfm: -104.07605115926125
episode: 504 training return: tensor(-46.9013, device='cuda:0')
episode: 505 training return: tensor(-129.5360, device='cuda:0')
episode: 506 training return: tensor(-73.4446, device='cuda:0')
episode: 507 training return: tensor(-52.0506, device='cuda:0')
epoch: 127 test_true_pfm: 57.01696704200464 sim_pfm: -94.10888803768322
episode: 508 training return: tensor(-60.0100, device='cuda:0')
episode: 509 training return: tensor(-188.4202, device='cuda:0')
episode: 510 training return: tensor(-163.0124, device='cuda:0')
episode: 511 training return: tensor(-104.9112, device='cuda:0')
epoch: 128 test_true_pfm: 50.162942196475704 sim_pfm: -171.70443893580813
episode: 512 training return: tensor(-190.4535, device='cuda:0')
episode: 513 training return: tensor(-182.7312, device='cuda:0')
episode: 514 training return: tensor(-194.9472, device='cuda:0')
episode: 515 training return: tensor(-190.8796, device='cuda:0')
epoch: 129 test_true_pfm: 58.23563662569846 sim_pfm: -132.25325922980556
episode: 516 training return: tensor(-111.3017, device='cuda:0')
episode: 517 training return: tensor(-128.5192, device='cuda:0')
episode: 518 training return: tensor(-191.5391, device='cuda:0')
episode: 519 training return: tensor(-55.9552, device='cuda:0')
epoch: 130 test_true_pfm: 48.65018622689718 sim_pfm: -146.59961212683703
episode: 520 training return: tensor(-188.3454, device='cuda:0')
episode: 521 training return: tensor(-121.5985, device='cuda:0')
episode: 522 training return: tensor(-43.1438, device='cuda:0')
episode: 523 training return: tensor(-88.8320, device='cuda:0')
epoch: 131 test_true_pfm: 48.53149562782366 sim_pfm: -124.04619278644677
episode: 524 training return: tensor(-146.3271, device='cuda:0')
episode: 525 training return: tensor(-172.0603, device='cuda:0')
episode: 526 training return: tensor(-72.2946, device='cuda:0')
episode: 527 training return: tensor(-195.9159, device='cuda:0')
epoch: 132 test_true_pfm: 49.8987604294643 sim_pfm: -141.4603402056964
episode: 528 training return: tensor(-193.7178, device='cuda:0')
episode: 529 training return: tensor(-187.4967, device='cuda:0')
episode: 530 training return: tensor(-167.0052, device='cuda:0')
episode: 531 training return: tensor(-48.7968, device='cuda:0')
epoch: 133 test_true_pfm: 48.970414597944064 sim_pfm: -137.30643032145454
episode: 532 training return: tensor(-57.0847, device='cuda:0')
episode: 533 training return: tensor(-189.4291, device='cuda:0')
episode: 534 training return: tensor(-109.8817, device='cuda:0')
episode: 535 training return: tensor(-189.1513, device='cuda:0')
epoch: 134 test_true_pfm: 55.639131976674264 sim_pfm: -103.54920991480466
episode: 536 training return: tensor(-95.3618, device='cuda:0')
episode: 537 training return: tensor(-126.3355, device='cuda:0')
episode: 538 training return: tensor(-185.6654, device='cuda:0')
episode: 539 training return: tensor(-178.7297, device='cuda:0')
epoch: 135 test_true_pfm: 50.67167992561968 sim_pfm: -85.3967769601848
episode: 540 training return: tensor(-191.1840, device='cuda:0')
episode: 541 training return: tensor(-190.3923, device='cuda:0')
episode: 542 training return: tensor(-176.8728, device='cuda:0')
episode: 543 training return: tensor(-185.4162, device='cuda:0')
epoch: 136 test_true_pfm: 58.12002909024949 sim_pfm: -121.91853840866824
episode: 544 training return: tensor(-193.0975, device='cuda:0')
episode: 545 training return: tensor(-109.4090, device='cuda:0')
episode: 546 training return: tensor(-176.3535, device='cuda:0')
episode: 547 training return: tensor(-118.1127, device='cuda:0')
epoch: 137 test_true_pfm: 53.16279908056034 sim_pfm: -126.404366243782
episode: 548 training return: tensor(-75.5568, device='cuda:0')
episode: 549 training return: tensor(-125.3639, device='cuda:0')
episode: 550 training return: tensor(-159.2219, device='cuda:0')
episode: 551 training return: tensor(-120.4827, device='cuda:0')
epoch: 138 test_true_pfm: 58.43696686619603 sim_pfm: -150.1026805631118
episode: 552 training return: tensor(-55.3537, device='cuda:0')
episode: 553 training return: tensor(-63.6616, device='cuda:0')
episode: 554 training return: tensor(-51.4520, device='cuda:0')
episode: 555 training return: tensor(-123.7167, device='cuda:0')
epoch: 139 test_true_pfm: 58.5371166948608 sim_pfm: -156.23206883619423
episode: 556 training return: tensor(-119.8584, device='cuda:0')
episode: 557 training return: tensor(-115.6166, device='cuda:0')
episode: 558 training return: tensor(-191.1313, device='cuda:0')
episode: 559 training return: tensor(-124.5208, device='cuda:0')
epoch: 140 test_true_pfm: 50.31763704757354 sim_pfm: -137.661728394276
episode: 560 training return: tensor(-100.3539, device='cuda:0')
episode: 561 training return: tensor(-102.4995, device='cuda:0')
episode: 562 training return: tensor(-107.7553, device='cuda:0')
episode: 563 training return: tensor(-87.1319, device='cuda:0')
epoch: 141 test_true_pfm: 49.91152907940048 sim_pfm: -103.40699925740482
episode: 564 training return: tensor(-179.8085, device='cuda:0')
episode: 565 training return: tensor(-79.5880, device='cuda:0')
episode: 566 training return: tensor(-176.1373, device='cuda:0')
episode: 567 training return: tensor(-174.9037, device='cuda:0')
epoch: 142 test_true_pfm: 52.34781484860552 sim_pfm: -157.9522476530983
episode: 568 training return: tensor(-190.9002, device='cuda:0')
episode: 569 training return: tensor(-92.3047, device='cuda:0')
episode: 570 training return: tensor(-117.0515, device='cuda:0')
episode: 571 training return: tensor(-92.8831, device='cuda:0')
epoch: 143 test_true_pfm: 55.90832918137098 sim_pfm: -156.5268830437446
episode: 572 training return: tensor(-128.6487, device='cuda:0')
episode: 573 training return: tensor(-69.4260, device='cuda:0')
episode: 574 training return: tensor(-106.5198, device='cuda:0')
episode: 575 training return: tensor(-116.5858, device='cuda:0')
epoch: 144 test_true_pfm: 51.47103234223191 sim_pfm: -116.40577310794033
episode: 576 training return: tensor(-121.9799, device='cuda:0')
episode: 577 training return: tensor(-189.9985, device='cuda:0')
episode: 578 training return: tensor(-192.4809, device='cuda:0')
episode: 579 training return: tensor(-153.4988, device='cuda:0')
epoch: 145 test_true_pfm: 56.36942549707894 sim_pfm: -150.32283006111393
episode: 580 training return: tensor(-192.5947, device='cuda:0')
episode: 581 training return: tensor(-107.3803, device='cuda:0')
episode: 582 training return: tensor(-189.0558, device='cuda:0')
episode: 583 training return: tensor(-106.0115, device='cuda:0')
epoch: 146 test_true_pfm: 63.221762396051226 sim_pfm: -99.39230438946397
episode: 584 training return: tensor(-186.8288, device='cuda:0')
episode: 585 training return: tensor(-81.6389, device='cuda:0')
episode: 586 training return: tensor(-103.6128, device='cuda:0')
episode: 587 training return: tensor(-105.9242, device='cuda:0')
epoch: 147 test_true_pfm: 51.30445773145605 sim_pfm: -146.37261511670076
episode: 588 training return: tensor(-104.3327, device='cuda:0')
episode: 589 training return: tensor(-181.0248, device='cuda:0')
episode: 590 training return: tensor(-116.8390, device='cuda:0')
episode: 591 training return: tensor(-186.3690, device='cuda:0')
epoch: 148 test_true_pfm: 51.21913287233042 sim_pfm: -129.99390405946178
episode: 592 training return: tensor(-183.7060, device='cuda:0')
episode: 593 training return: tensor(-139.3529, device='cuda:0')
episode: 594 training return: tensor(-185.4088, device='cuda:0')
episode: 595 training return: tensor(-110.7759, device='cuda:0')
epoch: 149 test_true_pfm: 62.953226790644905 sim_pfm: -148.8582869500213
episode: 596 training return: tensor(-99.9146, device='cuda:0')
episode: 597 training return: tensor(-87.2962, device='cuda:0')
episode: 598 training return: tensor(-186.5954, device='cuda:0')
episode: 599 training return: tensor(-122.0875, device='cuda:0')
epoch: 150 test_true_pfm: 65.87100638316137 sim_pfm: -110.2871085245395
