['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '3', '--data', '30000']
epoch: 0 training_loss 0.26135852947831156 test_loss: 0.19238176345825195
epoch: 1 training_loss 0.20618604734539986 test_loss: 0.19748367071151735
epoch: 2 training_loss 0.19804325014352797 test_loss: 0.19781478643417358
epoch: 3 training_loss 0.20100138396024703 test_loss: 0.16757189035415648
epoch: 4 training_loss 0.1963395519554615 test_loss: 0.19366191625595092
epoch: 5 training_loss 0.18594453915953635 test_loss: 0.1825830817222595
epoch: 6 training_loss 0.1964032020419836 test_loss: 0.19225393533706664
epoch: 7 training_loss 0.19476122342050076 test_loss: 0.1856366515159607
epoch: 8 training_loss 0.18657714433968067 test_loss: 0.1695164442062378
epoch: 9 training_loss 0.1881216311454773 test_loss: 0.18827965259552001
epoch: 10 training_loss 0.18494718395173548 test_loss: 0.17628650665283202
epoch: 11 training_loss 0.18693913348019123 test_loss: 0.18330979347229004
epoch: 12 training_loss 0.18160763680934905 test_loss: 0.18836926221847533
epoch: 13 training_loss 0.19589498922228812 test_loss: 0.20214252471923827
epoch: 14 training_loss 0.19429508075118065 test_loss: 0.1803386092185974
epoch: 15 training_loss 0.19183679960668087 test_loss: 0.19259357452392578
epoch: 16 training_loss 0.18362579472362994 test_loss: 0.1879199981689453
epoch: 17 training_loss 0.1796649807691574 test_loss: 0.19398131370544433
epoch: 18 training_loss 0.18392811551690103 test_loss: 0.16691116094589234
epoch: 19 training_loss 0.1862506377696991 test_loss: 0.18018655776977538
epoch: 20 training_loss 0.18074407897889613 test_loss: 0.16985427141189574
epoch: 21 training_loss 0.18195431619882585 test_loss: 0.17206523418426514
epoch: 22 training_loss 0.1860021125525236 test_loss: 0.16295653581619263
epoch: 23 training_loss 0.18265795081853867 test_loss: 0.18198068141937257
epoch: 24 training_loss 0.18133624717593194 test_loss: 0.17160370349884033
epoch: 25 training_loss 0.18652143701910973 test_loss: 0.18225799798965453
epoch: 26 training_loss 0.18147451177239418 test_loss: 0.18239961862564086
epoch: 27 training_loss 0.18470193281769753 test_loss: 0.16791222095489503
epoch: 28 training_loss 0.18875535242259503 test_loss: 0.1832061767578125
epoch: 29 training_loss 0.1819705767929554 test_loss: 0.1842939019203186
epoch: 30 training_loss 0.18147574618458748 test_loss: 0.17822504043579102
epoch: 31 training_loss 0.173524172604084 test_loss: 0.16742295026779175
epoch: 32 training_loss 0.1848489109426737 test_loss: 0.17460265159606933
epoch: 33 training_loss 0.1867090103775263 test_loss: 0.16331495046615602
epoch: 34 training_loss 0.18696369215846062 test_loss: 0.17359368801116942
epoch: 35 training_loss 0.17661755658686162 test_loss: 0.17612937688827515
epoch: 36 training_loss 0.17389560736715792 test_loss: 0.16758679151535033
epoch: 37 training_loss 0.18636586353182794 test_loss: 0.17157663106918336
epoch: 38 training_loss 0.1869124612212181 test_loss: 0.18232908248901367
epoch: 39 training_loss 0.18114921741187573 test_loss: 0.1777483344078064
epoch: 40 training_loss 0.1864701735973358 test_loss: 0.17594257593154908
epoch: 41 training_loss 0.1852347218245268 test_loss: 0.1771836757659912
epoch: 42 training_loss 0.1789423831552267 test_loss: 0.16062866449356078
epoch: 43 training_loss 0.1840944718569517 test_loss: 0.1669771671295166
epoch: 44 training_loss 0.1908703887462616 test_loss: 0.16631323099136353
epoch: 45 training_loss 0.17711799658834934 test_loss: 0.1711820602416992
epoch: 46 training_loss 0.1775948613882065 test_loss: 0.1786954164505005
epoch: 47 training_loss 0.17846246756613254 test_loss: 0.16826193332672118
epoch: 48 training_loss 0.17755479879677297 test_loss: 0.17016633749008178
epoch: 49 training_loss 0.17892224505543708 test_loss: 0.16968497037887573
epoch: 50 training_loss 0.18062944933772088 test_loss: 0.17709200382232665
epoch: 51 training_loss 0.18043088100850582 test_loss: 0.17529771327972413
epoch: 52 training_loss 0.16916764318943023 test_loss: 0.18412820100784302
epoch: 53 training_loss 0.18253169588744642 test_loss: 0.17654902935028077
epoch: 54 training_loss 0.18156196683645248 test_loss: 0.17503480911254882
epoch: 55 training_loss 0.18546124398708344 test_loss: 0.18509613275527953
epoch: 56 training_loss 0.18480961091816425 test_loss: 0.17715206146240234
epoch: 57 training_loss 0.1840806119889021 test_loss: 0.17456036806106567
epoch: 58 training_loss 0.1908626589924097 test_loss: 0.17817745208740235
epoch: 59 training_loss 0.17351783387362957 test_loss: 0.1727040410041809
epoch: 60 training_loss 0.19137368932366372 test_loss: 0.17507014274597169
epoch: 61 training_loss 0.17472390726208686 test_loss: 0.1745609998703003
epoch: 62 training_loss 0.1860191686451435 test_loss: 0.17837905883789062
epoch: 63 training_loss 0.19102049492299555 test_loss: 0.1697414517402649
epoch: 64 training_loss 0.18469124637544154 test_loss: 0.17125208377838136
epoch: 65 training_loss 0.18142080426216126 test_loss: 0.16485772132873536
epoch: 66 training_loss 0.17920695044100285 test_loss: 0.18791214227676392
epoch: 67 training_loss 0.18170221045613288 test_loss: 0.18649340867996217
epoch: 68 training_loss 0.18111265979707242 test_loss: 0.19499545097351073
epoch: 69 training_loss 0.1837082551419735 test_loss: 0.16894513368606567
epoch: 70 training_loss 0.1756858528405428 test_loss: 0.18020415306091309
epoch: 71 training_loss 0.1848191147297621 test_loss: 0.17406467199325562
epoch: 72 training_loss 0.17893453404307366 test_loss: 0.16681818962097167
epoch: 73 training_loss 0.1814750649780035 test_loss: 0.17213386297225952
epoch: 74 training_loss 0.183751934915781 test_loss: 0.1828633427619934
epoch: 75 training_loss 0.1794916331022978 test_loss: 0.1893907070159912
epoch: 76 training_loss 0.18224749602377416 test_loss: 0.16781014204025269
epoch: 77 training_loss 0.1780588489025831 test_loss: 0.17261050939559935
epoch: 78 training_loss 0.18715742506086827 test_loss: 0.17576290369033815
epoch: 79 training_loss 0.1885911376774311 test_loss: 0.1822258472442627
epoch: 80 training_loss 0.17146695397794245 test_loss: 0.17799841165542601
epoch: 81 training_loss 0.1688840651512146 test_loss: 0.19278168678283691
epoch: 82 training_loss 0.18726761072874068 test_loss: 0.16149063110351564
epoch: 83 training_loss 0.179564550742507 test_loss: 0.14433345794677735
epoch: 84 training_loss 0.17685451716184616 test_loss: 0.15116019248962403
epoch: 85 training_loss 0.18420026861131192 test_loss: 0.16686060428619384
epoch: 86 training_loss 0.1802809623628855 test_loss: 0.18074498176574708
epoch: 87 training_loss 0.1815807718038559 test_loss: 0.17740567922592163
epoch: 88 training_loss 0.17142443850636482 test_loss: 0.172735333442688
epoch: 89 training_loss 0.18390978299081326 test_loss: 0.16887496709823607
epoch: 90 training_loss 0.17807496815919877 test_loss: 0.1760211706161499
epoch: 91 training_loss 0.1784244041144848 test_loss: 0.16236087083816528
epoch: 92 training_loss 0.17521635495126248 test_loss: 0.16849859952926635
epoch: 93 training_loss 0.17963946014642715 test_loss: 0.17278919219970704
epoch: 94 training_loss 0.17596804916858674 test_loss: 0.18154866695404054
epoch: 95 training_loss 0.17387825656682254 test_loss: 0.17088806629180908
epoch: 96 training_loss 0.17686920017004013 test_loss: 0.18155722618103026
epoch: 97 training_loss 0.1815398121625185 test_loss: 0.17079203128814696
epoch: 98 training_loss 0.17625391118228437 test_loss: 0.17859019041061402
epoch: 99 training_loss 0.17601704590022563 test_loss: 0.16903045177459716
epoch: 100 training_loss 0.18118863563984633 test_loss: 0.17528923749923705
epoch: 101 training_loss 0.17310621589422226 test_loss: 0.17072001695632935
epoch: 102 training_loss 0.1741391398012638 test_loss: 0.17686439752578736
epoch: 103 training_loss 0.18028949536383151 test_loss: 0.16404385566711427
epoch: 104 training_loss 0.1830599981546402 test_loss: 0.1639864683151245
epoch: 105 training_loss 0.1834793856739998 test_loss: 0.1697826623916626
epoch: 106 training_loss 0.175993193089962 test_loss: 0.17841737270355223
epoch: 107 training_loss 0.175763056576252 test_loss: 0.1864824891090393
epoch: 108 training_loss 0.180834421440959 test_loss: 0.18209309577941896
epoch: 109 training_loss 0.17310973584651948 test_loss: 0.17397255897521974
epoch: 110 training_loss 0.18071968112140893 test_loss: 0.17953965663909913
epoch: 111 training_loss 0.17523437328636646 test_loss: 0.1658324956893921
epoch: 112 training_loss 0.18169539615511895 test_loss: 0.18411331176757811
epoch: 113 training_loss 0.17698101215064527 test_loss: 0.1729931950569153
epoch: 114 training_loss 0.18092326276004314 test_loss: 0.16661472320556642
epoch: 115 training_loss 0.17093137189745902 test_loss: 0.1543351173400879
epoch: 116 training_loss 0.17529538691043853 test_loss: 0.1667860507965088
epoch: 117 training_loss 0.18323402523994445 test_loss: 0.16414120197296142
epoch: 118 training_loss 0.1853621143847704 test_loss: 0.1752341628074646
epoch: 119 training_loss 0.1726655776798725 test_loss: 0.18213505744934083
epoch: 120 training_loss 0.18300087995827197 test_loss: 0.17703961133956908
epoch: 121 training_loss 0.18060865268111229 test_loss: 0.18267892599105834
epoch: 122 training_loss 0.1837540778517723 test_loss: 0.18007947206497193
epoch: 123 training_loss 0.17991822823882103 test_loss: 0.1770650863647461
epoch: 124 training_loss 0.17745892688632012 test_loss: 0.17406595945358277
epoch: 125 training_loss 0.18190899305045605 test_loss: 0.16879323720932007
epoch: 126 training_loss 0.17414042126387358 test_loss: 0.1711665630340576
epoch: 127 training_loss 0.1798630154132843 test_loss: 0.17011572122573854
epoch: 128 training_loss 0.17610988207161427 test_loss: 0.1603040337562561
epoch: 129 training_loss 0.17810546718537806 test_loss: 0.16739641427993773
epoch: 130 training_loss 0.1775827896595001 test_loss: 0.16993991136550904
epoch: 131 training_loss 0.16556582175195217 test_loss: 0.17623070478439332
epoch: 132 training_loss 0.1796894486248493 test_loss: 0.1958809494972229
epoch: 133 training_loss 0.1792627378553152 test_loss: 0.17918155193328858
epoch: 134 training_loss 0.1737720289081335 test_loss: 0.1935502052307129
epoch: 135 training_loss 0.18160738475620747 test_loss: 0.1775166392326355
epoch: 136 training_loss 0.17769102238118648 test_loss: 0.17245422601699828
epoch: 137 training_loss 0.18403102718293668 test_loss: 0.16961398124694824
epoch: 138 training_loss 0.1801600930094719 test_loss: 0.16830543279647828
epoch: 139 training_loss 0.16645689122378826 test_loss: 0.18468403816223145
epoch: 140 training_loss 0.17544814050197602 test_loss: 0.18043117523193358
epoch: 141 training_loss 0.1774009708315134 test_loss: 0.16753357648849487
epoch: 142 training_loss 0.17081148110330105 test_loss: 0.1840362310409546
epoch: 143 training_loss 0.17289629817008972 test_loss: 0.16138904094696044
epoch: 144 training_loss 0.17578674875199796 test_loss: 0.1643385648727417
epoch: 145 training_loss 0.1809915465861559 test_loss: 0.16927430629730225
epoch: 146 training_loss 0.1743490970134735 test_loss: 0.17621896266937256
epoch: 147 training_loss 0.18305014677345752 test_loss: 0.1685331106185913
epoch: 148 training_loss 0.18055159702897072 test_loss: 0.18616949319839476
epoch: 149 training_loss 0.1833053383231163 test_loss: 0.17312625646591187
epoch: 0 training_loss 8.06294180393219 test_loss: 4.594101333618164
epoch: 1 training_loss 3.6885120105743407 test_loss: 3.03171329498291
epoch: 2 training_loss 2.646680245399475 test_loss: 2.3175775527954103
epoch: 3 training_loss 2.153325556516647 test_loss: 2.0180856704711916
epoch: 4 training_loss 1.8588883793354034 test_loss: 1.7738985061645507
epoch: 5 training_loss 1.6803514277935028 test_loss: 1.6033058166503906
epoch: 6 training_loss 1.5362772977352142 test_loss: 1.4775548934936524
epoch: 7 training_loss 1.4412703561782836 test_loss: 1.4456543922424316
epoch: 8 training_loss 1.3647064113616942 test_loss: 1.3153246879577636
epoch: 9 training_loss 1.2804550218582154 test_loss: 1.2498770713806153
epoch: 10 training_loss 1.2139324057102203 test_loss: 1.22955322265625
epoch: 11 training_loss 1.1802240312099457 test_loss: 1.143747901916504
epoch: 12 training_loss 1.1291011488437652 test_loss: 1.1335161209106446
epoch: 13 training_loss 1.070580080151558 test_loss: 1.1093254089355469
epoch: 14 training_loss 1.0602734768390656 test_loss: 1.0311559677124023
epoch: 15 training_loss 1.017602540254593 test_loss: 1.019685935974121
epoch: 16 training_loss 1.0126069682836532 test_loss: 1.0326613426208495
epoch: 17 training_loss 0.9583258652687072 test_loss: 0.9441743850708008
epoch: 18 training_loss 0.9373416167497635 test_loss: 0.9232197761535644
epoch: 19 training_loss 0.9243908876180649 test_loss: 0.9292740821838379
epoch: 20 training_loss 0.9049812519550323 test_loss: 0.9145941734313965
epoch: 21 training_loss 0.8842319142818451 test_loss: 0.9153698921203614
epoch: 22 training_loss 0.8527625155448914 test_loss: 0.8613160133361817
epoch: 23 training_loss 0.84934605717659 test_loss: 0.853095531463623
epoch: 24 training_loss 0.8373967915773392 test_loss: 0.852181339263916
epoch: 25 training_loss 0.8253931021690368 test_loss: 0.8153666496276856
epoch: 26 training_loss 0.809646663069725 test_loss: 0.8077446937561035
epoch: 27 training_loss 0.8054671156406402 test_loss: 0.7965622901916504
epoch: 28 training_loss 0.7938233095407486 test_loss: 0.8324822425842285
epoch: 29 training_loss 0.7796629971265793 test_loss: 0.7937441825866699
epoch: 30 training_loss 0.7761668413877487 test_loss: 0.7599403381347656
epoch: 31 training_loss 0.7601411473751069 test_loss: 0.7672296524047851
epoch: 32 training_loss 0.750024054646492 test_loss: 0.7395169258117675
epoch: 33 training_loss 0.7421655023097992 test_loss: 0.7483837127685546
epoch: 34 training_loss 0.7358387613296509 test_loss: 0.75473051071167
epoch: 35 training_loss 0.7373566740751266 test_loss: 0.7407728672027588
epoch: 36 training_loss 0.7166794282197952 test_loss: 0.7250836849212646
epoch: 37 training_loss 0.7157963246107102 test_loss: 0.725197696685791
epoch: 38 training_loss 0.7184240198135377 test_loss: 0.7200716018676758
epoch: 39 training_loss 0.7081394964456558 test_loss: 0.7292399406433105
epoch: 40 training_loss 0.7218098616600037 test_loss: 0.6934463500976562
epoch: 41 training_loss 0.6903317898511887 test_loss: 0.7091281890869141
epoch: 42 training_loss 0.6868941271305085 test_loss: 0.7080860614776612
epoch: 43 training_loss 0.6862362408638001 test_loss: 0.6990399360656738
epoch: 44 training_loss 0.6725029337406159 test_loss: 0.6789291381835938
epoch: 45 training_loss 0.6725886857509613 test_loss: 0.6822510242462159
epoch: 46 training_loss 0.669643161892891 test_loss: 0.6652897834777832
epoch: 47 training_loss 0.6751573985815048 test_loss: 0.6633501529693604
epoch: 48 training_loss 0.6691690129041672 test_loss: 0.6668427944183349
epoch: 49 training_loss 0.6609738510847092 test_loss: 0.6684846401214599
epoch: 50 training_loss 0.6466408246755599 test_loss: 0.6905364990234375
epoch: 51 training_loss 0.648982127904892 test_loss: 0.6556715488433837
epoch: 52 training_loss 0.6410057628154755 test_loss: 0.6436773777008057
epoch: 53 training_loss 0.6391947972774505 test_loss: 0.6618637561798095
epoch: 54 training_loss 0.6496384060382843 test_loss: 0.6823346138000488
epoch: 55 training_loss 0.641243526339531 test_loss: 0.6288913249969482
epoch: 56 training_loss 0.6330781006813049 test_loss: 0.6300716876983643
epoch: 57 training_loss 0.6256938689947128 test_loss: 0.6458534717559814
epoch: 58 training_loss 0.6252716308832169 test_loss: 0.6355602264404296
epoch: 59 training_loss 0.6317954421043396 test_loss: 0.6311745166778564
epoch: 60 training_loss 0.6139380306005477 test_loss: 0.6097578048706055
epoch: 61 training_loss 0.6070958018302918 test_loss: 0.6163098335266113
epoch: 62 training_loss 0.6041010558605194 test_loss: 0.6116490840911866
epoch: 63 training_loss 0.6073407220840454 test_loss: 0.616706371307373
epoch: 64 training_loss 0.6218453770875931 test_loss: 0.6057363510131836
epoch: 65 training_loss 0.6250224781036376 test_loss: 0.6410592555999756
epoch: 66 training_loss 0.6074607014656067 test_loss: 0.6120468139648437
epoch: 67 training_loss 0.604583985209465 test_loss: 0.5894598960876465
epoch: 68 training_loss 0.5993121898174286 test_loss: 0.6072101593017578
epoch: 69 training_loss 0.6006170004606247 test_loss: 0.6211450099945068
epoch: 70 training_loss 0.5905838692188263 test_loss: 0.5995983600616455
epoch: 71 training_loss 0.5900397849082947 test_loss: 0.5750819206237793
epoch: 72 training_loss 0.5998065346479415 test_loss: 0.5832910060882568
epoch: 73 training_loss 0.5888972997665405 test_loss: 0.6238755702972412
epoch: 74 training_loss 0.5825581187009812 test_loss: 0.5966565132141113
epoch: 75 training_loss 0.5815632390975952 test_loss: 0.6107739448547364
epoch: 76 training_loss 0.5882979339361191 test_loss: 0.6154035091400146
epoch: 77 training_loss 0.5909450322389602 test_loss: 0.5844852447509765
epoch: 78 training_loss 0.5751611879467964 test_loss: 0.5732890605926514
epoch: 79 training_loss 0.5704462277889252 test_loss: 0.5778346538543702
epoch: 80 training_loss 0.5727930021286011 test_loss: 0.6121142864227295
epoch: 81 training_loss 0.576278617978096 test_loss: 0.5837994575500488
epoch: 82 training_loss 0.5621843510866165 test_loss: 0.5780002117156983
epoch: 83 training_loss 0.5651074028015137 test_loss: 0.5696064949035644
epoch: 84 training_loss 0.5571279150247573 test_loss: 0.5763459205627441
epoch: 85 training_loss 0.5627077674865723 test_loss: 0.5502610683441163
epoch: 86 training_loss 0.5560221046209335 test_loss: 0.5504979610443115
epoch: 87 training_loss 0.5560980555415154 test_loss: 0.5730375289916992
epoch: 88 training_loss 0.5617937806248665 test_loss: 0.5588728427886963
epoch: 89 training_loss 0.5733985131978989 test_loss: 0.5779719352722168
epoch: 90 training_loss 0.5625893062353134 test_loss: 0.5437098503112793
epoch: 91 training_loss 0.557025456726551 test_loss: 0.5421351909637451
epoch: 92 training_loss 0.5541165962815284 test_loss: 0.5537737846374512
epoch: 93 training_loss 0.5490342274308204 test_loss: 0.5685694694519043
epoch: 94 training_loss 0.5577798599004745 test_loss: 0.5556387424468994
epoch: 95 training_loss 0.5424745565652848 test_loss: 0.5404767036437989
epoch: 96 training_loss 0.5469713941216469 test_loss: 0.5753117084503174
epoch: 97 training_loss 0.5415916657447815 test_loss: 0.5616065979003906
epoch: 98 training_loss 0.5443687447905541 test_loss: 0.5501798629760742
epoch: 99 training_loss 0.5462042930722236 test_loss: 0.6068124771118164
epoch: 100 training_loss 0.5576195877790451 test_loss: 0.5609650611877441
epoch: 101 training_loss 0.5363795840740204 test_loss: 0.5492301940917969
epoch: 102 training_loss 0.5295341753959656 test_loss: 0.5312043190002441
epoch: 103 training_loss 0.5484968167543411 test_loss: 0.5752145290374756
epoch: 104 training_loss 0.5358101272583008 test_loss: 0.5523271560668945
epoch: 105 training_loss 0.5491680985689164 test_loss: 0.5452318668365479
epoch: 106 training_loss 0.54423638433218 test_loss: 0.52577805519104
epoch: 107 training_loss 0.5282620245218277 test_loss: 0.5279587268829345
epoch: 108 training_loss 0.5215376955270767 test_loss: 0.549161434173584
epoch: 109 training_loss 0.5251219582557678 test_loss: 0.5247477531433106
epoch: 110 training_loss 0.5330115252733231 test_loss: 0.5647634506225586
epoch: 111 training_loss 0.5270168018341065 test_loss: 0.5304337501525879
epoch: 112 training_loss 0.5278838190436363 test_loss: 0.5765135765075684
epoch: 113 training_loss 0.5336985149979592 test_loss: 0.5442895412445068
epoch: 114 training_loss 0.5301608288288117 test_loss: 0.5393265724182129
epoch: 115 training_loss 0.5287088760733605 test_loss: 0.5405037879943848
epoch: 116 training_loss 0.528025948703289 test_loss: 0.537672472000122
epoch: 117 training_loss 0.5194716310501098 test_loss: 0.5291347980499268
epoch: 118 training_loss 0.5225673776865005 test_loss: 0.5309386253356934
epoch: 119 training_loss 0.5216049107909203 test_loss: 0.5361488342285157
epoch: 120 training_loss 0.5194035598635673 test_loss: 0.5270933151245117
epoch: 121 training_loss 0.5308955132961273 test_loss: 0.5227504253387452
epoch: 122 training_loss 0.5150212800502777 test_loss: 0.5287572860717773
epoch: 123 training_loss 0.5081946372985839 test_loss: 0.510561752319336
epoch: 124 training_loss 0.5184854927659035 test_loss: 0.5516212463378907
epoch: 125 training_loss 0.5185259893536568 test_loss: 0.5205324649810791
epoch: 126 training_loss 0.514283284842968 test_loss: 0.5180408477783203
epoch: 127 training_loss 0.5086033925414085 test_loss: 0.532125186920166
epoch: 128 training_loss 0.5121746295690537 test_loss: 0.5140624523162842
epoch: 129 training_loss 0.5083525735139847 test_loss: 0.5004682540893555
epoch: 130 training_loss 0.5199540078639984 test_loss: 0.5514856815338135
epoch: 131 training_loss 0.5119350546598435 test_loss: 0.5028604507446289
epoch: 132 training_loss 0.5098970088362694 test_loss: 0.5076848983764648
epoch: 133 training_loss 0.5162355896830558 test_loss: 0.5243109703063965
epoch: 134 training_loss 0.4983501783013344 test_loss: 0.5380992412567138
epoch: 135 training_loss 0.498865168094635 test_loss: 0.5148220062255859
epoch: 136 training_loss 0.5035288274288178 test_loss: 0.5000175952911377
epoch: 137 training_loss 0.5056971117854119 test_loss: 0.5016470432281495
epoch: 138 training_loss 0.5042348918318749 test_loss: 0.5127007007598877
epoch: 139 training_loss 0.5163657057285309 test_loss: 0.5232090950012207
epoch: 140 training_loss 0.5009451669454574 test_loss: 0.49756550788879395
epoch: 141 training_loss 0.5117101696133614 test_loss: 0.5015218734741211
epoch: 142 training_loss 0.49627677977085116 test_loss: 0.508320426940918
epoch: 143 training_loss 0.4949109768867493 test_loss: 0.5204653263092041
epoch: 144 training_loss 0.5080954119563103 test_loss: 0.5003361225128173
epoch: 145 training_loss 0.49701877623796464 test_loss: 0.4932608127593994
epoch: 146 training_loss 0.5079069888591766 test_loss: 0.5295709609985352
epoch: 147 training_loss 0.4936823117733002 test_loss: 0.5040150165557862
epoch: 148 training_loss 0.5020777091383934 test_loss: 0.4912885665893555
epoch: 149 training_loss 0.49800404369831086 test_loss: 0.5168097019195557
2505.2837913203393
episode: 0 training return: tensor(-269.4321, device='cuda:0')
episode: 1 training return: tensor(115.5708, device='cuda:0')
episode: 2 training return: tensor(168.9229, device='cuda:0')
episode: 3 training return: tensor(-273.0660, device='cuda:0')
epoch: 1 test_true_pfm: 2473.8362631771474 sim_pfm: -83.93438969063573
episode: 4 training return: tensor(-298.3530, device='cuda:0')
episode: 5 training return: tensor(-352.7614, device='cuda:0')
episode: 6 training return: tensor(-262.5042, device='cuda:0')
episode: 7 training return: tensor(-303.8086, device='cuda:0')
epoch: 2 test_true_pfm: 1406.8464071684775 sim_pfm: -311.70716363928904
episode: 8 training return: tensor(225.8706, device='cuda:0')
episode: 9 training return: tensor(-309.4153, device='cuda:0')
episode: 10 training return: tensor(-345.7311, device='cuda:0')
episode: 11 training return: tensor(-250.9323, device='cuda:0')
epoch: 3 test_true_pfm: 1354.6947945548793 sim_pfm: -345.12189254302456
episode: 12 training return: tensor(-305.4317, device='cuda:0')
episode: 13 training return: tensor(-353.4300, device='cuda:0')
episode: 14 training return: tensor(-55.7763, device='cuda:0')
episode: 15 training return: tensor(-251.7794, device='cuda:0')
epoch: 4 test_true_pfm: 2207.5717028897075 sim_pfm: 12.888764203254444
episode: 16 training return: tensor(-366.0486, device='cuda:0')
episode: 17 training return: tensor(-354.1910, device='cuda:0')
episode: 18 training return: tensor(-364.3958, device='cuda:0')
episode: 19 training return: tensor(263.0403, device='cuda:0')
epoch: 5 test_true_pfm: 1628.9486403193648 sim_pfm: -127.53371777323385
episode: 20 training return: tensor(-334.4466, device='cuda:0')
episode: 21 training return: tensor(-221.6221, device='cuda:0')
episode: 22 training return: tensor(-359.8866, device='cuda:0')
episode: 23 training return: tensor(-312.6789, device='cuda:0')
epoch: 6 test_true_pfm: 2586.429398673499 sim_pfm: 24.640071345299173
episode: 24 training return: tensor(-357.4253, device='cuda:0')
episode: 25 training return: tensor(-107.1407, device='cuda:0')
episode: 26 training return: tensor(-126.3529, device='cuda:0')
episode: 27 training return: tensor(-369.8098, device='cuda:0')
epoch: 7 test_true_pfm: 2143.6589961310015 sim_pfm: 136.8795218662514
episode: 28 training return: tensor(121.5462, device='cuda:0')
episode: 29 training return: tensor(-237.9347, device='cuda:0')
episode: 30 training return: tensor(-225.5940, device='cuda:0')
episode: 31 training return: tensor(-223.9103, device='cuda:0')
epoch: 8 test_true_pfm: 2665.6491708931244 sim_pfm: 48.769325716391904
episode: 32 training return: tensor(75.3183, device='cuda:0')
episode: 33 training return: tensor(-70.6850, device='cuda:0')
episode: 34 training return: tensor(-106.7515, device='cuda:0')
episode: 35 training return: tensor(-370.0412, device='cuda:0')
epoch: 9 test_true_pfm: 2341.16958941748 sim_pfm: -91.28918810080116
episode: 36 training return: tensor(-362.2671, device='cuda:0')
episode: 37 training return: tensor(-125.8525, device='cuda:0')
episode: 38 training return: tensor(-199.6525, device='cuda:0')
episode: 39 training return: tensor(-220.0991, device='cuda:0')
epoch: 10 test_true_pfm: 2542.5625867614485 sim_pfm: 163.47499691372892
episode: 40 training return: tensor(-121.9286, device='cuda:0')
episode: 41 training return: tensor(-219.0585, device='cuda:0')
episode: 42 training return: tensor(-360.7880, device='cuda:0')
episode: 43 training return: tensor(-233.1768, device='cuda:0')
epoch: 11 test_true_pfm: 2044.916440102527 sim_pfm: -233.7127722381459
episode: 44 training return: tensor(-201.1742, device='cuda:0')
episode: 45 training return: tensor(-217.4332, device='cuda:0')
episode: 46 training return: tensor(-215.5369, device='cuda:0')
episode: 47 training return: tensor(209.9566, device='cuda:0')
epoch: 12 test_true_pfm: 2214.68782923729 sim_pfm: -184.42413022524366
episode: 48 training return: tensor(-363.8865, device='cuda:0')
episode: 49 training return: tensor(-329.1227, device='cuda:0')
episode: 50 training return: tensor(-332.5986, device='cuda:0')
episode: 51 training return: tensor(-199.7709, device='cuda:0')
epoch: 13 test_true_pfm: 2450.6854358224664 sim_pfm: -40.81190598783238
episode: 52 training return: tensor(-315.6462, device='cuda:0')
episode: 53 training return: tensor(-232.2417, device='cuda:0')
episode: 54 training return: tensor(-76.5192, device='cuda:0')
episode: 55 training return: tensor(50.7807, device='cuda:0')
epoch: 14 test_true_pfm: 1585.502943163252 sim_pfm: -33.007375063103005
episode: 56 training return: tensor(133.9357, device='cuda:0')
episode: 57 training return: tensor(-120.5118, device='cuda:0')
episode: 58 training return: tensor(-309.6243, device='cuda:0')
episode: 59 training return: tensor(-221.3494, device='cuda:0')
epoch: 15 test_true_pfm: 2420.429471625754 sim_pfm: -59.725046457004886
episode: 60 training return: tensor(-333.6607, device='cuda:0')
episode: 61 training return: tensor(-173.4441, device='cuda:0')
episode: 62 training return: tensor(-264.6416, device='cuda:0')
episode: 63 training return: tensor(-324.5842, device='cuda:0')
epoch: 16 test_true_pfm: 1687.4827801732054 sim_pfm: -229.3678692095758
episode: 64 training return: tensor(-47.8436, device='cuda:0')
episode: 65 training return: tensor(-234.4594, device='cuda:0')
episode: 66 training return: tensor(271.6021, device='cuda:0')
episode: 67 training return: tensor(-226.2115, device='cuda:0')
epoch: 17 test_true_pfm: 1595.1500745784372 sim_pfm: 58.8008756521837
episode: 68 training return: tensor(-221.1624, device='cuda:0')
episode: 69 training return: tensor(-52.8157, device='cuda:0')
episode: 70 training return: tensor(-228.0660, device='cuda:0')
episode: 71 training return: tensor(-376.1949, device='cuda:0')
epoch: 18 test_true_pfm: 1729.1376067523368 sim_pfm: -165.36300223468183
episode: 72 training return: tensor(-293.1144, device='cuda:0')
episode: 73 training return: tensor(-91.5661, device='cuda:0')
episode: 74 training return: tensor(219.4379, device='cuda:0')
episode: 75 training return: tensor(-270.4932, device='cuda:0')
epoch: 19 test_true_pfm: 1687.6523397890794 sim_pfm: -19.659139481256716
episode: 76 training return: tensor(-317.9103, device='cuda:0')
episode: 77 training return: tensor(-207.4556, device='cuda:0')
episode: 78 training return: tensor(-341.9072, device='cuda:0')
episode: 79 training return: tensor(-280.1114, device='cuda:0')
epoch: 20 test_true_pfm: 1593.7598168099782 sim_pfm: -235.6724127124568
episode: 80 training return: tensor(-220.8651, device='cuda:0')
episode: 81 training return: tensor(-118.5952, device='cuda:0')
episode: 82 training return: tensor(-338.4245, device='cuda:0')
episode: 83 training return: tensor(-155.3634, device='cuda:0')
epoch: 21 test_true_pfm: 2255.6054639297186 sim_pfm: -145.1752947779993
episode: 84 training return: tensor(-284.8352, device='cuda:0')
episode: 85 training return: tensor(396.8312, device='cuda:0')
episode: 86 training return: tensor(-301.3396, device='cuda:0')
episode: 87 training return: tensor(-216.8384, device='cuda:0')
epoch: 22 test_true_pfm: 1609.0651191224063 sim_pfm: 40.899467533939365
episode: 88 training return: tensor(-59.1316, device='cuda:0')
episode: 89 training return: tensor(-229.6862, device='cuda:0')
episode: 90 training return: tensor(121.6249, device='cuda:0')
episode: 91 training return: tensor(-135.1559, device='cuda:0')
epoch: 23 test_true_pfm: 2379.176542485711 sim_pfm: -243.95173574815271
episode: 92 training return: tensor(-252.6342, device='cuda:0')
episode: 93 training return: tensor(354.8174, device='cuda:0')
episode: 94 training return: tensor(-235.0493, device='cuda:0')
episode: 95 training return: tensor(340.3567, device='cuda:0')
epoch: 24 test_true_pfm: 1593.3885906374237 sim_pfm: -243.25367088096877
episode: 96 training return: tensor(-310.0119, device='cuda:0')
episode: 97 training return: tensor(-345.5339, device='cuda:0')
episode: 98 training return: tensor(-228.9144, device='cuda:0')
episode: 99 training return: tensor(348.4197, device='cuda:0')
epoch: 25 test_true_pfm: 2223.043832888836 sim_pfm: -243.49588141427375
episode: 100 training return: tensor(-378.6058, device='cuda:0')
episode: 101 training return: tensor(-269.8232, device='cuda:0')
episode: 102 training return: tensor(-356.2395, device='cuda:0')
episode: 103 training return: tensor(-326.1904, device='cuda:0')
epoch: 26 test_true_pfm: 1614.61872170248 sim_pfm: -107.03535450346924
episode: 104 training return: tensor(79.5151, device='cuda:0')
episode: 105 training return: tensor(296.3676, device='cuda:0')
episode: 106 training return: tensor(-138.4994, device='cuda:0')
episode: 107 training return: tensor(386.4702, device='cuda:0')
epoch: 27 test_true_pfm: 1604.671501418419 sim_pfm: -104.3941864099276
episode: 108 training return: tensor(-238.9358, device='cuda:0')
episode: 109 training return: tensor(224.0000, device='cuda:0')
episode: 110 training return: tensor(357.9002, device='cuda:0')
episode: 111 training return: tensor(-327.0668, device='cuda:0')
epoch: 28 test_true_pfm: 2271.9042952240734 sim_pfm: -163.34764582229158
episode: 112 training return: tensor(-133.6821, device='cuda:0')
episode: 113 training return: tensor(279.1050, device='cuda:0')
episode: 114 training return: tensor(-164.5545, device='cuda:0')
episode: 115 training return: tensor(-353.1469, device='cuda:0')
epoch: 29 test_true_pfm: 2142.2951478856216 sim_pfm: -32.64478020868652
episode: 116 training return: tensor(-118.9577, device='cuda:0')
episode: 117 training return: tensor(-79.8653, device='cuda:0')
episode: 118 training return: tensor(-275.4446, device='cuda:0')
episode: 119 training return: tensor(-225.2574, device='cuda:0')
epoch: 30 test_true_pfm: 2168.1488370939082 sim_pfm: -63.35103377882236
episode: 120 training return: tensor(277.2405, device='cuda:0')
episode: 121 training return: tensor(-347.2485, device='cuda:0')
episode: 122 training return: tensor(87.4221, device='cuda:0')
episode: 123 training return: tensor(-366.2115, device='cuda:0')
epoch: 31 test_true_pfm: 1787.3276981557526 sim_pfm: -44.84176836892342
episode: 124 training return: tensor(-146.0342, device='cuda:0')
episode: 125 training return: tensor(-48.6646, device='cuda:0')
episode: 126 training return: tensor(-204.6732, device='cuda:0')
episode: 127 training return: tensor(-300.7364, device='cuda:0')
epoch: 32 test_true_pfm: 1617.720322247944 sim_pfm: -250.1864318747345
episode: 128 training return: tensor(100.0109, device='cuda:0')
episode: 129 training return: tensor(-212.3306, device='cuda:0')
episode: 130 training return: tensor(-162.2408, device='cuda:0')
episode: 131 training return: tensor(-133.5500, device='cuda:0')
epoch: 33 test_true_pfm: 1613.3406427218542 sim_pfm: -196.28912361695743
episode: 132 training return: tensor(80.1994, device='cuda:0')
episode: 133 training return: tensor(-337.5156, device='cuda:0')
episode: 134 training return: tensor(-323.7124, device='cuda:0')
episode: 135 training return: tensor(-211.0628, device='cuda:0')
epoch: 34 test_true_pfm: 2150.3809153199913 sim_pfm: -199.9418245277678
episode: 136 training return: tensor(264.4320, device='cuda:0')
episode: 137 training return: tensor(154.9288, device='cuda:0')
episode: 138 training return: tensor(-150.3672, device='cuda:0')
episode: 139 training return: tensor(148.3033, device='cuda:0')
epoch: 35 test_true_pfm: 2062.3921538885197 sim_pfm: -233.82773352684066
episode: 140 training return: tensor(-282.6312, device='cuda:0')
episode: 141 training return: tensor(-62.9907, device='cuda:0')
episode: 142 training return: tensor(-310.9665, device='cuda:0')
episode: 143 training return: tensor(-135.5952, device='cuda:0')
epoch: 36 test_true_pfm: 1637.744367096767 sim_pfm: -227.60657255149758
episode: 144 training return: tensor(-253.5809, device='cuda:0')
episode: 145 training return: tensor(-308.1329, device='cuda:0')
episode: 146 training return: tensor(-223.1571, device='cuda:0')
episode: 147 training return: tensor(-219.9734, device='cuda:0')
epoch: 37 test_true_pfm: 2075.2868579424135 sim_pfm: -239.06328998700096
episode: 148 training return: tensor(63.0511, device='cuda:0')
episode: 149 training return: tensor(-40.5101, device='cuda:0')
episode: 150 training return: tensor(-128.0883, device='cuda:0')
episode: 151 training return: tensor(-105.4140, device='cuda:0')
epoch: 38 test_true_pfm: 2329.4871613170135 sim_pfm: -28.06402010685997
episode: 152 training return: tensor(-294.1384, device='cuda:0')
episode: 153 training return: tensor(-179.6727, device='cuda:0')
episode: 154 training return: tensor(-349.5681, device='cuda:0')
episode: 155 training return: tensor(-304.7359, device='cuda:0')
epoch: 39 test_true_pfm: 1944.6026335371087 sim_pfm: -110.53904257761315
episode: 156 training return: tensor(72.9941, device='cuda:0')
episode: 157 training return: tensor(-259.9617, device='cuda:0')
episode: 158 training return: tensor(-359.2850, device='cuda:0')
episode: 159 training return: tensor(377.2981, device='cuda:0')
epoch: 40 test_true_pfm: 2062.0418235739276 sim_pfm: 149.58059590736715
episode: 160 training return: tensor(-19.5608, device='cuda:0')
episode: 161 training return: tensor(-32.5127, device='cuda:0')
episode: 162 training return: tensor(233.8096, device='cuda:0')
episode: 163 training return: tensor(-96.6132, device='cuda:0')
epoch: 41 test_true_pfm: 2361.691788384691 sim_pfm: -99.99120831332402
episode: 164 training return: tensor(176.7617, device='cuda:0')
episode: 165 training return: tensor(-24.3929, device='cuda:0')
episode: 166 training return: tensor(-253.5572, device='cuda:0')
episode: 167 training return: tensor(329.2696, device='cuda:0')
epoch: 42 test_true_pfm: 2464.455766995447 sim_pfm: -194.37520851951558
episode: 168 training return: tensor(-320.7558, device='cuda:0')
episode: 169 training return: tensor(-324.3203, device='cuda:0')
episode: 170 training return: tensor(329.7843, device='cuda:0')
episode: 171 training return: tensor(-211.2742, device='cuda:0')
epoch: 43 test_true_pfm: 2032.125464132406 sim_pfm: -222.82383600925095
episode: 172 training return: tensor(156.5972, device='cuda:0')
episode: 173 training return: tensor(-239.7574, device='cuda:0')
episode: 174 training return: tensor(-128.3719, device='cuda:0')
episode: 175 training return: tensor(-28.2287, device='cuda:0')
epoch: 44 test_true_pfm: 2845.5491853767653 sim_pfm: -94.71553679532371
episode: 176 training return: tensor(-317.1465, device='cuda:0')
episode: 177 training return: tensor(358.7806, device='cuda:0')
episode: 178 training return: tensor(57.4143, device='cuda:0')
episode: 179 training return: tensor(-88.3810, device='cuda:0')
epoch: 45 test_true_pfm: 1904.2407519415694 sim_pfm: -35.211471940545984
episode: 180 training return: tensor(-202.5126, device='cuda:0')
episode: 181 training return: tensor(-27.6715, device='cuda:0')
episode: 182 training return: tensor(-84.7663, device='cuda:0')
episode: 183 training return: tensor(342.9299, device='cuda:0')
epoch: 46 test_true_pfm: 2483.612410877218 sim_pfm: -229.18237456857847
episode: 184 training return: tensor(-256.7475, device='cuda:0')
episode: 185 training return: tensor(6.7286, device='cuda:0')
episode: 186 training return: tensor(-114.6377, device='cuda:0')
episode: 187 training return: tensor(-251.1241, device='cuda:0')
epoch: 47 test_true_pfm: 2182.078948861487 sim_pfm: -226.007197213592
episode: 188 training return: tensor(-128.1063, device='cuda:0')
episode: 189 training return: tensor(-189.3086, device='cuda:0')
episode: 190 training return: tensor(322.2169, device='cuda:0')
episode: 191 training return: tensor(350.7324, device='cuda:0')
epoch: 48 test_true_pfm: 1905.9092825263451 sim_pfm: -215.04481533903163
episode: 192 training return: tensor(350.9448, device='cuda:0')
episode: 193 training return: tensor(361.4095, device='cuda:0')
episode: 194 training return: tensor(160.2070, device='cuda:0')
episode: 195 training return: tensor(97.8414, device='cuda:0')
epoch: 49 test_true_pfm: 1912.505189408456 sim_pfm: 166.46794695977587
episode: 196 training return: tensor(-170.1928, device='cuda:0')
episode: 197 training return: tensor(389.6599, device='cuda:0')
episode: 198 training return: tensor(414.0681, device='cuda:0')
episode: 199 training return: tensor(-127.2195, device='cuda:0')
epoch: 50 test_true_pfm: 1928.0575750023072 sim_pfm: -200.80168439001622
episode: 200 training return: tensor(-220.4466, device='cuda:0')
episode: 201 training return: tensor(1.8878, device='cuda:0')
episode: 202 training return: tensor(-303.2527, device='cuda:0')
episode: 203 training return: tensor(-286.1546, device='cuda:0')
epoch: 51 test_true_pfm: 2086.8443305928517 sim_pfm: 147.237360180278
episode: 204 training return: tensor(-13.7052, device='cuda:0')
episode: 205 training return: tensor(-138.6548, device='cuda:0')
episode: 206 training return: tensor(90.8965, device='cuda:0')
episode: 207 training return: tensor(-358.7082, device='cuda:0')
epoch: 52 test_true_pfm: 1769.0456667938588 sim_pfm: 47.61529316749269
episode: 208 training return: tensor(213.1316, device='cuda:0')
episode: 209 training return: tensor(-65.6219, device='cuda:0')
episode: 210 training return: tensor(-15.1593, device='cuda:0')
episode: 211 training return: tensor(62.1323, device='cuda:0')
epoch: 53 test_true_pfm: 1780.16801183638 sim_pfm: 227.45589901568988
episode: 212 training return: tensor(42.7653, device='cuda:0')
episode: 213 training return: tensor(-355.5075, device='cuda:0')
episode: 214 training return: tensor(-317.7728, device='cuda:0')
episode: 215 training return: tensor(-298.9891, device='cuda:0')
epoch: 54 test_true_pfm: 1917.9358949114157 sim_pfm: -238.70299045572756
episode: 216 training return: tensor(-216.5708, device='cuda:0')
episode: 217 training return: tensor(83.3239, device='cuda:0')
episode: 218 training return: tensor(-213.5553, device='cuda:0')
episode: 219 training return: tensor(83.0731, device='cuda:0')
epoch: 55 test_true_pfm: 1592.4592808079444 sim_pfm: -270.3166646230966
episode: 220 training return: tensor(-291.6112, device='cuda:0')
episode: 221 training return: tensor(-137.7128, device='cuda:0')
episode: 222 training return: tensor(156.1967, device='cuda:0')
episode: 223 training return: tensor(-34.2515, device='cuda:0')
epoch: 56 test_true_pfm: 1896.6593908549319 sim_pfm: -188.11266579525545
episode: 224 training return: tensor(-284.4535, device='cuda:0')
episode: 225 training return: tensor(105.2952, device='cuda:0')
episode: 226 training return: tensor(-207.0189, device='cuda:0')
episode: 227 training return: tensor(-222.7001, device='cuda:0')
epoch: 57 test_true_pfm: 1675.8002028559688 sim_pfm: -234.0027003028663
episode: 228 training return: tensor(293.0727, device='cuda:0')
episode: 229 training return: tensor(-238.8638, device='cuda:0')
episode: 230 training return: tensor(-220.9077, device='cuda:0')
episode: 231 training return: tensor(-205.6520, device='cuda:0')
epoch: 58 test_true_pfm: 1588.7574881039711 sim_pfm: -39.812222151978254
episode: 232 training return: tensor(-111.0148, device='cuda:0')
episode: 233 training return: tensor(-212.2658, device='cuda:0')
episode: 234 training return: tensor(-190.3829, device='cuda:0')
episode: 235 training return: tensor(-241.9968, device='cuda:0')
epoch: 59 test_true_pfm: 1962.4722988919614 sim_pfm: -208.26100667053834
episode: 236 training return: tensor(-210.7972, device='cuda:0')
episode: 237 training return: tensor(-113.3861, device='cuda:0')
episode: 238 training return: tensor(-271.5325, device='cuda:0')
episode: 239 training return: tensor(-238.8445, device='cuda:0')
epoch: 60 test_true_pfm: 2179.479479251631 sim_pfm: -208.16113732746453
episode: 240 training return: tensor(73.7622, device='cuda:0')
episode: 241 training return: tensor(-302.3138, device='cuda:0')
episode: 242 training return: tensor(305.6181, device='cuda:0')
episode: 243 training return: tensor(-248.4496, device='cuda:0')
epoch: 61 test_true_pfm: 2272.9608609361817 sim_pfm: 99.77189647841927
episode: 244 training return: tensor(-249.7097, device='cuda:0')
episode: 245 training return: tensor(-98.9530, device='cuda:0')
episode: 246 training return: tensor(-297.2398, device='cuda:0')
episode: 247 training return: tensor(-103.3712, device='cuda:0')
epoch: 62 test_true_pfm: 2033.1101818550421 sim_pfm: -174.5552620816743
episode: 248 training return: tensor(-256.9894, device='cuda:0')
episode: 249 training return: tensor(93.1390, device='cuda:0')
episode: 250 training return: tensor(-358.2948, device='cuda:0')
episode: 251 training return: tensor(-185.2657, device='cuda:0')
epoch: 63 test_true_pfm: 2470.1619722151577 sim_pfm: 14.5098901927801
episode: 252 training return: tensor(108.8226, device='cuda:0')
episode: 253 training return: tensor(-302.1118, device='cuda:0')
episode: 254 training return: tensor(-361.0382, device='cuda:0')
episode: 255 training return: tensor(54.7551, device='cuda:0')
epoch: 64 test_true_pfm: 1891.5104375335397 sim_pfm: -123.22320459845166
episode: 256 training return: tensor(-29.0781, device='cuda:0')
episode: 257 training return: tensor(-71.8078, device='cuda:0')
episode: 258 training return: tensor(-67.9246, device='cuda:0')
episode: 259 training return: tensor(27.2407, device='cuda:0')
epoch: 65 test_true_pfm: 2150.7924643363854 sim_pfm: -240.49748724885285
episode: 260 training return: tensor(-206.1729, device='cuda:0')
episode: 261 training return: tensor(84.0960, device='cuda:0')
episode: 262 training return: tensor(-130.5751, device='cuda:0')
episode: 263 training return: tensor(-101.8070, device='cuda:0')
epoch: 66 test_true_pfm: 2766.7656950310543 sim_pfm: -224.6883906518536
episode: 264 training return: tensor(-318.1562, device='cuda:0')
episode: 265 training return: tensor(-98.4019, device='cuda:0')
episode: 266 training return: tensor(113.9220, device='cuda:0')
episode: 267 training return: tensor(1.5872, device='cuda:0')
epoch: 67 test_true_pfm: 2463.55523757529 sim_pfm: -252.4312654423605
episode: 268 training return: tensor(-214.1224, device='cuda:0')
episode: 269 training return: tensor(385.7044, device='cuda:0')
episode: 270 training return: tensor(-9.4387, device='cuda:0')
episode: 271 training return: tensor(121.2400, device='cuda:0')
epoch: 68 test_true_pfm: 2122.8136926835787 sim_pfm: -49.1743032761151
episode: 272 training return: tensor(-115.6492, device='cuda:0')
episode: 273 training return: tensor(316.6138, device='cuda:0')
episode: 274 training return: tensor(-211.5360, device='cuda:0')
episode: 275 training return: tensor(-214.0637, device='cuda:0')
epoch: 69 test_true_pfm: 2126.8391364585 sim_pfm: 151.97561853938774
episode: 276 training return: tensor(-211.0552, device='cuda:0')
episode: 277 training return: tensor(-2.5816, device='cuda:0')
episode: 278 training return: tensor(97.9916, device='cuda:0')
episode: 279 training return: tensor(-31.8367, device='cuda:0')
epoch: 70 test_true_pfm: 2538.9093699200985 sim_pfm: -103.16781903180527
episode: 280 training return: tensor(-196.0457, device='cuda:0')
episode: 281 training return: tensor(78.5191, device='cuda:0')
episode: 282 training return: tensor(-263.3823, device='cuda:0')
episode: 283 training return: tensor(-20.5182, device='cuda:0')
epoch: 71 test_true_pfm: 2862.8105071459345 sim_pfm: -240.7184386186127
episode: 284 training return: tensor(-228.6370, device='cuda:0')
episode: 285 training return: tensor(-254.1394, device='cuda:0')
episode: 286 training return: tensor(286.2231, device='cuda:0')
episode: 287 training return: tensor(195.3972, device='cuda:0')
epoch: 72 test_true_pfm: 1601.0749082027323 sim_pfm: -38.65844962412181
episode: 288 training return: tensor(216.9370, device='cuda:0')
episode: 289 training return: tensor(-141.0677, device='cuda:0')
episode: 290 training return: tensor(162.0270, device='cuda:0')
episode: 291 training return: tensor(-21.2217, device='cuda:0')
epoch: 73 test_true_pfm: 1883.092438378746 sim_pfm: 48.75023409782443
episode: 292 training return: tensor(-91.2387, device='cuda:0')
episode: 293 training return: tensor(-119.4210, device='cuda:0')
episode: 294 training return: tensor(376.8670, device='cuda:0')
episode: 295 training return: tensor(-2.4974, device='cuda:0')
epoch: 74 test_true_pfm: 2702.3616570899744 sim_pfm: -216.8528812502433
episode: 296 training return: tensor(-279.6155, device='cuda:0')
episode: 297 training return: tensor(-298.7686, device='cuda:0')
episode: 298 training return: tensor(138.7608, device='cuda:0')
episode: 299 training return: tensor(-185.0547, device='cuda:0')
epoch: 75 test_true_pfm: 1706.248482210782 sim_pfm: -175.52256130004147
episode: 300 training return: tensor(-193.5139, device='cuda:0')
episode: 301 training return: tensor(-28.3588, device='cuda:0')
episode: 302 training return: tensor(-203.0876, device='cuda:0')
episode: 303 training return: tensor(-364.5212, device='cuda:0')
epoch: 76 test_true_pfm: 2510.6247336225874 sim_pfm: -22.137703854144394
episode: 304 training return: tensor(-6.7327, device='cuda:0')
episode: 305 training return: tensor(65.3573, device='cuda:0')
episode: 306 training return: tensor(-251.1940, device='cuda:0')
episode: 307 training return: tensor(96.2772, device='cuda:0')
epoch: 77 test_true_pfm: 2337.8411680126924 sim_pfm: -257.25144962139893
episode: 308 training return: tensor(-250.7197, device='cuda:0')
episode: 309 training return: tensor(-129.8448, device='cuda:0')
episode: 310 training return: tensor(362.7239, device='cuda:0')
episode: 311 training return: tensor(-204.7595, device='cuda:0')
epoch: 78 test_true_pfm: 1648.0972774918503 sim_pfm: -241.66962495993357
episode: 312 training return: tensor(333.6930, device='cuda:0')
episode: 313 training return: tensor(357.8063, device='cuda:0')
episode: 314 training return: tensor(-62.7268, device='cuda:0')
episode: 315 training return: tensor(49.7524, device='cuda:0')
epoch: 79 test_true_pfm: 2712.8066420072005 sim_pfm: -4.106497461344891
episode: 316 training return: tensor(211.6653, device='cuda:0')
episode: 317 training return: tensor(74.1945, device='cuda:0')
episode: 318 training return: tensor(-18.1306, device='cuda:0')
episode: 319 training return: tensor(-311.2347, device='cuda:0')
epoch: 80 test_true_pfm: 2781.4164874828894 sim_pfm: -121.56169677210467
episode: 320 training return: tensor(-101.8494, device='cuda:0')
episode: 321 training return: tensor(-83.3252, device='cuda:0')
episode: 322 training return: tensor(44.0172, device='cuda:0')
episode: 323 training return: tensor(-239.8219, device='cuda:0')
epoch: 81 test_true_pfm: 1934.0382554325986 sim_pfm: -264.7976057615015
episode: 324 training return: tensor(392.5727, device='cuda:0')
episode: 325 training return: tensor(-343.3196, device='cuda:0')
episode: 326 training return: tensor(-361.9203, device='cuda:0')
episode: 327 training return: tensor(-9.2270, device='cuda:0')
epoch: 82 test_true_pfm: 1839.581499065751 sim_pfm: -172.9023357605523
episode: 328 training return: tensor(-207.9240, device='cuda:0')
episode: 329 training return: tensor(-216.2376, device='cuda:0')
episode: 330 training return: tensor(-184.6039, device='cuda:0')
episode: 331 training return: tensor(-326.0079, device='cuda:0')
epoch: 83 test_true_pfm: 1825.522307560108 sim_pfm: -56.37625449284678
episode: 332 training return: tensor(94.9510, device='cuda:0')
episode: 333 training return: tensor(-284.3741, device='cuda:0')
episode: 334 training return: tensor(361.4196, device='cuda:0')
episode: 335 training return: tensor(-218.7741, device='cuda:0')
epoch: 84 test_true_pfm: 2224.6967438084666 sim_pfm: 144.82361025417535
episode: 336 training return: tensor(138.7478, device='cuda:0')
episode: 337 training return: tensor(29.5228, device='cuda:0')
episode: 338 training return: tensor(-173.7505, device='cuda:0')
episode: 339 training return: tensor(418.2641, device='cuda:0')
epoch: 85 test_true_pfm: 2504.4663540040283 sim_pfm: -254.94730604443853
episode: 340 training return: tensor(369.7272, device='cuda:0')
episode: 341 training return: tensor(-295.0833, device='cuda:0')
episode: 342 training return: tensor(-92.3176, device='cuda:0')
episode: 343 training return: tensor(-334.1685, device='cuda:0')
epoch: 86 test_true_pfm: 2187.0987575310173 sim_pfm: -254.228951314309
episode: 344 training return: tensor(-90.8999, device='cuda:0')
episode: 345 training return: tensor(-29.2844, device='cuda:0')
episode: 346 training return: tensor(345.3249, device='cuda:0')
episode: 347 training return: tensor(-317.3596, device='cuda:0')
epoch: 87 test_true_pfm: 2714.8484601919986 sim_pfm: -231.1308710074421
episode: 348 training return: tensor(-205.2025, device='cuda:0')
episode: 349 training return: tensor(21.6773, device='cuda:0')
episode: 350 training return: tensor(-214.6092, device='cuda:0')
episode: 351 training return: tensor(-210.4089, device='cuda:0')
epoch: 88 test_true_pfm: 2480.1959095564107 sim_pfm: -134.92226974099563
episode: 352 training return: tensor(-185.3690, device='cuda:0')
episode: 353 training return: tensor(-229.7406, device='cuda:0')
episode: 354 training return: tensor(-141.3284, device='cuda:0')
episode: 355 training return: tensor(-345.8090, device='cuda:0')
epoch: 89 test_true_pfm: 1584.5511850444861 sim_pfm: 135.37341962060114
episode: 356 training return: tensor(-216.8888, device='cuda:0')
episode: 357 training return: tensor(-100.4563, device='cuda:0')
episode: 358 training return: tensor(-301.3920, device='cuda:0')
episode: 359 training return: tensor(-103.9421, device='cuda:0')
epoch: 90 test_true_pfm: 2057.351172242005 sim_pfm: 98.85257099252583
episode: 360 training return: tensor(343.5245, device='cuda:0')
episode: 361 training return: tensor(-30.5440, device='cuda:0')
episode: 362 training return: tensor(97.3968, device='cuda:0')
episode: 363 training return: tensor(-288.2131, device='cuda:0')
epoch: 91 test_true_pfm: 1762.5988841652527 sim_pfm: -97.610107402065
episode: 364 training return: tensor(-223.6298, device='cuda:0')
episode: 365 training return: tensor(366.5997, device='cuda:0')
episode: 366 training return: tensor(-93.3340, device='cuda:0')
episode: 367 training return: tensor(-356.7392, device='cuda:0')
epoch: 92 test_true_pfm: 1728.8888282273049 sim_pfm: 35.20570647689359
episode: 368 training return: tensor(-28.3276, device='cuda:0')
episode: 369 training return: tensor(362.7326, device='cuda:0')
episode: 370 training return: tensor(355.7602, device='cuda:0')
episode: 371 training return: tensor(-41.8285, device='cuda:0')
epoch: 93 test_true_pfm: 2393.101766944447 sim_pfm: -39.63301476207562
episode: 372 training return: tensor(-357.6699, device='cuda:0')
episode: 373 training return: tensor(58.3863, device='cuda:0')
episode: 374 training return: tensor(82.4096, device='cuda:0')
episode: 375 training return: tensor(409.6930, device='cuda:0')
epoch: 94 test_true_pfm: 2171.142823167221 sim_pfm: -191.48991148398878
episode: 376 training return: tensor(343.4885, device='cuda:0')
episode: 377 training return: tensor(-137.8195, device='cuda:0')
episode: 378 training return: tensor(-218.6734, device='cuda:0')
episode: 379 training return: tensor(-41.5108, device='cuda:0')
epoch: 95 test_true_pfm: 1940.4456851194384 sim_pfm: 155.4602924568268
episode: 380 training return: tensor(-259.9037, device='cuda:0')
episode: 381 training return: tensor(-363.7642, device='cuda:0')
episode: 382 training return: tensor(-22.0744, device='cuda:0')
episode: 383 training return: tensor(-260.5778, device='cuda:0')
epoch: 96 test_true_pfm: 1674.8176520193003 sim_pfm: -232.7533650061038
episode: 384 training return: tensor(-351.0231, device='cuda:0')
episode: 385 training return: tensor(-358.7359, device='cuda:0')
episode: 386 training return: tensor(-256.8250, device='cuda:0')
episode: 387 training return: tensor(-171.7202, device='cuda:0')
epoch: 97 test_true_pfm: 2432.2819131473448 sim_pfm: -230.31320994203756
episode: 388 training return: tensor(-311.5680, device='cuda:0')
episode: 389 training return: tensor(217.3985, device='cuda:0')
episode: 390 training return: tensor(244.5887, device='cuda:0')
episode: 391 training return: tensor(-158.5118, device='cuda:0')
epoch: 98 test_true_pfm: 2325.1730622188384 sim_pfm: -232.86730214039562
episode: 392 training return: tensor(-343.6717, device='cuda:0')
episode: 393 training return: tensor(-18.5779, device='cuda:0')
episode: 394 training return: tensor(381.4865, device='cuda:0')
episode: 395 training return: tensor(62.6866, device='cuda:0')
epoch: 99 test_true_pfm: 2191.82776330121 sim_pfm: -79.17179535686348
episode: 396 training return: tensor(177.2733, device='cuda:0')
episode: 397 training return: tensor(-177.7339, device='cuda:0')
episode: 398 training return: tensor(-216.7038, device='cuda:0')
episode: 399 training return: tensor(105.5353, device='cuda:0')
epoch: 100 test_true_pfm: 2239.0155740794967 sim_pfm: -85.54684824220021
episode: 400 training return: tensor(-294.7036, device='cuda:0')
episode: 401 training return: tensor(383.6805, device='cuda:0')
episode: 402 training return: tensor(175.1501, device='cuda:0')
episode: 403 training return: tensor(271.6276, device='cuda:0')
epoch: 101 test_true_pfm: 2230.346297751998 sim_pfm: -254.1572002018996
episode: 404 training return: tensor(-300.4369, device='cuda:0')
episode: 405 training return: tensor(-206.6529, device='cuda:0')
episode: 406 training return: tensor(-332.9438, device='cuda:0')
episode: 407 training return: tensor(-136.2888, device='cuda:0')
epoch: 102 test_true_pfm: 1783.2092317750448 sim_pfm: -23.786687246271565
episode: 408 training return: tensor(-39.3786, device='cuda:0')
episode: 409 training return: tensor(-270.1642, device='cuda:0')
episode: 410 training return: tensor(-100.3139, device='cuda:0')
episode: 411 training return: tensor(31.9079, device='cuda:0')
epoch: 103 test_true_pfm: 1618.8809428099285 sim_pfm: -51.77890838337286
episode: 412 training return: tensor(-279.1074, device='cuda:0')
episode: 413 training return: tensor(-47.8876, device='cuda:0')
episode: 414 training return: tensor(76.7816, device='cuda:0')
episode: 415 training return: tensor(-318.4877, device='cuda:0')
epoch: 104 test_true_pfm: 2705.255305424208 sim_pfm: -229.04203580746739
episode: 416 training return: tensor(-108.0409, device='cuda:0')
episode: 417 training return: tensor(364.6294, device='cuda:0')
episode: 418 training return: tensor(-308.4124, device='cuda:0')
episode: 419 training return: tensor(-86.6183, device='cuda:0')
epoch: 105 test_true_pfm: 1770.7502256603887 sim_pfm: -128.04142429118897
episode: 420 training return: tensor(-62.9436, device='cuda:0')
episode: 421 training return: tensor(-324.6869, device='cuda:0')
episode: 422 training return: tensor(-323.6624, device='cuda:0')
episode: 423 training return: tensor(-285.2084, device='cuda:0')
epoch: 106 test_true_pfm: 2442.973559805218 sim_pfm: 180.10196789818778
episode: 424 training return: tensor(-339.5472, device='cuda:0')
episode: 425 training return: tensor(63.9480, device='cuda:0')
episode: 426 training return: tensor(-325.1403, device='cuda:0')
episode: 427 training return: tensor(-137.8614, device='cuda:0')
epoch: 107 test_true_pfm: 2596.460733603319 sim_pfm: 134.84859251545276
episode: 428 training return: tensor(-330.2747, device='cuda:0')
episode: 429 training return: tensor(-11.9554, device='cuda:0')
episode: 430 training return: tensor(-256.5552, device='cuda:0')
episode: 431 training return: tensor(154.4036, device='cuda:0')
epoch: 108 test_true_pfm: 2200.5224941669826 sim_pfm: -247.77818471550322
episode: 432 training return: tensor(211.5100, device='cuda:0')
episode: 433 training return: tensor(303.8852, device='cuda:0')
episode: 434 training return: tensor(-131.4924, device='cuda:0')
episode: 435 training return: tensor(365.4648, device='cuda:0')
epoch: 109 test_true_pfm: 2195.350471386545 sim_pfm: -167.71073974203318
episode: 436 training return: tensor(-249.3329, device='cuda:0')
episode: 437 training return: tensor(-315.0246, device='cuda:0')
episode: 438 training return: tensor(391.2058, device='cuda:0')
episode: 439 training return: tensor(-156.2704, device='cuda:0')
epoch: 110 test_true_pfm: 1626.1791002666096 sim_pfm: -63.3765348054003
episode: 440 training return: tensor(-86.0107, device='cuda:0')
episode: 441 training return: tensor(295.2819, device='cuda:0')
episode: 442 training return: tensor(-96.3547, device='cuda:0')
episode: 443 training return: tensor(-34.6824, device='cuda:0')
epoch: 111 test_true_pfm: 2418.025229873647 sim_pfm: -227.1147835715674
episode: 444 training return: tensor(-169.3293, device='cuda:0')
episode: 445 training return: tensor(108.0445, device='cuda:0')
episode: 446 training return: tensor(-230.0083, device='cuda:0')
episode: 447 training return: tensor(102.1359, device='cuda:0')
epoch: 112 test_true_pfm: 2334.4384297439265 sim_pfm: -238.4873368658397
episode: 448 training return: tensor(-221.8893, device='cuda:0')
episode: 449 training return: tensor(344.5666, device='cuda:0')
episode: 450 training return: tensor(-133.0630, device='cuda:0')
episode: 451 training return: tensor(-185.2720, device='cuda:0')
epoch: 113 test_true_pfm: 1648.4195594579721 sim_pfm: -108.31035243382212
episode: 452 training return: tensor(-10.2263, device='cuda:0')
episode: 453 training return: tensor(-184.3258, device='cuda:0')
episode: 454 training return: tensor(-178.3707, device='cuda:0')
episode: 455 training return: tensor(-219.8416, device='cuda:0')
epoch: 114 test_true_pfm: 1617.425600421929 sim_pfm: -237.83433818561025
episode: 456 training return: tensor(-173.9336, device='cuda:0')
episode: 457 training return: tensor(94.3670, device='cuda:0')
episode: 458 training return: tensor(-357.2096, device='cuda:0')
episode: 459 training return: tensor(-21.6064, device='cuda:0')
epoch: 115 test_true_pfm: 3041.6051754294567 sim_pfm: -245.44554409078168
episode: 460 training return: tensor(-274.2727, device='cuda:0')
episode: 461 training return: tensor(-18.2004, device='cuda:0')
episode: 462 training return: tensor(-323.4125, device='cuda:0')
episode: 463 training return: tensor(-340.3388, device='cuda:0')
epoch: 116 test_true_pfm: 2255.177064090147 sim_pfm: -171.44232552236645
episode: 464 training return: tensor(-218.4669, device='cuda:0')
episode: 465 training return: tensor(183.7214, device='cuda:0')
episode: 466 training return: tensor(105.9905, device='cuda:0')
episode: 467 training return: tensor(181.5508, device='cuda:0')
epoch: 117 test_true_pfm: 1617.9528110896892 sim_pfm: 24.252691638646258
episode: 468 training return: tensor(-195.4057, device='cuda:0')
episode: 469 training return: tensor(-291.5806, device='cuda:0')
episode: 470 training return: tensor(-208.9127, device='cuda:0')
episode: 471 training return: tensor(-218.6569, device='cuda:0')
epoch: 118 test_true_pfm: 1916.4656721179942 sim_pfm: 314.6838552195889
episode: 472 training return: tensor(-229.4547, device='cuda:0')
episode: 473 training return: tensor(2.3561, device='cuda:0')
episode: 474 training return: tensor(-163.0439, device='cuda:0')
episode: 475 training return: tensor(358.1519, device='cuda:0')
epoch: 119 test_true_pfm: 1643.2792699507288 sim_pfm: -27.724221601577785
episode: 476 training return: tensor(223.6589, device='cuda:0')
episode: 477 training return: tensor(209.5454, device='cuda:0')
episode: 478 training return: tensor(-23.9561, device='cuda:0')
episode: 479 training return: tensor(-298.5487, device='cuda:0')
epoch: 120 test_true_pfm: 1876.5228073395012 sim_pfm: 7.419093110200872
episode: 480 training return: tensor(-307.5801, device='cuda:0')
episode: 481 training return: tensor(192.8018, device='cuda:0')
episode: 482 training return: tensor(350.3964, device='cuda:0')
episode: 483 training return: tensor(-219.3953, device='cuda:0')
epoch: 121 test_true_pfm: 2703.0009370811554 sim_pfm: -81.55315129792628
episode: 484 training return: tensor(370.8137, device='cuda:0')
episode: 485 training return: tensor(393.0826, device='cuda:0')
episode: 486 training return: tensor(-111.1675, device='cuda:0')
episode: 487 training return: tensor(-25.4864, device='cuda:0')
epoch: 122 test_true_pfm: 1772.1434563234177 sim_pfm: -226.33621732432707
episode: 488 training return: tensor(-238.9364, device='cuda:0')
episode: 489 training return: tensor(-193.6763, device='cuda:0')
episode: 490 training return: tensor(-198.1592, device='cuda:0')
episode: 491 training return: tensor(-215.9312, device='cuda:0')
epoch: 123 test_true_pfm: 2298.794966585347 sim_pfm: -135.70059034736673
episode: 492 training return: tensor(120.2179, device='cuda:0')
episode: 493 training return: tensor(-300.9636, device='cuda:0')
episode: 494 training return: tensor(152.7032, device='cuda:0')
episode: 495 training return: tensor(-96.3707, device='cuda:0')
epoch: 124 test_true_pfm: 1693.6677895100045 sim_pfm: -229.14687491988298
episode: 496 training return: tensor(357.8387, device='cuda:0')
episode: 497 training return: tensor(-217.9868, device='cuda:0')
episode: 498 training return: tensor(-176.1082, device='cuda:0')
episode: 499 training return: tensor(-294.1173, device='cuda:0')
epoch: 125 test_true_pfm: 3086.2669107750794 sim_pfm: -242.1630100623006
episode: 500 training return: tensor(-275.8918, device='cuda:0')
episode: 501 training return: tensor(-362.6654, device='cuda:0')
episode: 502 training return: tensor(85.3341, device='cuda:0')
episode: 503 training return: tensor(-139.3552, device='cuda:0')
epoch: 126 test_true_pfm: 1975.038463829723 sim_pfm: 19.68162459076848
episode: 504 training return: tensor(-345.9129, device='cuda:0')
episode: 505 training return: tensor(-306.8415, device='cuda:0')
episode: 506 training return: tensor(83.3187, device='cuda:0')
episode: 507 training return: tensor(-260.1732, device='cuda:0')
epoch: 127 test_true_pfm: 1961.0485118912577 sim_pfm: -216.26871436423002
episode: 508 training return: tensor(-125.4302, device='cuda:0')
episode: 509 training return: tensor(-295.9748, device='cuda:0')
episode: 510 training return: tensor(191.2529, device='cuda:0')
episode: 511 training return: tensor(-268.1059, device='cuda:0')
epoch: 128 test_true_pfm: 1583.2677619426831 sim_pfm: -247.62637992134356
episode: 512 training return: tensor(-355.7615, device='cuda:0')
episode: 513 training return: tensor(-104.3804, device='cuda:0')
episode: 514 training return: tensor(169.2639, device='cuda:0')
episode: 515 training return: tensor(103.6298, device='cuda:0')
epoch: 129 test_true_pfm: 2452.0229454114483 sim_pfm: 98.89959472537157
episode: 516 training return: tensor(-293.6508, device='cuda:0')
episode: 517 training return: tensor(-306.3106, device='cuda:0')
episode: 518 training return: tensor(-63.9985, device='cuda:0')
episode: 519 training return: tensor(-262.2318, device='cuda:0')
epoch: 130 test_true_pfm: 1734.7704771979643 sim_pfm: -102.33955378366711
episode: 520 training return: tensor(-346.1388, device='cuda:0')
episode: 521 training return: tensor(-295.3511, device='cuda:0')
episode: 522 training return: tensor(-259.1438, device='cuda:0')
episode: 523 training return: tensor(64.6083, device='cuda:0')
epoch: 131 test_true_pfm: 2047.1030627329167 sim_pfm: -69.54398298364443
episode: 524 training return: tensor(-219.5762, device='cuda:0')
episode: 525 training return: tensor(99.1681, device='cuda:0')
episode: 526 training return: tensor(-315.1439, device='cuda:0')
episode: 527 training return: tensor(-359.6490, device='cuda:0')
epoch: 132 test_true_pfm: 3108.496312516765 sim_pfm: -52.01106491376413
episode: 528 training return: tensor(-77.3522, device='cuda:0')
episode: 529 training return: tensor(276.8658, device='cuda:0')
episode: 530 training return: tensor(-263.7588, device='cuda:0')
episode: 531 training return: tensor(-140.5308, device='cuda:0')
epoch: 133 test_true_pfm: 2108.5676688571434 sim_pfm: -209.03233597446038
episode: 532 training return: tensor(-210.1584, device='cuda:0')
episode: 533 training return: tensor(-117.8809, device='cuda:0')
episode: 534 training return: tensor(-315.5262, device='cuda:0')
episode: 535 training return: tensor(57.2953, device='cuda:0')
epoch: 134 test_true_pfm: 2114.972536337793 sim_pfm: -145.37781346220677
episode: 536 training return: tensor(-28.7990, device='cuda:0')
episode: 537 training return: tensor(-281.7108, device='cuda:0')
episode: 538 training return: tensor(109.0637, device='cuda:0')
episode: 539 training return: tensor(-354.9123, device='cuda:0')
epoch: 135 test_true_pfm: 2903.0979416815467 sim_pfm: -223.84812412279038
episode: 540 training return: tensor(157.9834, device='cuda:0')
episode: 541 training return: tensor(-164.5495, device='cuda:0')
episode: 542 training return: tensor(-123.8117, device='cuda:0')
episode: 543 training return: tensor(-100.9844, device='cuda:0')
epoch: 136 test_true_pfm: 2040.1526242250063 sim_pfm: -49.886430110161506
episode: 544 training return: tensor(-273.5867, device='cuda:0')
episode: 545 training return: tensor(39.7843, device='cuda:0')
episode: 546 training return: tensor(-359.3753, device='cuda:0')
episode: 547 training return: tensor(-271.4737, device='cuda:0')
epoch: 137 test_true_pfm: 2164.761100909739 sim_pfm: 97.15542324547035
episode: 548 training return: tensor(-317.7195, device='cuda:0')
episode: 549 training return: tensor(171.0020, device='cuda:0')
episode: 550 training return: tensor(-17.3899, device='cuda:0')
episode: 551 training return: tensor(-88.0235, device='cuda:0')
epoch: 138 test_true_pfm: 1692.751644923285 sim_pfm: -205.03796183806844
episode: 552 training return: tensor(96.3884, device='cuda:0')
episode: 553 training return: tensor(-120.2577, device='cuda:0')
episode: 554 training return: tensor(-312.4501, device='cuda:0')
episode: 555 training return: tensor(367.2004, device='cuda:0')
epoch: 139 test_true_pfm: 2002.9594797275852 sim_pfm: -50.326714697487965
episode: 556 training return: tensor(-212.6155, device='cuda:0')
episode: 557 training return: tensor(-309.1990, device='cuda:0')
episode: 558 training return: tensor(-210.5685, device='cuda:0')
episode: 559 training return: tensor(-273.7712, device='cuda:0')
epoch: 140 test_true_pfm: 2519.1767890884025 sim_pfm: -229.96191407942874
episode: 560 training return: tensor(380.2057, device='cuda:0')
episode: 561 training return: tensor(247.7059, device='cuda:0')
episode: 562 training return: tensor(-219.2018, device='cuda:0')
episode: 563 training return: tensor(-90.9232, device='cuda:0')
epoch: 141 test_true_pfm: 1625.5232548270842 sim_pfm: 41.22401088383049
episode: 564 training return: tensor(-24.2912, device='cuda:0')
episode: 565 training return: tensor(419.8790, device='cuda:0')
episode: 566 training return: tensor(59.6919, device='cuda:0')
episode: 567 training return: tensor(358.0013, device='cuda:0')
epoch: 142 test_true_pfm: 2399.3479178367156 sim_pfm: -99.02582592957576
episode: 568 training return: tensor(-353.2304, device='cuda:0')
episode: 569 training return: tensor(318.5359, device='cuda:0')
episode: 570 training return: tensor(-208.7543, device='cuda:0')
episode: 571 training return: tensor(-352.6457, device='cuda:0')
epoch: 143 test_true_pfm: 1797.379130013007 sim_pfm: -173.8957905865973
episode: 572 training return: tensor(-335.0793, device='cuda:0')
episode: 573 training return: tensor(-95.5972, device='cuda:0')
episode: 574 training return: tensor(334.4104, device='cuda:0')
episode: 575 training return: tensor(-124.2606, device='cuda:0')
epoch: 144 test_true_pfm: 1619.0196658805417 sim_pfm: -191.0555236658547
episode: 576 training return: tensor(-176.2990, device='cuda:0')
episode: 577 training return: tensor(-136.8627, device='cuda:0')
episode: 578 training return: tensor(16.4599, device='cuda:0')
episode: 579 training return: tensor(-11.4705, device='cuda:0')
epoch: 145 test_true_pfm: 1906.115444585539 sim_pfm: 47.62628964191148
episode: 580 training return: tensor(-320.3966, device='cuda:0')
episode: 581 training return: tensor(382.6154, device='cuda:0')
episode: 582 training return: tensor(-354.3489, device='cuda:0')
episode: 583 training return: tensor(103.2455, device='cuda:0')
epoch: 146 test_true_pfm: 2144.6216887737896 sim_pfm: -158.31501733598998
episode: 584 training return: tensor(237.3661, device='cuda:0')
episode: 585 training return: tensor(-278.3390, device='cuda:0')
episode: 586 training return: tensor(-205.9547, device='cuda:0')
episode: 587 training return: tensor(-172.3665, device='cuda:0')
epoch: 147 test_true_pfm: 2341.215038278155 sim_pfm: -219.9109085770518
episode: 588 training return: tensor(-27.6402, device='cuda:0')
episode: 589 training return: tensor(282.7657, device='cuda:0')
episode: 590 training return: tensor(-255.3746, device='cuda:0')
episode: 591 training return: tensor(397.8001, device='cuda:0')
epoch: 148 test_true_pfm: 1882.8540687226857 sim_pfm: -39.40167779421123
episode: 592 training return: tensor(380.9930, device='cuda:0')
episode: 593 training return: tensor(-317.9657, device='cuda:0')
episode: 594 training return: tensor(-131.7407, device='cuda:0')
episode: 595 training return: tensor(-343.1214, device='cuda:0')
epoch: 149 test_true_pfm: 2073.5724107361 sim_pfm: -23.618482045984518
episode: 596 training return: tensor(-296.9107, device='cuda:0')
episode: 597 training return: tensor(86.5820, device='cuda:0')
episode: 598 training return: tensor(101.2323, device='cuda:0')
episode: 599 training return: tensor(414.2922, device='cuda:0')
epoch: 150 test_true_pfm: 2143.4695260788612 sim_pfm: -223.65856325319814
