['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0']
epoch: 0 training_loss 0.27143647730350495 test_loss: 0.21655428409576416
epoch: 1 training_loss 0.20366869986057282 test_loss: 0.20834453105926515
epoch: 2 training_loss 0.19999648861587047 test_loss: 0.1915385603904724
epoch: 3 training_loss 0.19355352252721786 test_loss: 0.1949252963066101
epoch: 4 training_loss 0.19449681155383586 test_loss: 0.18359711170196533
epoch: 5 training_loss 0.183455845490098 test_loss: 0.17949720621109008
epoch: 6 training_loss 0.19215128369629383 test_loss: 0.1976908802986145
epoch: 7 training_loss 0.1918963860720396 test_loss: 0.17107534408569336
epoch: 8 training_loss 0.18854346379637718 test_loss: 0.18019381761550904
epoch: 9 training_loss 0.19437834091484546 test_loss: 0.18437594175338745
epoch: 10 training_loss 0.18705440789461136 test_loss: 0.19240559339523317
epoch: 11 training_loss 0.18485795207321642 test_loss: 0.18671369552612305
epoch: 12 training_loss 0.18104482866823673 test_loss: 0.1645815372467041
epoch: 13 training_loss 0.18289889998733996 test_loss: 0.19655864238739013
epoch: 14 training_loss 0.18609363563358783 test_loss: 0.15878167152404785
epoch: 15 training_loss 0.1877705878764391 test_loss: 0.1751102924346924
epoch: 16 training_loss 0.17679546251893044 test_loss: 0.18545823097229003
epoch: 17 training_loss 0.1836443004757166 test_loss: 0.18459285497665406
epoch: 18 training_loss 0.1803839276358485 test_loss: 0.18270606994628907
epoch: 19 training_loss 0.18652633152902126 test_loss: 0.16326998472213744
epoch: 20 training_loss 0.18981489822268485 test_loss: 0.15788100957870482
epoch: 21 training_loss 0.19010837264358998 test_loss: 0.18957384824752807
epoch: 22 training_loss 0.18973957873880865 test_loss: 0.18921157121658325
epoch: 23 training_loss 0.18800852306187152 test_loss: 0.18753585815429688
epoch: 24 training_loss 0.18279229976236822 test_loss: 0.17608211040496827
epoch: 25 training_loss 0.19193286702036857 test_loss: 0.1772722125053406
epoch: 26 training_loss 0.18044786117970943 test_loss: 0.18221814632415773
epoch: 27 training_loss 0.17306525498628617 test_loss: 0.16736090183258057
epoch: 28 training_loss 0.18122657991945743 test_loss: 0.18247967958450317
epoch: 29 training_loss 0.18519935585558414 test_loss: 0.18775343894958496
epoch: 30 training_loss 0.18898332133889198 test_loss: 0.1754104256629944
epoch: 31 training_loss 0.18228440977632998 test_loss: 0.17040003538131715
epoch: 32 training_loss 0.18521923504769802 test_loss: 0.17061595916748046
epoch: 33 training_loss 0.17961639903485774 test_loss: 0.17828248739242553
epoch: 34 training_loss 0.1855501637607813 test_loss: 0.16068037748336791
epoch: 35 training_loss 0.18594758689403534 test_loss: 0.18003093004226683
epoch: 36 training_loss 0.17674079075455665 test_loss: 0.18355238437652588
epoch: 37 training_loss 0.18046713463962077 test_loss: 0.16620724201202391
epoch: 38 training_loss 0.18245527625083924 test_loss: 0.17942488193511963
epoch: 39 training_loss 0.18190720714628697 test_loss: 0.1627374768257141
epoch: 40 training_loss 0.18071384213864802 test_loss: 0.17420661449432373
epoch: 41 training_loss 0.1764142245054245 test_loss: 0.1734901785850525
epoch: 42 training_loss 0.18444413922727107 test_loss: 0.1764596223831177
epoch: 43 training_loss 0.18129424937069416 test_loss: 0.17872884273529052
epoch: 44 training_loss 0.18025381974875926 test_loss: 0.18143763542175292
epoch: 45 training_loss 0.18207818239927293 test_loss: 0.16852468252182007
epoch: 46 training_loss 0.18637416362762452 test_loss: 0.18169816732406616
epoch: 47 training_loss 0.18149968057870866 test_loss: 0.17595365047454833
epoch: 48 training_loss 0.18435672141611575 test_loss: 0.18737246990203857
epoch: 49 training_loss 0.18340581066906453 test_loss: 0.18431740999221802
epoch: 50 training_loss 0.17953163363039493 test_loss: 0.1748386025428772
epoch: 51 training_loss 0.17261702470481396 test_loss: 0.18509249687194823
epoch: 52 training_loss 0.18701401427388192 test_loss: 0.17398383617401122
epoch: 53 training_loss 0.18122906371951103 test_loss: 0.18004559278488158
epoch: 54 training_loss 0.17848668716847896 test_loss: 0.16736506223678588
epoch: 55 training_loss 0.1824198706448078 test_loss: 0.1652941107749939
epoch: 56 training_loss 0.18412355728447438 test_loss: 0.18076730966567994
epoch: 57 training_loss 0.17994139052927494 test_loss: 0.1651719331741333
epoch: 58 training_loss 0.17521153964102268 test_loss: 0.16300958395004272
epoch: 59 training_loss 0.1790910080820322 test_loss: 0.18480296134948732
epoch: 60 training_loss 0.18471453174948693 test_loss: 0.16313210725784302
epoch: 61 training_loss 0.17856673419475555 test_loss: 0.18826738595962525
epoch: 62 training_loss 0.184249232634902 test_loss: 0.18470999002456664
epoch: 63 training_loss 0.1785783861577511 test_loss: 0.1689218044281006
epoch: 64 training_loss 0.17700140170753 test_loss: 0.17367360591888428
epoch: 65 training_loss 0.18138053707778454 test_loss: 0.19041714668273926
epoch: 66 training_loss 0.18402356088161467 test_loss: 0.18774447441101075
epoch: 67 training_loss 0.1792148517817259 test_loss: 0.19636374711990356
epoch: 68 training_loss 0.16936052832752466 test_loss: 0.17896032333374023
epoch: 69 training_loss 0.18145090460777283 test_loss: 0.18009309768676757
epoch: 70 training_loss 0.18947901211678983 test_loss: 0.16849273443222046
epoch: 71 training_loss 0.17979406535625458 test_loss: 0.16977367401123047
epoch: 72 training_loss 0.18117118678987026 test_loss: 0.15262989997863768
epoch: 73 training_loss 0.17201896890997886 test_loss: 0.17011637687683107
epoch: 74 training_loss 0.1741650751978159 test_loss: 0.17468894720077516
epoch: 75 training_loss 0.17626707561314106 test_loss: 0.18516398668289186
epoch: 76 training_loss 0.1816424885392189 test_loss: 0.17943440675735473
epoch: 77 training_loss 0.18567691415548324 test_loss: 0.17805603742599488
epoch: 78 training_loss 0.18074202008545398 test_loss: 0.16955684423446654
epoch: 79 training_loss 0.17181795202195643 test_loss: 0.19064205884933472
epoch: 80 training_loss 0.18543548315763472 test_loss: 0.18960968255996705
epoch: 81 training_loss 0.18866809166967868 test_loss: 0.18068983554840087
epoch: 82 training_loss 0.18347005143761635 test_loss: 0.18774869441986083
epoch: 83 training_loss 0.17625248841941357 test_loss: 0.17290905714035035
epoch: 84 training_loss 0.1764927852898836 test_loss: 0.17392981052398682
epoch: 85 training_loss 0.1819144944101572 test_loss: 0.19490646123886107
epoch: 86 training_loss 0.17837807565927505 test_loss: 0.18109916448593139
epoch: 87 training_loss 0.1817648269236088 test_loss: 0.16863402128219604
epoch: 88 training_loss 0.17335830509662628 test_loss: 0.18096204996109008
epoch: 89 training_loss 0.18221639834344386 test_loss: 0.17940962314605713
epoch: 90 training_loss 0.18600257873535156 test_loss: 0.17863537073135377
epoch: 91 training_loss 0.18618384458124637 test_loss: 0.1803303360939026
epoch: 92 training_loss 0.1831726672500372 test_loss: 0.18313868045806886
epoch: 93 training_loss 0.17452170878648757 test_loss: 0.17256083488464355
epoch: 94 training_loss 0.18814972408115863 test_loss: 0.1862137198448181
epoch: 95 training_loss 0.17653260871767998 test_loss: 0.17211076021194457
epoch: 96 training_loss 0.18143141284585 test_loss: 0.18329362869262694
epoch: 97 training_loss 0.18326058655977248 test_loss: 0.17874517440795898
epoch: 98 training_loss 0.17545522145926953 test_loss: 0.16771410703659057
epoch: 99 training_loss 0.17634585451334714 test_loss: 0.17670047283172607
epoch: 100 training_loss 0.18066729992628097 test_loss: 0.18103026151657103
epoch: 101 training_loss 0.17578423745930194 test_loss: 0.1891589045524597
epoch: 102 training_loss 0.17959428884088993 test_loss: 0.18184717893600463
epoch: 103 training_loss 0.17435900419950484 test_loss: 0.1664642095565796
epoch: 104 training_loss 0.1781664327904582 test_loss: 0.19083330631256104
epoch: 105 training_loss 0.1710932332277298 test_loss: 0.17495893239974974
epoch: 106 training_loss 0.17938601307570934 test_loss: 0.18321338891983033
epoch: 107 training_loss 0.1795954440534115 test_loss: 0.1832596778869629
epoch: 108 training_loss 0.17776010267436504 test_loss: 0.18233044147491456
epoch: 109 training_loss 0.18350233145058156 test_loss: 0.19692625999450683
epoch: 110 training_loss 0.17742218129336834 test_loss: 0.1926889657974243
epoch: 111 training_loss 0.18223545961081983 test_loss: 0.1794693350791931
epoch: 112 training_loss 0.17698456160724163 test_loss: 0.16874678134918214
epoch: 113 training_loss 0.17561172105371953 test_loss: 0.1893524169921875
epoch: 114 training_loss 0.1721693179756403 test_loss: 0.1877931237220764
epoch: 115 training_loss 0.1754912381619215 test_loss: 0.16634544134140014
epoch: 116 training_loss 0.17951861515641213 test_loss: 0.1770055890083313
epoch: 117 training_loss 0.18506765447556972 test_loss: 0.186668062210083
epoch: 118 training_loss 0.17733703546226023 test_loss: 0.16640934944152833
epoch: 119 training_loss 0.16894329704344271 test_loss: 0.17325865030288695
epoch: 120 training_loss 0.18154782339930534 test_loss: 0.1974899649620056
epoch: 121 training_loss 0.1793511328101158 test_loss: 0.180509614944458
epoch: 122 training_loss 0.17652300059795378 test_loss: 0.1681322455406189
epoch: 123 training_loss 0.18260434187948704 test_loss: 0.17122488021850585
epoch: 124 training_loss 0.1777825390920043 test_loss: 0.17496036291122435
epoch: 125 training_loss 0.1756748501956463 test_loss: 0.184756600856781
epoch: 126 training_loss 0.17122500330209733 test_loss: 0.17952227592468262
epoch: 127 training_loss 0.18373647786676883 test_loss: 0.15595972537994385
epoch: 128 training_loss 0.17693022198975086 test_loss: 0.18402220010757447
epoch: 129 training_loss 0.18283654272556304 test_loss: 0.17035120725631714
epoch: 130 training_loss 0.1852960240095854 test_loss: 0.17295161485671998
epoch: 131 training_loss 0.1691281294822693 test_loss: 0.16814558506011962
epoch: 132 training_loss 0.18178809687495232 test_loss: 0.17610390186309816
epoch: 133 training_loss 0.17876486383378506 test_loss: 0.15370259284973145
epoch: 134 training_loss 0.18428511150181293 test_loss: 0.1875467300415039
epoch: 135 training_loss 0.18211387395858764 test_loss: 0.1692010998725891
epoch: 136 training_loss 0.1737730598449707 test_loss: 0.18288741111755372
epoch: 137 training_loss 0.18315239433199168 test_loss: 0.18313390016555786
epoch: 138 training_loss 0.18451283037662505 test_loss: 0.1861656904220581
epoch: 139 training_loss 0.18176640436053276 test_loss: 0.1645343065261841
epoch: 140 training_loss 0.17655618458986283 test_loss: 0.18445194959640504
epoch: 141 training_loss 0.17480406381189822 test_loss: 0.17502580881118773
epoch: 142 training_loss 0.17745365969836713 test_loss: 0.16311335563659668
epoch: 143 training_loss 0.18059664733707906 test_loss: 0.16996829509735106
epoch: 144 training_loss 0.1811237069964409 test_loss: 0.18437918424606323
epoch: 145 training_loss 0.17675243839621543 test_loss: 0.1776281476020813
epoch: 146 training_loss 0.17592777080833913 test_loss: 0.17155277729034424
epoch: 147 training_loss 0.17519957304000855 test_loss: 0.19253324270248412
epoch: 148 training_loss 0.17901475474238396 test_loss: 0.17822933197021484
epoch: 149 training_loss 0.18048050805926322 test_loss: 0.17457419633865356
epoch: 0 training_loss 8.328403568267822 test_loss: 5.096269989013672
epoch: 1 training_loss 4.100046212673187 test_loss: 3.1376848220825195
epoch: 2 training_loss 2.7572537684440612 test_loss: 2.3803150177001955
epoch: 3 training_loss 2.200341213941574 test_loss: 2.0244024276733397
epoch: 4 training_loss 1.8823548436164856 test_loss: 1.7584875106811524
epoch: 5 training_loss 1.64317729473114 test_loss: 1.5541840553283692
epoch: 6 training_loss 1.507031331062317 test_loss: 1.4863967895507812
epoch: 7 training_loss 1.4241731333732606 test_loss: 1.357247543334961
epoch: 8 training_loss 1.3219135427474975 test_loss: 1.2380941390991211
epoch: 9 training_loss 1.2714514362812042 test_loss: 1.2632458686828614
epoch: 10 training_loss 1.199014903306961 test_loss: 1.1623288154602052
epoch: 11 training_loss 1.1542102867364883 test_loss: 1.1188178062438965
epoch: 12 training_loss 1.0973717683553696 test_loss: 1.0653599739074706
epoch: 13 training_loss 1.0399269598722458 test_loss: 1.0802176475524903
epoch: 14 training_loss 1.0148965162038803 test_loss: 1.0443227767944336
epoch: 15 training_loss 1.0040485686063767 test_loss: 1.0123700141906737
epoch: 16 training_loss 0.9921231156587601 test_loss: 0.9733299255371094
epoch: 17 training_loss 0.9648632514476776 test_loss: 0.9268082618713379
epoch: 18 training_loss 0.9140388518571854 test_loss: 0.9110162734985352
epoch: 19 training_loss 0.8998010110855102 test_loss: 0.9315818786621094
epoch: 20 training_loss 0.9090071207284928 test_loss: 0.8873661994934082
epoch: 21 training_loss 0.8874241358041763 test_loss: 0.860595989227295
epoch: 22 training_loss 0.8504641234874726 test_loss: 0.865300464630127
epoch: 23 training_loss 0.840591311454773 test_loss: 0.8201061248779297
epoch: 24 training_loss 0.8192830330133438 test_loss: 0.7909195899963379
epoch: 25 training_loss 0.8201772606372834 test_loss: 0.8029199600219726
epoch: 26 training_loss 0.8079592013359069 test_loss: 0.8002915382385254
epoch: 27 training_loss 0.788690157532692 test_loss: 0.7858876705169677
epoch: 28 training_loss 0.805175850391388 test_loss: 0.7846429347991943
epoch: 29 training_loss 0.7644970434904098 test_loss: 0.749190092086792
epoch: 30 training_loss 0.7541356176137924 test_loss: 0.7442447185516358
epoch: 31 training_loss 0.7609982228279114 test_loss: 0.715669059753418
epoch: 32 training_loss 0.738442239165306 test_loss: 0.7235343933105469
epoch: 33 training_loss 0.7485277718305587 test_loss: 0.7259092330932617
epoch: 34 training_loss 0.7417462134361267 test_loss: 0.7317715644836426
epoch: 35 training_loss 0.7274889159202576 test_loss: 0.7320426940917969
epoch: 36 training_loss 0.7418326288461685 test_loss: 0.7231010437011719
epoch: 37 training_loss 0.715499911904335 test_loss: 0.7054071426391602
epoch: 38 training_loss 0.693443386554718 test_loss: 0.7063212871551514
epoch: 39 training_loss 0.6973465675115585 test_loss: 0.6819159984588623
epoch: 40 training_loss 0.6934370470046997 test_loss: 0.6828616619110107
epoch: 41 training_loss 0.6930692946910858 test_loss: 0.684373140335083
epoch: 42 training_loss 0.6896425294876098 test_loss: 0.7029215812683105
epoch: 43 training_loss 0.6857509726285934 test_loss: 0.7087727069854737
epoch: 44 training_loss 0.6851746517419816 test_loss: 0.6565140724182129
epoch: 45 training_loss 0.6745079708099365 test_loss: 0.6637450218200683
epoch: 46 training_loss 0.6760509771108627 test_loss: 0.6820096015930176
epoch: 47 training_loss 0.6599909788370133 test_loss: 0.6769350528717041
epoch: 48 training_loss 0.6659447526931763 test_loss: 0.6652570247650147
epoch: 49 training_loss 0.6634958553314209 test_loss: 0.6426469326019287
epoch: 50 training_loss 0.6579028165340424 test_loss: 0.6616940021514892
epoch: 51 training_loss 0.645981810092926 test_loss: 0.63791823387146
epoch: 52 training_loss 0.6556358104944229 test_loss: 0.6460265636444091
epoch: 53 training_loss 0.63791124522686 test_loss: 0.638861894607544
epoch: 54 training_loss 0.6345599132776261 test_loss: 0.6321605205535888
epoch: 55 training_loss 0.6361629843711853 test_loss: 0.6486330032348633
epoch: 56 training_loss 0.6390338915586472 test_loss: 0.6433255195617675
epoch: 57 training_loss 0.628927703499794 test_loss: 0.6268404960632324
epoch: 58 training_loss 0.6318481242656708 test_loss: 0.6167984008789062
epoch: 59 training_loss 0.6328048354387283 test_loss: 0.6300850868225097
epoch: 60 training_loss 0.6414078134298324 test_loss: 0.6197128772735596
epoch: 61 training_loss 0.6207142615318298 test_loss: 0.636076545715332
epoch: 62 training_loss 0.6337739044427871 test_loss: 0.6409468650817871
epoch: 63 training_loss 0.6149423199892045 test_loss: 0.6218292713165283
epoch: 64 training_loss 0.626077396273613 test_loss: 0.6160518169403076
epoch: 65 training_loss 0.6110295981168747 test_loss: 0.5984203815460205
epoch: 66 training_loss 0.6012199848890305 test_loss: 0.6375036716461182
epoch: 67 training_loss 0.6114280551671982 test_loss: 0.5837112426757812
epoch: 68 training_loss 0.5993020898103714 test_loss: 0.6073244094848633
epoch: 69 training_loss 0.5990707391500473 test_loss: 0.596477746963501
epoch: 70 training_loss 0.5941880828142166 test_loss: 0.5767361164093018
epoch: 71 training_loss 0.5999274212121963 test_loss: 0.6073362350463867
epoch: 72 training_loss 0.6091111540794373 test_loss: 0.5930105686187744
epoch: 73 training_loss 0.5817437314987183 test_loss: 0.5937693595886231
epoch: 74 training_loss 0.5870360964536667 test_loss: 0.6108499050140381
epoch: 75 training_loss 0.5877821260690689 test_loss: 0.5828622341156006
epoch: 76 training_loss 0.5838478398323059 test_loss: 0.5903228759765625
epoch: 77 training_loss 0.584725970029831 test_loss: 0.583132266998291
epoch: 78 training_loss 0.5796374946832656 test_loss: 0.5853131294250489
epoch: 79 training_loss 0.57229926943779 test_loss: 0.5882294178009033
epoch: 80 training_loss 0.5770753067731857 test_loss: 0.582236909866333
epoch: 81 training_loss 0.5757486093044281 test_loss: 0.5837272644042969
epoch: 82 training_loss 0.5736272591352463 test_loss: 0.5811634540557862
epoch: 83 training_loss 0.5709744343161582 test_loss: 0.5784379959106445
epoch: 84 training_loss 0.5718275898694992 test_loss: 0.5832160472869873
epoch: 85 training_loss 0.5694422194361687 test_loss: 0.5799476623535156
epoch: 86 training_loss 0.565707568526268 test_loss: 0.5596689224243164
epoch: 87 training_loss 0.5712177509069443 test_loss: 0.5821637630462646
epoch: 88 training_loss 0.5712685027718544 test_loss: 0.5625560283660889
epoch: 89 training_loss 0.5644042977690696 test_loss: 0.5516214370727539
epoch: 90 training_loss 0.5572864651679993 test_loss: 0.5462592124938965
epoch: 91 training_loss 0.5771825495362282 test_loss: 0.5736440658569336
epoch: 92 training_loss 0.5493673449754715 test_loss: 0.5547126770019531
epoch: 93 training_loss 0.5536605352163315 test_loss: 0.5605880737304687
epoch: 94 training_loss 0.5653514307737351 test_loss: 0.5574786186218261
epoch: 95 training_loss 0.5531030225753785 test_loss: 0.5538410663604736
epoch: 96 training_loss 0.5581196922063828 test_loss: 0.5453804969787598
epoch: 97 training_loss 0.5380369812250138 test_loss: 0.5422601222991943
epoch: 98 training_loss 0.5523907071352006 test_loss: 0.5657931327819824
epoch: 99 training_loss 0.543140427172184 test_loss: 0.5301450252532959
epoch: 100 training_loss 0.555523712337017 test_loss: 0.5458203315734863
epoch: 101 training_loss 0.5420210510492325 test_loss: 0.5593047618865967
epoch: 102 training_loss 0.5403170365095139 test_loss: 0.5357656002044677
epoch: 103 training_loss 0.5411033347249031 test_loss: 0.535388994216919
epoch: 104 training_loss 0.5408725589513779 test_loss: 0.5257750988006592
epoch: 105 training_loss 0.5423548635840416 test_loss: 0.5363487720489502
epoch: 106 training_loss 0.5439179295301437 test_loss: 0.5280323505401612
epoch: 107 training_loss 0.538498901128769 test_loss: 0.5423620700836181
epoch: 108 training_loss 0.5328170365095138 test_loss: 0.5333365440368653
epoch: 109 training_loss 0.5319877055287361 test_loss: 0.5263224601745605
epoch: 110 training_loss 0.5380410876870155 test_loss: 0.543912124633789
epoch: 111 training_loss 0.5328140586614609 test_loss: 0.5812825679779052
epoch: 112 training_loss 0.5341958752274514 test_loss: 0.5371148586273193
epoch: 113 training_loss 0.536270971596241 test_loss: 0.519017505645752
epoch: 114 training_loss 0.5315149688720703 test_loss: 0.5084722518920899
epoch: 115 training_loss 0.5373119971156121 test_loss: 0.519178819656372
epoch: 116 training_loss 0.5307878905534744 test_loss: 0.5348758697509766
epoch: 117 training_loss 0.5264148789644242 test_loss: 0.5272232532501221
epoch: 118 training_loss 0.523717268705368 test_loss: 0.5150234222412109
epoch: 119 training_loss 0.53184779047966 test_loss: 0.5345153331756591
epoch: 120 training_loss 0.5389815312623978 test_loss: 0.5326603412628174
epoch: 121 training_loss 0.522489843070507 test_loss: 0.5411506652832031
epoch: 122 training_loss 0.5236278888583183 test_loss: 0.5237319469451904
epoch: 123 training_loss 0.512832615673542 test_loss: 0.5254078388214112
epoch: 124 training_loss 0.5168249237537385 test_loss: 0.5043755531311035
epoch: 125 training_loss 0.5180624812841416 test_loss: 0.5206692218780518
epoch: 126 training_loss 0.5245204842090607 test_loss: 0.5388132572174072
epoch: 127 training_loss 0.5219256290793419 test_loss: 0.5202658176422119
epoch: 128 training_loss 0.5141718885302544 test_loss: 0.5087424755096436
epoch: 129 training_loss 0.5226610735058784 test_loss: 0.511711597442627
epoch: 130 training_loss 0.5179566526412964 test_loss: 0.5361273288726807
epoch: 131 training_loss 0.5142274922132493 test_loss: 0.49608736038208007
epoch: 132 training_loss 0.506241617500782 test_loss: 0.4973108768463135
epoch: 133 training_loss 0.5103642931580543 test_loss: 0.49861598014831543
epoch: 134 training_loss 0.518222875893116 test_loss: 0.4923823356628418
epoch: 135 training_loss 0.5136893621087074 test_loss: 0.5247304439544678
epoch: 136 training_loss 0.5055471009016037 test_loss: 0.5090303421020508
epoch: 137 training_loss 0.5041229060292244 test_loss: 0.49771842956542967
epoch: 138 training_loss 0.5088502791523933 test_loss: 0.5034282207489014
epoch: 139 training_loss 0.4987734878063202 test_loss: 0.49542999267578125
epoch: 140 training_loss 0.50925955504179 test_loss: 0.5004698753356933
epoch: 141 training_loss 0.50305623203516 test_loss: 0.5097286701202393
epoch: 142 training_loss 0.5092841985821724 test_loss: 0.5063046932220459
epoch: 143 training_loss 0.5115932130813599 test_loss: 0.5087010860443115
epoch: 144 training_loss 0.5043414229154587 test_loss: 0.4881885051727295
epoch: 145 training_loss 0.5076158314943313 test_loss: 0.4954491138458252
epoch: 146 training_loss 0.5033208721876145 test_loss: 0.5079679012298584
epoch: 147 training_loss 0.5090594583749771 test_loss: 0.503093671798706
epoch: 148 training_loss 0.5134094497561454 test_loss: 0.5100153923034668
epoch: 149 training_loss 0.5099225136637687 test_loss: 0.49878387451171874
2001.801884070631
episode: 0 training return: tensor(-391.5941, device='cuda:0')
episode: 1 training return: tensor(-250.8856, device='cuda:0')
episode: 2 training return: tensor(-99.4890, device='cuda:0')
episode: 3 training return: tensor(-347.0198, device='cuda:0')
epoch: 1 test_true_pfm: 2094.4163805492426 sim_pfm: 82.97671941252581
episode: 4 training return: tensor(-130.5763, device='cuda:0')
episode: 5 training return: tensor(-247.9938, device='cuda:0')
episode: 6 training return: tensor(-144.5934, device='cuda:0')
episode: 7 training return: tensor(-387.3447, device='cuda:0')
epoch: 2 test_true_pfm: 3030.20437612488 sim_pfm: 239.11677678657966
episode: 8 training return: tensor(-113.3693, device='cuda:0')
episode: 9 training return: tensor(238.8631, device='cuda:0')
episode: 10 training return: tensor(-193.6379, device='cuda:0')
episode: 11 training return: tensor(-351.2206, device='cuda:0')
epoch: 3 test_true_pfm: 1280.5128586142876 sim_pfm: -66.10277731425595
episode: 12 training return: tensor(-345.0530, device='cuda:0')
episode: 13 training return: tensor(-349.8811, device='cuda:0')
episode: 14 training return: tensor(-64.3759, device='cuda:0')
episode: 15 training return: tensor(-343.9227, device='cuda:0')
epoch: 4 test_true_pfm: 2005.1176660527278 sim_pfm: -343.11015426617814
episode: 16 training return: tensor(-371.4060, device='cuda:0')
episode: 17 training return: tensor(-341.3325, device='cuda:0')
episode: 18 training return: tensor(-343.9885, device='cuda:0')
episode: 19 training return: tensor(-160.6625, device='cuda:0')
epoch: 5 test_true_pfm: 1264.7882650582687 sim_pfm: -346.67647822098417
episode: 20 training return: tensor(-52.0963, device='cuda:0')
episode: 21 training return: tensor(-61.4200, device='cuda:0')
episode: 22 training return: tensor(-161.4979, device='cuda:0')
episode: 23 training return: tensor(-246.2599, device='cuda:0')
epoch: 6 test_true_pfm: 1287.6721605385826 sim_pfm: -359.9954037229084
episode: 24 training return: tensor(-347.1721, device='cuda:0')
episode: 25 training return: tensor(-11.7390, device='cuda:0')
episode: 26 training return: tensor(-293.1717, device='cuda:0')
episode: 27 training return: tensor(-140.8933, device='cuda:0')
epoch: 7 test_true_pfm: 1783.6141280679694 sim_pfm: -121.8230949594484
episode: 28 training return: tensor(-347.7855, device='cuda:0')
episode: 29 training return: tensor(-282.2317, device='cuda:0')
episode: 30 training return: tensor(135.1229, device='cuda:0')
episode: 31 training return: tensor(-303.5455, device='cuda:0')
epoch: 8 test_true_pfm: 2007.410063238448 sim_pfm: -328.16843180930783
episode: 32 training return: tensor(-344.2192, device='cuda:0')
episode: 33 training return: tensor(181.2431, device='cuda:0')
episode: 34 training return: tensor(-258.7417, device='cuda:0')
episode: 35 training return: tensor(-372.3374, device='cuda:0')
epoch: 9 test_true_pfm: 1709.4003592878187 sim_pfm: -29.240936836732242
episode: 36 training return: tensor(-220.6738, device='cuda:0')
episode: 37 training return: tensor(-157.4828, device='cuda:0')
episode: 38 training return: tensor(143.8275, device='cuda:0')
episode: 39 training return: tensor(-336.4461, device='cuda:0')
epoch: 10 test_true_pfm: 1352.7549580461457 sim_pfm: -17.455880183377303
episode: 40 training return: tensor(-258.4106, device='cuda:0')
episode: 41 training return: tensor(-47.6869, device='cuda:0')
episode: 42 training return: tensor(63.5550, device='cuda:0')
episode: 43 training return: tensor(-350.0150, device='cuda:0')
epoch: 11 test_true_pfm: 2342.202055476654 sim_pfm: 19.382074473736186
episode: 44 training return: tensor(-248.1368, device='cuda:0')
episode: 45 training return: tensor(-348.6574, device='cuda:0')
episode: 46 training return: tensor(321.6749, device='cuda:0')
episode: 47 training return: tensor(-292.9994, device='cuda:0')
epoch: 12 test_true_pfm: 1951.8505322522171 sim_pfm: -34.82911804388277
episode: 48 training return: tensor(-288.2785, device='cuda:0')
episode: 49 training return: tensor(317.4355, device='cuda:0')
episode: 50 training return: tensor(-388.4021, device='cuda:0')
episode: 51 training return: tensor(-219.7256, device='cuda:0')
epoch: 13 test_true_pfm: 2580.5284723098634 sim_pfm: 141.68448279052973
episode: 52 training return: tensor(-255.2063, device='cuda:0')
episode: 53 training return: tensor(-16.5023, device='cuda:0')
episode: 54 training return: tensor(-150.8555, device='cuda:0')
episode: 55 training return: tensor(-302.0680, device='cuda:0')
epoch: 14 test_true_pfm: 2714.107504013334 sim_pfm: -208.02272386107748
episode: 56 training return: tensor(266.1857, device='cuda:0')
episode: 57 training return: tensor(-85.9704, device='cuda:0')
episode: 58 training return: tensor(-317.0456, device='cuda:0')
episode: 59 training return: tensor(2.2976, device='cuda:0')
epoch: 15 test_true_pfm: 2383.7885990940044 sim_pfm: 179.78297127773598
episode: 60 training return: tensor(-345.2804, device='cuda:0')
episode: 61 training return: tensor(-333.7505, device='cuda:0')
episode: 62 training return: tensor(-343.4704, device='cuda:0')
episode: 63 training return: tensor(-101.1470, device='cuda:0')
epoch: 16 test_true_pfm: 2335.933741847739 sim_pfm: -58.65311885284609
episode: 64 training return: tensor(-299.1523, device='cuda:0')
episode: 65 training return: tensor(3.7249, device='cuda:0')
episode: 66 training return: tensor(-340.5295, device='cuda:0')
episode: 67 training return: tensor(338.4207, device='cuda:0')
epoch: 17 test_true_pfm: 2143.1808932142026 sim_pfm: 248.14831596445097
episode: 68 training return: tensor(-101.4690, device='cuda:0')
episode: 69 training return: tensor(-343.0472, device='cuda:0')
episode: 70 training return: tensor(-352.9688, device='cuda:0')
episode: 71 training return: tensor(23.6892, device='cuda:0')
epoch: 18 test_true_pfm: 1634.576336494778 sim_pfm: 69.12358275290656
episode: 72 training return: tensor(235.4595, device='cuda:0')
episode: 73 training return: tensor(60.1401, device='cuda:0')
episode: 74 training return: tensor(68.0378, device='cuda:0')
episode: 75 training return: tensor(-131.7506, device='cuda:0')
epoch: 19 test_true_pfm: 1975.5571878998055 sim_pfm: -95.86680193276455
episode: 76 training return: tensor(-224.5850, device='cuda:0')
episode: 77 training return: tensor(97.4562, device='cuda:0')
episode: 78 training return: tensor(-343.0334, device='cuda:0')
episode: 79 training return: tensor(226.0165, device='cuda:0')
epoch: 20 test_true_pfm: 2317.99906223083 sim_pfm: -26.05207102513911
episode: 80 training return: tensor(-345.7782, device='cuda:0')
episode: 81 training return: tensor(-340.0502, device='cuda:0')
episode: 82 training return: tensor(-245.4319, device='cuda:0')
episode: 83 training return: tensor(147.7727, device='cuda:0')
epoch: 21 test_true_pfm: 1749.2883504874324 sim_pfm: -226.91743175025718
episode: 84 training return: tensor(337.7505, device='cuda:0')
episode: 85 training return: tensor(-338.3846, device='cuda:0')
episode: 86 training return: tensor(-269.4973, device='cuda:0')
episode: 87 training return: tensor(-334.3732, device='cuda:0')
epoch: 22 test_true_pfm: 1921.3401691926222 sim_pfm: 20.111150152476814
episode: 88 training return: tensor(35.7508, device='cuda:0')
episode: 89 training return: tensor(-235.3122, device='cuda:0')
episode: 90 training return: tensor(62.9005, device='cuda:0')
episode: 91 training return: tensor(-89.6140, device='cuda:0')
epoch: 23 test_true_pfm: 2057.10019571927 sim_pfm: 174.02626753847775
episode: 92 training return: tensor(-248.2728, device='cuda:0')
episode: 93 training return: tensor(-26.5633, device='cuda:0')
episode: 94 training return: tensor(336.7914, device='cuda:0')
episode: 95 training return: tensor(50.6819, device='cuda:0')
epoch: 24 test_true_pfm: 2042.2981536490333 sim_pfm: -214.549386101251
episode: 96 training return: tensor(-300.9202, device='cuda:0')
episode: 97 training return: tensor(-238.6779, device='cuda:0')
episode: 98 training return: tensor(-50.2526, device='cuda:0')
episode: 99 training return: tensor(-82.2215, device='cuda:0')
epoch: 25 test_true_pfm: 2419.2286881889163 sim_pfm: -141.33419513411354
episode: 100 training return: tensor(-93.8898, device='cuda:0')
episode: 101 training return: tensor(-358.2494, device='cuda:0')
episode: 102 training return: tensor(220.7890, device='cuda:0')
episode: 103 training return: tensor(-87.1843, device='cuda:0')
epoch: 26 test_true_pfm: 2955.7085235066734 sim_pfm: 180.41861140037267
episode: 104 training return: tensor(-266.7443, device='cuda:0')
episode: 105 training return: tensor(-298.8304, device='cuda:0')
episode: 106 training return: tensor(-74.9055, device='cuda:0')
episode: 107 training return: tensor(-323.6136, device='cuda:0')
epoch: 27 test_true_pfm: 1646.1684659900175 sim_pfm: -59.645896245007556
episode: 108 training return: tensor(-311.6216, device='cuda:0')
episode: 109 training return: tensor(-340.0280, device='cuda:0')
episode: 110 training return: tensor(23.3941, device='cuda:0')
episode: 111 training return: tensor(154.2478, device='cuda:0')
epoch: 28 test_true_pfm: 1796.9579118242566 sim_pfm: 12.063038406214522
episode: 112 training return: tensor(136.8380, device='cuda:0')
episode: 113 training return: tensor(-57.3881, device='cuda:0')
episode: 114 training return: tensor(-304.0943, device='cuda:0')
episode: 115 training return: tensor(-62.7058, device='cuda:0')
epoch: 29 test_true_pfm: 1642.8526528185264 sim_pfm: -73.07937225707185
episode: 116 training return: tensor(-219.1088, device='cuda:0')
episode: 117 training return: tensor(-187.7649, device='cuda:0')
episode: 118 training return: tensor(-135.5621, device='cuda:0')
episode: 119 training return: tensor(417.7697, device='cuda:0')
epoch: 30 test_true_pfm: 3005.900580801541 sim_pfm: -216.49897326571713
episode: 120 training return: tensor(-339.7100, device='cuda:0')
episode: 121 training return: tensor(-8.1767, device='cuda:0')
episode: 122 training return: tensor(-348.5128, device='cuda:0')
episode: 123 training return: tensor(-243.3499, device='cuda:0')
epoch: 31 test_true_pfm: 2890.6360667162935 sim_pfm: 143.684337627977
episode: 124 training return: tensor(-185.5180, device='cuda:0')
episode: 125 training return: tensor(110.4103, device='cuda:0')
episode: 126 training return: tensor(-349.3289, device='cuda:0')
episode: 127 training return: tensor(-343.6014, device='cuda:0')
epoch: 32 test_true_pfm: 2397.0948430429985 sim_pfm: 275.5791712612069
episode: 128 training return: tensor(-339.2726, device='cuda:0')
episode: 129 training return: tensor(-301.8989, device='cuda:0')
episode: 130 training return: tensor(-343.8121, device='cuda:0')
episode: 131 training return: tensor(55.3481, device='cuda:0')
epoch: 33 test_true_pfm: 2409.147218531833 sim_pfm: 204.3271094210601
episode: 132 training return: tensor(388.9382, device='cuda:0')
episode: 133 training return: tensor(-242.9497, device='cuda:0')
episode: 134 training return: tensor(-7.5634, device='cuda:0')
episode: 135 training return: tensor(52.5236, device='cuda:0')
epoch: 34 test_true_pfm: 2312.8203535963353 sim_pfm: -241.53079464108063
episode: 136 training return: tensor(348.8242, device='cuda:0')
episode: 137 training return: tensor(207.3827, device='cuda:0')
episode: 138 training return: tensor(316.5949, device='cuda:0')
episode: 139 training return: tensor(-301.9420, device='cuda:0')
epoch: 35 test_true_pfm: 2794.7827674391124 sim_pfm: -132.35097606563554
episode: 140 training return: tensor(158.4751, device='cuda:0')
episode: 141 training return: tensor(346.9659, device='cuda:0')
episode: 142 training return: tensor(-231.2085, device='cuda:0')
episode: 143 training return: tensor(-315.0768, device='cuda:0')
epoch: 36 test_true_pfm: 1706.8510275631854 sim_pfm: -104.9400724269605
episode: 144 training return: tensor(-71.5021, device='cuda:0')
episode: 145 training return: tensor(-261.0143, device='cuda:0')
episode: 146 training return: tensor(-291.3623, device='cuda:0')
episode: 147 training return: tensor(-347.3509, device='cuda:0')
epoch: 37 test_true_pfm: 2827.658922179968 sim_pfm: 154.91493780961414
episode: 148 training return: tensor(-256.4549, device='cuda:0')
episode: 149 training return: tensor(286.8343, device='cuda:0')
episode: 150 training return: tensor(-259.8139, device='cuda:0')
episode: 151 training return: tensor(-244.5182, device='cuda:0')
epoch: 38 test_true_pfm: 2634.9761177834357 sim_pfm: 119.54546693787172
episode: 152 training return: tensor(-247.6973, device='cuda:0')
episode: 153 training return: tensor(-212.7730, device='cuda:0')
episode: 154 training return: tensor(-247.1045, device='cuda:0')
episode: 155 training return: tensor(-339.3949, device='cuda:0')
epoch: 39 test_true_pfm: 2249.2185697235914 sim_pfm: 165.75821227833512
episode: 156 training return: tensor(306.0768, device='cuda:0')
episode: 157 training return: tensor(-241.4429, device='cuda:0')
episode: 158 training return: tensor(361.4360, device='cuda:0')
episode: 159 training return: tensor(56.5990, device='cuda:0')
epoch: 40 test_true_pfm: 2671.9466639877246 sim_pfm: 114.3625476435215
episode: 160 training return: tensor(-298.2827, device='cuda:0')
episode: 161 training return: tensor(-341.8211, device='cuda:0')
episode: 162 training return: tensor(377.5204, device='cuda:0')
episode: 163 training return: tensor(21.5928, device='cuda:0')
epoch: 41 test_true_pfm: 2465.5445675999604 sim_pfm: 174.46759483008645
episode: 164 training return: tensor(-359.6849, device='cuda:0')
episode: 165 training return: tensor(-353.2283, device='cuda:0')
episode: 166 training return: tensor(-328.9603, device='cuda:0')
episode: 167 training return: tensor(415.5976, device='cuda:0')
epoch: 42 test_true_pfm: 2160.98169952357 sim_pfm: 90.23610685724027
episode: 168 training return: tensor(-160.4680, device='cuda:0')
episode: 169 training return: tensor(410.4951, device='cuda:0')
episode: 170 training return: tensor(-3.3615, device='cuda:0')
episode: 171 training return: tensor(-302.6057, device='cuda:0')
epoch: 43 test_true_pfm: 1918.9210638196366 sim_pfm: -238.73345138747632
episode: 172 training return: tensor(-348.5574, device='cuda:0')
episode: 173 training return: tensor(-334.0067, device='cuda:0')
episode: 174 training return: tensor(-312.1284, device='cuda:0')
episode: 175 training return: tensor(-366.6046, device='cuda:0')
epoch: 44 test_true_pfm: 2634.0452265538092 sim_pfm: 367.4161372579013
episode: 176 training return: tensor(-235.9517, device='cuda:0')
episode: 177 training return: tensor(249.5351, device='cuda:0')
episode: 178 training return: tensor(398.0611, device='cuda:0')
episode: 179 training return: tensor(-40.0002, device='cuda:0')
epoch: 45 test_true_pfm: 2411.818321935594 sim_pfm: -51.87025974485247
episode: 180 training return: tensor(-268.7084, device='cuda:0')
episode: 181 training return: tensor(-349.3951, device='cuda:0')
episode: 182 training return: tensor(-73.8201, device='cuda:0')
episode: 183 training return: tensor(-287.3557, device='cuda:0')
epoch: 46 test_true_pfm: 1909.691923257434 sim_pfm: -31.6926044083278
episode: 184 training return: tensor(-138.3578, device='cuda:0')
episode: 185 training return: tensor(-19.6462, device='cuda:0')
episode: 186 training return: tensor(-65.9193, device='cuda:0')
episode: 187 training return: tensor(-223.5982, device='cuda:0')
epoch: 47 test_true_pfm: 1970.441977987349 sim_pfm: -231.41311051723702
episode: 188 training return: tensor(-340.0898, device='cuda:0')
episode: 189 training return: tensor(-287.4448, device='cuda:0')
episode: 190 training return: tensor(384.3324, device='cuda:0')
episode: 191 training return: tensor(-244.1597, device='cuda:0')
epoch: 48 test_true_pfm: 2706.197373042451 sim_pfm: 209.77184545855076
episode: 192 training return: tensor(382.4172, device='cuda:0')
episode: 193 training return: tensor(-80.8511, device='cuda:0')
episode: 194 training return: tensor(-202.9893, device='cuda:0')
episode: 195 training return: tensor(-295.2006, device='cuda:0')
epoch: 49 test_true_pfm: 2614.013383870397 sim_pfm: -64.32555244502146
episode: 196 training return: tensor(426.0296, device='cuda:0')
episode: 197 training return: tensor(-345.8703, device='cuda:0')
episode: 198 training return: tensor(341.4969, device='cuda:0')
episode: 199 training return: tensor(172.2415, device='cuda:0')
epoch: 50 test_true_pfm: 2291.045456906378 sim_pfm: 235.62393085739072
episode: 200 training return: tensor(-64.3089, device='cuda:0')
episode: 201 training return: tensor(-131.1715, device='cuda:0')
episode: 202 training return: tensor(-246.5964, device='cuda:0')
episode: 203 training return: tensor(-348.5596, device='cuda:0')
epoch: 51 test_true_pfm: 2745.8515241116097 sim_pfm: -33.08423637976133
episode: 204 training return: tensor(-109.0688, device='cuda:0')
episode: 205 training return: tensor(-288.4086, device='cuda:0')
episode: 206 training return: tensor(-53.5823, device='cuda:0')
episode: 207 training return: tensor(232.6764, device='cuda:0')
epoch: 52 test_true_pfm: 2196.1628494981287 sim_pfm: -54.442827700908914
episode: 208 training return: tensor(200.2081, device='cuda:0')
episode: 209 training return: tensor(-313.3177, device='cuda:0')
episode: 210 training return: tensor(-154.1329, device='cuda:0')
episode: 211 training return: tensor(155.3575, device='cuda:0')
epoch: 53 test_true_pfm: 2126.7912084044788 sim_pfm: -30.98746996835689
episode: 212 training return: tensor(-345.3573, device='cuda:0')
episode: 213 training return: tensor(-246.3881, device='cuda:0')
episode: 214 training return: tensor(-319.6399, device='cuda:0')
episode: 215 training return: tensor(-344.9784, device='cuda:0')
epoch: 54 test_true_pfm: 2018.6964249962912 sim_pfm: -231.10447731394865
episode: 216 training return: tensor(354.4565, device='cuda:0')
episode: 217 training return: tensor(153.7386, device='cuda:0')
episode: 218 training return: tensor(-47.3752, device='cuda:0')
episode: 219 training return: tensor(392.0397, device='cuda:0')
epoch: 55 test_true_pfm: 1876.9190645245515 sim_pfm: -216.45238101945142
episode: 220 training return: tensor(-340.8980, device='cuda:0')
episode: 221 training return: tensor(-303.0275, device='cuda:0')
episode: 222 training return: tensor(-259.7272, device='cuda:0')
episode: 223 training return: tensor(-78.7876, device='cuda:0')
epoch: 56 test_true_pfm: 2095.62833539988 sim_pfm: 91.53064165592271
episode: 224 training return: tensor(362.7364, device='cuda:0')
episode: 225 training return: tensor(-251.7231, device='cuda:0')
episode: 226 training return: tensor(-350.3002, device='cuda:0')
episode: 227 training return: tensor(-187.2946, device='cuda:0')
epoch: 57 test_true_pfm: 1776.9415866904117 sim_pfm: 82.25000272854231
episode: 228 training return: tensor(-170.6505, device='cuda:0')
episode: 229 training return: tensor(-266.1289, device='cuda:0')
episode: 230 training return: tensor(156.2014, device='cuda:0')
episode: 231 training return: tensor(-92.1492, device='cuda:0')
epoch: 58 test_true_pfm: 2151.090613408976 sim_pfm: -38.46135270735249
episode: 232 training return: tensor(-355.2712, device='cuda:0')
episode: 233 training return: tensor(-249.7103, device='cuda:0')
episode: 234 training return: tensor(-336.9822, device='cuda:0')
episode: 235 training return: tensor(59.9967, device='cuda:0')
epoch: 59 test_true_pfm: 2469.1702167956114 sim_pfm: 62.068473303166684
episode: 236 training return: tensor(349.2011, device='cuda:0')
episode: 237 training return: tensor(-338.8147, device='cuda:0')
episode: 238 training return: tensor(41.5048, device='cuda:0')
episode: 239 training return: tensor(174.8960, device='cuda:0')
epoch: 60 test_true_pfm: 2314.0016550023342 sim_pfm: 26.7714038650156
episode: 240 training return: tensor(-317.2930, device='cuda:0')
episode: 241 training return: tensor(243.6200, device='cuda:0')
episode: 242 training return: tensor(-266.3205, device='cuda:0')
episode: 243 training return: tensor(-351.9590, device='cuda:0')
epoch: 61 test_true_pfm: 2614.5144393639507 sim_pfm: 125.67313528355832
episode: 244 training return: tensor(373.8037, device='cuda:0')
episode: 245 training return: tensor(52.6842, device='cuda:0')
episode: 246 training return: tensor(-338.4745, device='cuda:0')
episode: 247 training return: tensor(-296.2145, device='cuda:0')
epoch: 62 test_true_pfm: 3010.4038631141434 sim_pfm: 243.98052451224066
episode: 248 training return: tensor(-81.2444, device='cuda:0')
episode: 249 training return: tensor(-333.3019, device='cuda:0')
episode: 250 training return: tensor(368.4258, device='cuda:0')
episode: 251 training return: tensor(-344.0999, device='cuda:0')
epoch: 63 test_true_pfm: 2477.8105511115973 sim_pfm: -54.70566230240123
episode: 252 training return: tensor(-144.0925, device='cuda:0')
episode: 253 training return: tensor(-349.4894, device='cuda:0')
episode: 254 training return: tensor(-333.7117, device='cuda:0')
episode: 255 training return: tensor(335.2993, device='cuda:0')
epoch: 64 test_true_pfm: 2786.302966247413 sim_pfm: 199.44404764013598
episode: 256 training return: tensor(104.2698, device='cuda:0')
episode: 257 training return: tensor(-349.8855, device='cuda:0')
episode: 258 training return: tensor(434.9508, device='cuda:0')
episode: 259 training return: tensor(-344.4115, device='cuda:0')
epoch: 65 test_true_pfm: 2329.0023947439095 sim_pfm: 108.6123842341088
episode: 260 training return: tensor(-300.5671, device='cuda:0')
episode: 261 training return: tensor(-319.4871, device='cuda:0')
episode: 262 training return: tensor(449.8092, device='cuda:0')
episode: 263 training return: tensor(-82.5650, device='cuda:0')
epoch: 66 test_true_pfm: 2482.9496591628663 sim_pfm: 59.50223265143965
episode: 264 training return: tensor(-46.4916, device='cuda:0')
episode: 265 training return: tensor(-132.3091, device='cuda:0')
episode: 266 training return: tensor(-75.2496, device='cuda:0')
episode: 267 training return: tensor(-341.4877, device='cuda:0')
epoch: 67 test_true_pfm: 3210.8826266083656 sim_pfm: 20.455655809491873
episode: 268 training return: tensor(-297.6136, device='cuda:0')
episode: 269 training return: tensor(-347.0615, device='cuda:0')
episode: 270 training return: tensor(334.6329, device='cuda:0')
episode: 271 training return: tensor(141.7794, device='cuda:0')
epoch: 68 test_true_pfm: 3063.317756289895 sim_pfm: 173.23031814165492
episode: 272 training return: tensor(306.3490, device='cuda:0')
episode: 273 training return: tensor(74.5113, device='cuda:0')
episode: 274 training return: tensor(48.1406, device='cuda:0')
episode: 275 training return: tensor(-345.7167, device='cuda:0')
epoch: 69 test_true_pfm: 2600.359812766356 sim_pfm: -61.83104254241334
episode: 276 training return: tensor(-349.0505, device='cuda:0')
episode: 277 training return: tensor(-297.9762, device='cuda:0')
episode: 278 training return: tensor(-78.6648, device='cuda:0')
episode: 279 training return: tensor(-353.1163, device='cuda:0')
epoch: 70 test_true_pfm: 2302.0687514544416 sim_pfm: -31.628862780188985
episode: 280 training return: tensor(-102.2204, device='cuda:0')
episode: 281 training return: tensor(-322.6809, device='cuda:0')
episode: 282 training return: tensor(-98.6057, device='cuda:0')
episode: 283 training return: tensor(-320.1112, device='cuda:0')
epoch: 71 test_true_pfm: 2471.145274735654 sim_pfm: 194.00425963524808
episode: 284 training return: tensor(-334.6776, device='cuda:0')
episode: 285 training return: tensor(-42.6752, device='cuda:0')
episode: 286 training return: tensor(260.3013, device='cuda:0')
episode: 287 training return: tensor(-336.8047, device='cuda:0')
epoch: 72 test_true_pfm: 2668.654982088885 sim_pfm: 60.59749570842056
episode: 288 training return: tensor(-347.7229, device='cuda:0')
episode: 289 training return: tensor(16.8864, device='cuda:0')
episode: 290 training return: tensor(141.5253, device='cuda:0')
episode: 291 training return: tensor(-254.7587, device='cuda:0')
epoch: 73 test_true_pfm: 2969.4997593430076 sim_pfm: 208.6846824164386
episode: 292 training return: tensor(-364.7577, device='cuda:0')
episode: 293 training return: tensor(-106.6630, device='cuda:0')
episode: 294 training return: tensor(-274.6469, device='cuda:0')
episode: 295 training return: tensor(-328.7567, device='cuda:0')
epoch: 74 test_true_pfm: 2758.1025685329782 sim_pfm: -54.128768290858716
episode: 296 training return: tensor(-391.0911, device='cuda:0')
episode: 297 training return: tensor(-337.0674, device='cuda:0')
episode: 298 training return: tensor(-116.3938, device='cuda:0')
episode: 299 training return: tensor(-183.6400, device='cuda:0')
epoch: 75 test_true_pfm: 2183.5790985780477 sim_pfm: 281.963388369603
episode: 300 training return: tensor(-93.6137, device='cuda:0')
episode: 301 training return: tensor(383.4452, device='cuda:0')
episode: 302 training return: tensor(-318.2864, device='cuda:0')
episode: 303 training return: tensor(-152.4880, device='cuda:0')
epoch: 76 test_true_pfm: 2447.808928562172 sim_pfm: 50.70418494095793
episode: 304 training return: tensor(-1.8635, device='cuda:0')
episode: 305 training return: tensor(-10.1504, device='cuda:0')
episode: 306 training return: tensor(-340.0813, device='cuda:0')
episode: 307 training return: tensor(-357.6349, device='cuda:0')
epoch: 77 test_true_pfm: 2668.9924400304426 sim_pfm: -22.605597942582488
episode: 308 training return: tensor(8.5372, device='cuda:0')
episode: 309 training return: tensor(92.6686, device='cuda:0')
episode: 310 training return: tensor(399.6888, device='cuda:0')
episode: 311 training return: tensor(-49.8127, device='cuda:0')
epoch: 78 test_true_pfm: 2798.5363843132527 sim_pfm: 277.09163835951284
episode: 312 training return: tensor(-340.5342, device='cuda:0')
episode: 313 training return: tensor(216.8075, device='cuda:0')
episode: 314 training return: tensor(161.2040, device='cuda:0')
episode: 315 training return: tensor(-316.2144, device='cuda:0')
epoch: 79 test_true_pfm: 2380.018882376655 sim_pfm: 101.20950399868889
episode: 316 training return: tensor(36.3362, device='cuda:0')
episode: 317 training return: tensor(-11.4290, device='cuda:0')
episode: 318 training return: tensor(-126.4730, device='cuda:0')
episode: 319 training return: tensor(-117.3705, device='cuda:0')
epoch: 80 test_true_pfm: 2554.750951932114 sim_pfm: 163.51325070599947
episode: 320 training return: tensor(-280.2600, device='cuda:0')
episode: 321 training return: tensor(400.6347, device='cuda:0')
episode: 322 training return: tensor(-160.8365, device='cuda:0')
episode: 323 training return: tensor(-57.3408, device='cuda:0')
epoch: 81 test_true_pfm: 2357.03646562757 sim_pfm: -36.760095444701925
episode: 324 training return: tensor(-115.5530, device='cuda:0')
episode: 325 training return: tensor(-288.8084, device='cuda:0')
episode: 326 training return: tensor(-208.6419, device='cuda:0')
episode: 327 training return: tensor(296.6628, device='cuda:0')
epoch: 82 test_true_pfm: 2979.3874751112617 sim_pfm: 41.419619426053636
episode: 328 training return: tensor(42.3286, device='cuda:0')
episode: 329 training return: tensor(-346.0956, device='cuda:0')
episode: 330 training return: tensor(-249.4608, device='cuda:0')
episode: 331 training return: tensor(367.6480, device='cuda:0')
epoch: 83 test_true_pfm: 2254.722748639979 sim_pfm: 238.11372050576998
episode: 332 training return: tensor(-337.6718, device='cuda:0')
episode: 333 training return: tensor(-338.1931, device='cuda:0')
episode: 334 training return: tensor(271.5141, device='cuda:0')
episode: 335 training return: tensor(68.2531, device='cuda:0')
epoch: 84 test_true_pfm: 2275.107704738011 sim_pfm: 42.43316549703013
episode: 336 training return: tensor(-391.9606, device='cuda:0')
episode: 337 training return: tensor(-287.0579, device='cuda:0')
episode: 338 training return: tensor(-350.8833, device='cuda:0')
episode: 339 training return: tensor(-324.0850, device='cuda:0')
epoch: 85 test_true_pfm: 2520.9499501005344 sim_pfm: 80.149240849268
episode: 340 training return: tensor(-256.1033, device='cuda:0')
episode: 341 training return: tensor(-164.2979, device='cuda:0')
episode: 342 training return: tensor(63.5647, device='cuda:0')
episode: 343 training return: tensor(-252.7669, device='cuda:0')
epoch: 86 test_true_pfm: 2287.3788012028067 sim_pfm: -51.68430305355772
episode: 344 training return: tensor(359.5734, device='cuda:0')
episode: 345 training return: tensor(-339.9451, device='cuda:0')
episode: 346 training return: tensor(-95.8692, device='cuda:0')
episode: 347 training return: tensor(-341.5806, device='cuda:0')
epoch: 87 test_true_pfm: 2477.3506982795266 sim_pfm: 200.39360382660138
episode: 348 training return: tensor(-342.8073, device='cuda:0')
episode: 349 training return: tensor(-251.4350, device='cuda:0')
episode: 350 training return: tensor(-23.0765, device='cuda:0')
episode: 351 training return: tensor(-223.8203, device='cuda:0')
epoch: 88 test_true_pfm: 2296.9455075693095 sim_pfm: 80.60898735771964
episode: 352 training return: tensor(-43.6442, device='cuda:0')
episode: 353 training return: tensor(20.2489, device='cuda:0')
episode: 354 training return: tensor(-79.1235, device='cuda:0')
episode: 355 training return: tensor(-327.4020, device='cuda:0')
epoch: 89 test_true_pfm: 2339.0696198502633 sim_pfm: 162.4376567202271
episode: 356 training return: tensor(395.3992, device='cuda:0')
episode: 357 training return: tensor(-110.4870, device='cuda:0')
episode: 358 training return: tensor(-362.2136, device='cuda:0')
episode: 359 training return: tensor(4.4838, device='cuda:0')
epoch: 90 test_true_pfm: 2972.9293841739277 sim_pfm: 305.21843140018365
episode: 360 training return: tensor(-51.3332, device='cuda:0')
episode: 361 training return: tensor(188.5166, device='cuda:0')
episode: 362 training return: tensor(-252.8908, device='cuda:0')
episode: 363 training return: tensor(-79.1333, device='cuda:0')
epoch: 91 test_true_pfm: 2678.595447024574 sim_pfm: 183.14086462037326
episode: 364 training return: tensor(-210.8147, device='cuda:0')
episode: 365 training return: tensor(-67.8179, device='cuda:0')
episode: 366 training return: tensor(-149.9315, device='cuda:0')
episode: 367 training return: tensor(-268.8361, device='cuda:0')
epoch: 92 test_true_pfm: 2211.8334359653545 sim_pfm: 160.39039066052646
episode: 368 training return: tensor(352.5874, device='cuda:0')
episode: 369 training return: tensor(-313.7907, device='cuda:0')
episode: 370 training return: tensor(-168.8220, device='cuda:0')
episode: 371 training return: tensor(364.3582, device='cuda:0')
epoch: 93 test_true_pfm: 2790.484013683835 sim_pfm: 173.7350250776411
episode: 372 training return: tensor(-140.2954, device='cuda:0')
episode: 373 training return: tensor(-135.9203, device='cuda:0')
episode: 374 training return: tensor(-344.5901, device='cuda:0')
episode: 375 training return: tensor(-235.1748, device='cuda:0')
epoch: 94 test_true_pfm: 2317.3010239173723 sim_pfm: 239.7787104344849
episode: 376 training return: tensor(-350.9110, device='cuda:0')
episode: 377 training return: tensor(-355.7517, device='cuda:0')
episode: 378 training return: tensor(-87.0962, device='cuda:0')
episode: 379 training return: tensor(138.5436, device='cuda:0')
epoch: 95 test_true_pfm: 2600.66675826933 sim_pfm: 24.05728153604044
episode: 380 training return: tensor(403.6850, device='cuda:0')
episode: 381 training return: tensor(-326.6302, device='cuda:0')
episode: 382 training return: tensor(-162.1987, device='cuda:0')
episode: 383 training return: tensor(356.4387, device='cuda:0')
epoch: 96 test_true_pfm: 2239.2416832287095 sim_pfm: -58.76212023862172
episode: 384 training return: tensor(-134.9465, device='cuda:0')
episode: 385 training return: tensor(-317.7299, device='cuda:0')
episode: 386 training return: tensor(-169.9662, device='cuda:0')
episode: 387 training return: tensor(-6.4069, device='cuda:0')
epoch: 97 test_true_pfm: 2619.579677831754 sim_pfm: 291.7253461267489
episode: 388 training return: tensor(-344.0990, device='cuda:0')
episode: 389 training return: tensor(295.6131, device='cuda:0')
episode: 390 training return: tensor(-345.3122, device='cuda:0')
episode: 391 training return: tensor(-288.5821, device='cuda:0')
epoch: 98 test_true_pfm: 2403.3430019280427 sim_pfm: -21.90494645447082
episode: 392 training return: tensor(-346.7315, device='cuda:0')
episode: 393 training return: tensor(-123.0245, device='cuda:0')
episode: 394 training return: tensor(-334.3037, device='cuda:0')
episode: 395 training return: tensor(361.8636, device='cuda:0')
epoch: 99 test_true_pfm: 1829.860668686376 sim_pfm: -97.45700553942395
episode: 396 training return: tensor(-343.7764, device='cuda:0')
episode: 397 training return: tensor(-160.2906, device='cuda:0')
episode: 398 training return: tensor(-138.3986, device='cuda:0')
episode: 399 training return: tensor(-93.2696, device='cuda:0')
epoch: 100 test_true_pfm: 2411.2411505275377 sim_pfm: 97.75971948937513
episode: 400 training return: tensor(276.1365, device='cuda:0')
episode: 401 training return: tensor(327.5667, device='cuda:0')
episode: 402 training return: tensor(-348.0568, device='cuda:0')
episode: 403 training return: tensor(-107.9254, device='cuda:0')
epoch: 101 test_true_pfm: 2522.836227936824 sim_pfm: 217.1504900790557
episode: 404 training return: tensor(-339.2614, device='cuda:0')
episode: 405 training return: tensor(39.9875, device='cuda:0')
episode: 406 training return: tensor(-192.6470, device='cuda:0')
episode: 407 training return: tensor(-71.9584, device='cuda:0')
epoch: 102 test_true_pfm: 2053.065210126641 sim_pfm: 104.17700729436667
episode: 408 training return: tensor(-344.3134, device='cuda:0')
episode: 409 training return: tensor(234.8461, device='cuda:0')
episode: 410 training return: tensor(-250.1561, device='cuda:0')
episode: 411 training return: tensor(-339.0914, device='cuda:0')
epoch: 103 test_true_pfm: 1737.4451058050902 sim_pfm: 77.83903182990616
episode: 412 training return: tensor(402.3603, device='cuda:0')
episode: 413 training return: tensor(-47.5864, device='cuda:0')
episode: 414 training return: tensor(-238.9327, device='cuda:0')
episode: 415 training return: tensor(326.2232, device='cuda:0')
epoch: 104 test_true_pfm: 2380.4043095530233 sim_pfm: 345.9574008607112
episode: 416 training return: tensor(-79.4090, device='cuda:0')
episode: 417 training return: tensor(237.6760, device='cuda:0')
episode: 418 training return: tensor(-346.9500, device='cuda:0')
episode: 419 training return: tensor(-60.0812, device='cuda:0')
epoch: 105 test_true_pfm: 1495.84648845363 sim_pfm: 67.97110898701551
episode: 420 training return: tensor(-345.1438, device='cuda:0')
episode: 421 training return: tensor(337.5676, device='cuda:0')
episode: 422 training return: tensor(-346.5169, device='cuda:0')
episode: 423 training return: tensor(-266.4992, device='cuda:0')
epoch: 106 test_true_pfm: 2747.5785635234647 sim_pfm: 80.43435152917907
episode: 424 training return: tensor(-245.6606, device='cuda:0')
episode: 425 training return: tensor(378.7162, device='cuda:0')
episode: 426 training return: tensor(-164.5833, device='cuda:0')
episode: 427 training return: tensor(-345.5583, device='cuda:0')
epoch: 107 test_true_pfm: 3220.942492684177 sim_pfm: 70.08249414134964
episode: 428 training return: tensor(-54.4351, device='cuda:0')
episode: 429 training return: tensor(9.1618, device='cuda:0')
episode: 430 training return: tensor(-329.6676, device='cuda:0')
episode: 431 training return: tensor(-315.0622, device='cuda:0')
epoch: 108 test_true_pfm: 2854.1939573501645 sim_pfm: 253.9071479625709
episode: 432 training return: tensor(-342.5579, device='cuda:0')
episode: 433 training return: tensor(-198.8527, device='cuda:0')
episode: 434 training return: tensor(-109.7730, device='cuda:0')
episode: 435 training return: tensor(-301.2486, device='cuda:0')
epoch: 109 test_true_pfm: 2390.297761744247 sim_pfm: 110.35353985329857
episode: 436 training return: tensor(-291.4792, device='cuda:0')
episode: 437 training return: tensor(-343.6935, device='cuda:0')
episode: 438 training return: tensor(93.1327, device='cuda:0')
episode: 439 training return: tensor(-351.9672, device='cuda:0')
epoch: 110 test_true_pfm: 1838.407032065318 sim_pfm: 86.81140988232801
episode: 440 training return: tensor(-338.1410, device='cuda:0')
episode: 441 training return: tensor(358.4360, device='cuda:0')
episode: 442 training return: tensor(286.0107, device='cuda:0')
episode: 443 training return: tensor(-351.3522, device='cuda:0')
epoch: 111 test_true_pfm: 2982.1093729836793 sim_pfm: 79.97692363841149
episode: 444 training return: tensor(-334.5056, device='cuda:0')
episode: 445 training return: tensor(-318.4484, device='cuda:0')
episode: 446 training return: tensor(52.9281, device='cuda:0')
episode: 447 training return: tensor(247.1530, device='cuda:0')
epoch: 112 test_true_pfm: 2382.8459161835995 sim_pfm: -9.835161099152174
episode: 448 training return: tensor(-283.1420, device='cuda:0')
episode: 449 training return: tensor(-96.2604, device='cuda:0')
episode: 450 training return: tensor(-345.7523, device='cuda:0')
episode: 451 training return: tensor(-336.7414, device='cuda:0')
epoch: 113 test_true_pfm: 2296.2482373409603 sim_pfm: 99.53238585713552
episode: 452 training return: tensor(-348.9513, device='cuda:0')
episode: 453 training return: tensor(-350.8235, device='cuda:0')
episode: 454 training return: tensor(-345.7473, device='cuda:0')
episode: 455 training return: tensor(-321.3794, device='cuda:0')
epoch: 114 test_true_pfm: 2250.2176978922894 sim_pfm: 181.5182269232852
episode: 456 training return: tensor(-284.4619, device='cuda:0')
episode: 457 training return: tensor(-348.3187, device='cuda:0')
episode: 458 training return: tensor(167.1647, device='cuda:0')
episode: 459 training return: tensor(364.3389, device='cuda:0')
epoch: 115 test_true_pfm: 2750.351975429999 sim_pfm: -90.74316042770322
episode: 460 training return: tensor(-346.7209, device='cuda:0')
episode: 461 training return: tensor(348.2791, device='cuda:0')
episode: 462 training return: tensor(-245.5659, device='cuda:0')
episode: 463 training return: tensor(-141.1628, device='cuda:0')
epoch: 116 test_true_pfm: 2571.4456424626264 sim_pfm: 125.45038651007538
episode: 464 training return: tensor(-351.7212, device='cuda:0')
episode: 465 training return: tensor(28.4350, device='cuda:0')
episode: 466 training return: tensor(-88.8598, device='cuda:0')
episode: 467 training return: tensor(356.6046, device='cuda:0')
epoch: 117 test_true_pfm: 2307.6712596946804 sim_pfm: 114.77427259685162
episode: 468 training return: tensor(316.8272, device='cuda:0')
episode: 469 training return: tensor(-59.4558, device='cuda:0')
episode: 470 training return: tensor(-242.7209, device='cuda:0')
episode: 471 training return: tensor(-346.9932, device='cuda:0')
epoch: 118 test_true_pfm: 2271.8035262780213 sim_pfm: -138.5154702718428
episode: 472 training return: tensor(-242.9559, device='cuda:0')
episode: 473 training return: tensor(-348.1901, device='cuda:0')
episode: 474 training return: tensor(-346.8770, device='cuda:0')
episode: 475 training return: tensor(-139.7313, device='cuda:0')
epoch: 119 test_true_pfm: 2588.319308250971 sim_pfm: 397.5133318324515
episode: 476 training return: tensor(-320.2354, device='cuda:0')
episode: 477 training return: tensor(-112.5997, device='cuda:0')
episode: 478 training return: tensor(-89.9800, device='cuda:0')
episode: 479 training return: tensor(-342.8826, device='cuda:0')
epoch: 120 test_true_pfm: 2631.284068247392 sim_pfm: -21.471605742068885
episode: 480 training return: tensor(-288.1047, device='cuda:0')
episode: 481 training return: tensor(-184.0218, device='cuda:0')
episode: 482 training return: tensor(362.8949, device='cuda:0')
episode: 483 training return: tensor(421.3131, device='cuda:0')
epoch: 121 test_true_pfm: 2258.876122889627 sim_pfm: -86.62202884526535
episode: 484 training return: tensor(-244.6252, device='cuda:0')
episode: 485 training return: tensor(-224.9527, device='cuda:0')
episode: 486 training return: tensor(372.6050, device='cuda:0')
episode: 487 training return: tensor(-106.9081, device='cuda:0')
epoch: 122 test_true_pfm: 1894.0350757750214 sim_pfm: -88.63673726126824
episode: 488 training return: tensor(-213.6841, device='cuda:0')
episode: 489 training return: tensor(-347.4273, device='cuda:0')
episode: 490 training return: tensor(-362.1945, device='cuda:0')
episode: 491 training return: tensor(150.2344, device='cuda:0')
epoch: 123 test_true_pfm: 2481.4535721907178 sim_pfm: 78.57910250738496
episode: 492 training return: tensor(-93.3014, device='cuda:0')
episode: 493 training return: tensor(326.6862, device='cuda:0')
episode: 494 training return: tensor(377.6637, device='cuda:0')
episode: 495 training return: tensor(-55.2005, device='cuda:0')
epoch: 124 test_true_pfm: 2391.743072121107 sim_pfm: 304.3158228304528
episode: 496 training return: tensor(-290.5637, device='cuda:0')
episode: 497 training return: tensor(-231.8630, device='cuda:0')
episode: 498 training return: tensor(-336.7078, device='cuda:0')
episode: 499 training return: tensor(152.4651, device='cuda:0')
epoch: 125 test_true_pfm: 2639.081363954398 sim_pfm: 95.8249250773418
episode: 500 training return: tensor(-351.5916, device='cuda:0')
episode: 501 training return: tensor(-282.8392, device='cuda:0')
episode: 502 training return: tensor(-243.4183, device='cuda:0')
episode: 503 training return: tensor(-310.5219, device='cuda:0')
epoch: 126 test_true_pfm: 2067.6979622360122 sim_pfm: 285.6991355434099
episode: 504 training return: tensor(176.4210, device='cuda:0')
episode: 505 training return: tensor(-340.5939, device='cuda:0')
episode: 506 training return: tensor(346.5855, device='cuda:0')
episode: 507 training return: tensor(-71.3065, device='cuda:0')
epoch: 127 test_true_pfm: 2261.928404652223 sim_pfm: 178.98050517602437
episode: 508 training return: tensor(-286.2404, device='cuda:0')
episode: 509 training return: tensor(370.3578, device='cuda:0')
episode: 510 training return: tensor(258.1521, device='cuda:0')
episode: 511 training return: tensor(-343.5886, device='cuda:0')
epoch: 128 test_true_pfm: 3012.1019493421154 sim_pfm: 172.80588147847448
episode: 512 training return: tensor(-345.4532, device='cuda:0')
episode: 513 training return: tensor(-260.8455, device='cuda:0')
episode: 514 training return: tensor(-341.8720, device='cuda:0')
episode: 515 training return: tensor(-342.3241, device='cuda:0')
epoch: 129 test_true_pfm: 2079.526275552807 sim_pfm: -50.18544046739893
episode: 516 training return: tensor(414.3779, device='cuda:0')
episode: 517 training return: tensor(160.3423, device='cuda:0')
episode: 518 training return: tensor(-204.9550, device='cuda:0')
episode: 519 training return: tensor(-94.6005, device='cuda:0')
epoch: 130 test_true_pfm: 2943.1589008822516 sim_pfm: -88.81346585165011
episode: 520 training return: tensor(-294.5597, device='cuda:0')
episode: 521 training return: tensor(-146.8179, device='cuda:0')
episode: 522 training return: tensor(222.8596, device='cuda:0')
episode: 523 training return: tensor(-20.8949, device='cuda:0')
epoch: 131 test_true_pfm: 2294.4765451011144 sim_pfm: -59.78934502520133
episode: 524 training return: tensor(391.3565, device='cuda:0')
episode: 525 training return: tensor(-342.6348, device='cuda:0')
episode: 526 training return: tensor(-32.9844, device='cuda:0')
episode: 527 training return: tensor(169.8710, device='cuda:0')
epoch: 132 test_true_pfm: 2478.5257106901863 sim_pfm: 177.4363753753229
episode: 528 training return: tensor(-209.6608, device='cuda:0')
episode: 529 training return: tensor(-256.5623, device='cuda:0')
episode: 530 training return: tensor(265.8315, device='cuda:0')
episode: 531 training return: tensor(-34.2624, device='cuda:0')
epoch: 133 test_true_pfm: 3206.324053852597 sim_pfm: 282.78482157120015
episode: 532 training return: tensor(-331.6060, device='cuda:0')
episode: 533 training return: tensor(-321.1799, device='cuda:0')
episode: 534 training return: tensor(37.1499, device='cuda:0')
episode: 535 training return: tensor(54.1997, device='cuda:0')
epoch: 134 test_true_pfm: 2409.0975900045464 sim_pfm: 230.78738197205044
episode: 536 training return: tensor(-316.9278, device='cuda:0')
episode: 537 training return: tensor(-250.3171, device='cuda:0')
episode: 538 training return: tensor(-357.7440, device='cuda:0')
episode: 539 training return: tensor(-248.1039, device='cuda:0')
epoch: 135 test_true_pfm: 2527.2975936931 sim_pfm: 368.70631293282105
episode: 540 training return: tensor(-349.8254, device='cuda:0')
episode: 541 training return: tensor(-296.7536, device='cuda:0')
episode: 542 training return: tensor(132.2862, device='cuda:0')
episode: 543 training return: tensor(-342.3799, device='cuda:0')
epoch: 136 test_true_pfm: 2540.138193746714 sim_pfm: 312.85833716324606
episode: 544 training return: tensor(364.5001, device='cuda:0')
episode: 545 training return: tensor(244.7186, device='cuda:0')
episode: 546 training return: tensor(177.2725, device='cuda:0')
episode: 547 training return: tensor(-134.6545, device='cuda:0')
epoch: 137 test_true_pfm: 2557.4741519882214 sim_pfm: 114.46755754359765
episode: 548 training return: tensor(-338.7466, device='cuda:0')
episode: 549 training return: tensor(324.0682, device='cuda:0')
episode: 550 training return: tensor(-319.2332, device='cuda:0')
episode: 551 training return: tensor(-349.5862, device='cuda:0')
epoch: 138 test_true_pfm: 2371.6418879600365 sim_pfm: 151.4462362860601
episode: 552 training return: tensor(6.3535, device='cuda:0')
episode: 553 training return: tensor(-324.1579, device='cuda:0')
episode: 554 training return: tensor(-348.5717, device='cuda:0')
episode: 555 training return: tensor(-114.4937, device='cuda:0')
epoch: 139 test_true_pfm: 2539.2058792657667 sim_pfm: -80.66137781828486
episode: 556 training return: tensor(-298.7870, device='cuda:0')
episode: 557 training return: tensor(-61.9827, device='cuda:0')
episode: 558 training return: tensor(45.5863, device='cuda:0')
episode: 559 training return: tensor(-281.4878, device='cuda:0')
epoch: 140 test_true_pfm: 2297.932245418919 sim_pfm: 162.18501441718158
episode: 560 training return: tensor(-204.5378, device='cuda:0')
episode: 561 training return: tensor(-346.0059, device='cuda:0')
episode: 562 training return: tensor(-291.5734, device='cuda:0')
episode: 563 training return: tensor(-382.0150, device='cuda:0')
epoch: 141 test_true_pfm: 2505.0239871693207 sim_pfm: 21.311643260948284
episode: 564 training return: tensor(-305.2025, device='cuda:0')
episode: 565 training return: tensor(-235.9412, device='cuda:0')
episode: 566 training return: tensor(-57.0894, device='cuda:0')
episode: 567 training return: tensor(-351.4031, device='cuda:0')
epoch: 142 test_true_pfm: 2253.444407100406 sim_pfm: -19.598784587035578
episode: 568 training return: tensor(-126.7519, device='cuda:0')
episode: 569 training return: tensor(-349.9923, device='cuda:0')
episode: 570 training return: tensor(-344.8050, device='cuda:0')
episode: 571 training return: tensor(-208.1858, device='cuda:0')
epoch: 143 test_true_pfm: 1816.724847490618 sim_pfm: -254.1112292277976
episode: 572 training return: tensor(-243.6118, device='cuda:0')
episode: 573 training return: tensor(-207.4869, device='cuda:0')
episode: 574 training return: tensor(-52.1157, device='cuda:0')
episode: 575 training return: tensor(25.6213, device='cuda:0')
epoch: 144 test_true_pfm: 2001.3046282378443 sim_pfm: -158.74291445111157
episode: 576 training return: tensor(-334.5869, device='cuda:0')
episode: 577 training return: tensor(-93.7036, device='cuda:0')
episode: 578 training return: tensor(-87.2194, device='cuda:0')
episode: 579 training return: tensor(-323.2139, device='cuda:0')
epoch: 145 test_true_pfm: 2356.079257740738 sim_pfm: 300.4560540923849
episode: 580 training return: tensor(-299.4865, device='cuda:0')
episode: 581 training return: tensor(-338.7343, device='cuda:0')
episode: 582 training return: tensor(327.4926, device='cuda:0')
episode: 583 training return: tensor(383.6235, device='cuda:0')
epoch: 146 test_true_pfm: 2149.6834094957994 sim_pfm: -9.991225400842572
episode: 584 training return: tensor(338.8200, device='cuda:0')
episode: 585 training return: tensor(-344.1487, device='cuda:0')
episode: 586 training return: tensor(-48.7489, device='cuda:0')
episode: 587 training return: tensor(-345.4977, device='cuda:0')
epoch: 147 test_true_pfm: 2327.3814510338725 sim_pfm: -70.90885004218823
episode: 588 training return: tensor(416.7097, device='cuda:0')
episode: 589 training return: tensor(-292.5938, device='cuda:0')
episode: 590 training return: tensor(-207.6794, device='cuda:0')
episode: 591 training return: tensor(53.2327, device='cuda:0')
epoch: 148 test_true_pfm: 2385.910827626131 sim_pfm: 112.93226627301192
episode: 592 training return: tensor(-348.1331, device='cuda:0')
episode: 593 training return: tensor(-374.2218, device='cuda:0')
episode: 594 training return: tensor(410.1778, device='cuda:0')
episode: 595 training return: tensor(-207.8091, device='cuda:0')
epoch: 149 test_true_pfm: 2820.2741041036556 sim_pfm: 44.22037632346231
episode: 596 training return: tensor(-259.8660, device='cuda:0')
episode: 597 training return: tensor(-353.0302, device='cuda:0')
episode: 598 training return: tensor(133.3642, device='cuda:0')
episode: 599 training return: tensor(-351.9771, device='cuda:0')
epoch: 150 test_true_pfm: 2674.1040263135997 sim_pfm: 61.77698518157316
