['--env', 'Walker2d-v3']
epoch: 0 training_loss 0.26095411032438276 test_loss: 0.2653132200241089
epoch: 1 training_loss 0.1854887403547764 test_loss: 0.15284769535064696
epoch: 2 training_loss 0.174370998442173 test_loss: 0.15404149293899536
epoch: 3 training_loss 0.16804194424301386 test_loss: 0.14549554586410524
epoch: 4 training_loss 0.15887244053184987 test_loss: 0.16187005043029784
epoch: 5 training_loss 0.15181211430579425 test_loss: 0.14309148788452147
epoch: 6 training_loss 0.14502252753823996 test_loss: 0.15772733688354493
epoch: 7 training_loss 0.14567760147154332 test_loss: 0.14489083290100097
epoch: 8 training_loss 0.1427026803791523 test_loss: 0.15157426595687867
epoch: 9 training_loss 0.1397486175224185 test_loss: 0.1647718906402588
epoch: 10 training_loss 0.13652140155434608 test_loss: 0.129463267326355
epoch: 11 training_loss 0.13982014685869218 test_loss: 0.11193650960922241
epoch: 12 training_loss 0.12993234355002642 test_loss: 0.12844094038009643
epoch: 13 training_loss 0.1289466805011034 test_loss: 0.13856124877929688
epoch: 14 training_loss 0.13043628729879855 test_loss: 0.12413538694381714
epoch: 15 training_loss 0.13158514935523272 test_loss: 0.11639091968536378
epoch: 16 training_loss 0.12017931751906871 test_loss: 0.11556808948516846
epoch: 17 training_loss 0.12512144818902016 test_loss: 0.09830453991889954
epoch: 18 training_loss 0.13440591286867856 test_loss: 0.1127397894859314
epoch: 19 training_loss 0.12693851120769978 test_loss: 0.1351444125175476
epoch: 20 training_loss 0.13075337514281274 test_loss: 0.10805606842041016
epoch: 21 training_loss 0.12741553895175456 test_loss: 0.1488679051399231
epoch: 22 training_loss 0.12462765242904425 test_loss: 0.13809638023376464
epoch: 23 training_loss 0.13102399060502648 test_loss: 0.12252755165100097
epoch: 24 training_loss 0.12073827870190143 test_loss: 0.17706328630447388
epoch: 25 training_loss 0.12889433231204747 test_loss: 0.12422583103179932
epoch: 26 training_loss 0.13078035887330772 test_loss: 0.12653414011001587
epoch: 27 training_loss 0.12526960808783769 test_loss: 0.10893909931182862
epoch: 28 training_loss 0.1256135212816298 test_loss: 0.10933479070663452
epoch: 29 training_loss 0.12373991895467043 test_loss: 0.11828310489654541
epoch: 30 training_loss 0.12327834227122367 test_loss: 0.12343019247055054
epoch: 31 training_loss 0.11976233147084712 test_loss: 0.1534245491027832
epoch: 32 training_loss 0.13350821010768413 test_loss: 0.12864662408828736
epoch: 33 training_loss 0.11976133562624454 test_loss: 0.11109257936477661
epoch: 34 training_loss 0.12499158341437579 test_loss: 0.12583204507827758
epoch: 35 training_loss 0.12490415439009667 test_loss: 0.13528212308883666
epoch: 36 training_loss 0.12232117258012294 test_loss: 0.1307859778404236
epoch: 37 training_loss 0.1291490453109145 test_loss: 0.12801854610443114
epoch: 38 training_loss 0.1170220424607396 test_loss: 0.14575552940368652
epoch: 39 training_loss 0.12278053399175405 test_loss: 0.11328225135803223
epoch: 40 training_loss 0.12349478645250202 test_loss: 0.11848709583282471
epoch: 41 training_loss 0.1280010947585106 test_loss: 0.12476674318313599
epoch: 42 training_loss 0.12457573730498553 test_loss: 0.12507115602493285
epoch: 43 training_loss 0.12429247412830591 test_loss: 0.1272863984107971
epoch: 44 training_loss 0.11812224425375462 test_loss: 0.11841484308242797
epoch: 45 training_loss 0.12118892710655928 test_loss: 0.15384525060653687
epoch: 46 training_loss 0.12692260362207888 test_loss: 0.1355513095855713
epoch: 47 training_loss 0.12530600778758527 test_loss: 0.11643098592758179
epoch: 48 training_loss 0.12226822428405285 test_loss: 0.120040762424469
epoch: 49 training_loss 0.12589756447821857 test_loss: 0.1008527398109436
epoch: 50 training_loss 0.12081896994262933 test_loss: 0.1462611436843872
epoch: 51 training_loss 0.11672292377799749 test_loss: 0.09249380826950074
epoch: 52 training_loss 0.1212752254679799 test_loss: 0.11663708686828614
epoch: 53 training_loss 0.12566718254238368 test_loss: 0.10601955652236938
epoch: 54 training_loss 0.12084176810458302 test_loss: 0.13022805452346803
epoch: 55 training_loss 0.12674654088914394 test_loss: 0.11366240978240967
epoch: 56 training_loss 0.1181595629453659 test_loss: 0.12284178733825683
epoch: 57 training_loss 0.126889004483819 test_loss: 0.10684328079223633
epoch: 58 training_loss 0.12463342618197203 test_loss: 0.1565266728401184
epoch: 59 training_loss 0.12143322873860597 test_loss: 0.09561478495597839
epoch: 60 training_loss 0.11532942758873105 test_loss: 0.11377439498901368
epoch: 61 training_loss 0.10790550660341978 test_loss: 0.16800273656845094
epoch: 62 training_loss 0.11850492777302861 test_loss: 0.10454539060592652
epoch: 63 training_loss 0.12470956716686488 test_loss: 0.13791729211807252
epoch: 64 training_loss 0.1280791802704334 test_loss: 0.1225726842880249
epoch: 65 training_loss 0.12244266115128993 test_loss: 0.1225623607635498
epoch: 66 training_loss 0.11618029095232486 test_loss: 0.12875211238861084
epoch: 67 training_loss 0.1146757135912776 test_loss: 0.10412043333053589
epoch: 68 training_loss 0.12330698031932115 test_loss: 0.13216406106948853
epoch: 69 training_loss 0.12172115974128246 test_loss: 0.14520657062530518
epoch: 70 training_loss 0.12331564098596573 test_loss: 0.09652003645896912
epoch: 71 training_loss 0.11485339675098657 test_loss: 0.09827848672866821
epoch: 72 training_loss 0.1226258872821927 test_loss: 0.14878156185150146
epoch: 73 training_loss 0.11785287443548441 test_loss: 0.14540032148361207
epoch: 74 training_loss 0.12008629623800517 test_loss: 0.1564938545227051
epoch: 75 training_loss 0.11758797377347946 test_loss: 0.09942349195480346
epoch: 76 training_loss 0.1257033855840564 test_loss: 0.12095222473144532
epoch: 77 training_loss 0.11746298782527446 test_loss: 0.12405291795730591
epoch: 78 training_loss 0.11902642406523228 test_loss: 0.10137685537338256
epoch: 79 training_loss 0.12223149944096803 test_loss: 0.08542807698249817
epoch: 80 training_loss 0.11330094432458282 test_loss: 0.13083698749542236
epoch: 81 training_loss 0.11224244356155395 test_loss: 0.11507127285003663
epoch: 82 training_loss 0.11660895805805921 test_loss: 0.10748233795166015
epoch: 83 training_loss 0.12151656568050384 test_loss: 0.11360307931900024
epoch: 84 training_loss 0.11693204224109649 test_loss: 0.10479897260665894
epoch: 85 training_loss 0.12481271263211965 test_loss: 0.11072481870651245
epoch: 86 training_loss 0.11917463563382626 test_loss: 0.12919740676879882
epoch: 87 training_loss 0.11138497620820999 test_loss: 0.09106956720352173
epoch: 88 training_loss 0.11112086961045861 test_loss: 0.10964622497558593
epoch: 89 training_loss 0.11810104291886091 test_loss: 0.12026765346527099
epoch: 90 training_loss 0.12027347214519978 test_loss: 0.1395118474960327
epoch: 91 training_loss 0.12371592164039612 test_loss: 0.11776273250579834
epoch: 92 training_loss 0.1194895476102829 test_loss: 0.1131359577178955
epoch: 93 training_loss 0.11992070231586695 test_loss: 0.1299649715423584
epoch: 94 training_loss 0.12674914162606002 test_loss: 0.10040262937545777
epoch: 95 training_loss 0.11385122075676918 test_loss: 0.14331908226013185
epoch: 96 training_loss 0.10965085122734308 test_loss: 0.12672635316848754
epoch: 97 training_loss 0.11833629587665201 test_loss: 0.08887503147125245
epoch: 98 training_loss 0.11837392650544644 test_loss: 0.11788547039031982
epoch: 99 training_loss 0.11523220820352435 test_loss: 0.09522526860237121
epoch: 100 training_loss 0.11682087279856206 test_loss: 0.12300004959106445
epoch: 101 training_loss 0.11138476982712746 test_loss: 0.1281331419944763
epoch: 102 training_loss 0.11873006004840135 test_loss: 0.13379524946212767
epoch: 103 training_loss 0.1156773390620947 test_loss: 0.09425535798072815
epoch: 104 training_loss 0.11813689775764942 test_loss: 0.10970917940139771
epoch: 105 training_loss 0.1218020262941718 test_loss: 0.1513246178627014
epoch: 106 training_loss 0.12188213385641575 test_loss: 0.10759813785552978
epoch: 107 training_loss 0.10701747156679631 test_loss: 0.13459539413452148
epoch: 108 training_loss 0.11203570472076535 test_loss: 0.10285212993621826
epoch: 109 training_loss 0.11696096409112215 test_loss: 0.10901775360107421
epoch: 110 training_loss 0.11553789891302585 test_loss: 0.12632898092269898
epoch: 111 training_loss 0.11077049016952514 test_loss: 0.121661376953125
epoch: 112 training_loss 0.11441341366618872 test_loss: 0.12562662363052368
epoch: 113 training_loss 0.1159458608739078 test_loss: 0.10906940698623657
epoch: 114 training_loss 0.12460136082023382 test_loss: 0.128210186958313
epoch: 115 training_loss 0.12525979911908508 test_loss: 0.11656817197799682
epoch: 116 training_loss 0.11300208412110806 test_loss: 0.10275225639343262
epoch: 117 training_loss 0.11837139662355184 test_loss: 0.13153657913208008
epoch: 118 training_loss 0.12135846681892871 test_loss: 0.12020878791809082
epoch: 119 training_loss 0.11436750879511237 test_loss: 0.13529351949691773
epoch: 120 training_loss 0.11972931865602732 test_loss: 0.12113829851150512
epoch: 121 training_loss 0.12027681201696395 test_loss: 0.1326942801475525
epoch: 122 training_loss 0.11800970397889614 test_loss: 0.11168980598449707
epoch: 123 training_loss 0.11879616418853402 test_loss: 0.11382381916046143
epoch: 124 training_loss 0.11384528677910566 test_loss: 0.1430195927619934
epoch: 125 training_loss 0.1235467980057001 test_loss: 0.14184237718582154
epoch: 126 training_loss 0.1183950569294393 test_loss: 0.1213568091392517
epoch: 127 training_loss 0.11395422134548426 test_loss: 0.10837746858596801
epoch: 128 training_loss 0.11812899138778449 test_loss: 0.09699010848999023
epoch: 129 training_loss 0.11519659589976072 test_loss: 0.10727227926254272
epoch: 130 training_loss 0.11867408152669669 test_loss: 0.12395771741867065
epoch: 131 training_loss 0.11481131963431836 test_loss: 0.1173707127571106
epoch: 132 training_loss 0.12420203518122434 test_loss: 0.11988705396652222
epoch: 133 training_loss 0.11783886700868607 test_loss: 0.0952820599079132
epoch: 134 training_loss 0.12077706880867481 test_loss: 0.145970618724823
epoch: 135 training_loss 0.11901623833924532 test_loss: 0.10285441875457764
epoch: 136 training_loss 0.11719727696850896 test_loss: 0.10184829235076905
epoch: 137 training_loss 0.10909700220450759 test_loss: 0.12544790506362916
epoch: 138 training_loss 0.12038997512310744 test_loss: 0.1345839262008667
epoch: 139 training_loss 0.12307496525347233 test_loss: 0.1097025752067566
epoch: 140 training_loss 0.11881164610385894 test_loss: 0.12166918516159057
epoch: 141 training_loss 0.11212574057281018 test_loss: 0.11367048025131225
epoch: 142 training_loss 0.12024905737489462 test_loss: 0.0858952283859253
epoch: 143 training_loss 0.10877753719687462 test_loss: 0.11182957887649536
epoch: 144 training_loss 0.10764215882867574 test_loss: 0.11555008888244629
epoch: 145 training_loss 0.12386483930051327 test_loss: 0.10698117017745971
epoch: 146 training_loss 0.1202254830673337 test_loss: 0.14127626419067382
epoch: 147 training_loss 0.11570704968646169 test_loss: 0.10072140693664551
epoch: 148 training_loss 0.11887813284993172 test_loss: 0.091609525680542
epoch: 149 training_loss 0.12965673834085464 test_loss: 0.12147175073623658
epoch: 0 training_loss 27.389613528251648 test_loss: 9.231749725341796
epoch: 1 training_loss 7.05841254234314 test_loss: 6.193200302124024
epoch: 2 training_loss 5.19704482793808 test_loss: 4.901266098022461
epoch: 3 training_loss 4.379954440593719 test_loss: 4.44691047668457
epoch: 4 training_loss 3.834857630729675 test_loss: 4.176191329956055
epoch: 5 training_loss 3.4382436060905457 test_loss: 3.6074920654296876
epoch: 6 training_loss 3.192832741737366 test_loss: 3.17630558013916
epoch: 7 training_loss 2.9236373138427734 test_loss: 3.1499366760253906
epoch: 8 training_loss 2.739731976985931 test_loss: 2.937757110595703
epoch: 9 training_loss 2.5615530514717104 test_loss: 2.4640790939331056
epoch: 10 training_loss 2.4460967063903807 test_loss: 2.542195129394531
epoch: 11 training_loss 2.368203318119049 test_loss: 2.3400449752807617
epoch: 12 training_loss 2.315418157577515 test_loss: 2.484625053405762
epoch: 13 training_loss 2.1244114089012145 test_loss: 2.366952133178711
epoch: 14 training_loss 2.181997470855713 test_loss: 1.978615951538086
epoch: 15 training_loss 2.0564994609355924 test_loss: 2.242573547363281
epoch: 16 training_loss 1.9824258315563201 test_loss: 2.038003349304199
epoch: 17 training_loss 1.9650945842266083 test_loss: 1.9811023712158202
epoch: 18 training_loss 1.8681019854545593 test_loss: 1.8975160598754883
epoch: 19 training_loss 1.8453636968135834 test_loss: 1.7274509429931642
epoch: 20 training_loss 1.8715635025501252 test_loss: 1.8607053756713867
epoch: 21 training_loss 1.8176158940792084 test_loss: 1.9828731536865234
epoch: 22 training_loss 1.7542741239070891 test_loss: 1.7992656707763672
epoch: 23 training_loss 1.7698364102840423 test_loss: 1.7236858367919923
epoch: 24 training_loss 1.718303816318512 test_loss: 1.7201385498046875
epoch: 25 training_loss 1.7389945709705352 test_loss: 1.8389863967895508
epoch: 26 training_loss 1.6904745388031006 test_loss: 1.846683883666992
epoch: 27 training_loss 1.655003297328949 test_loss: 1.7073667526245118
epoch: 28 training_loss 1.6165879011154174 test_loss: 1.6983797073364257
epoch: 29 training_loss 1.59280149102211 test_loss: 1.593211841583252
epoch: 30 training_loss 1.600022941827774 test_loss: 1.6192459106445312
epoch: 31 training_loss 1.5865131890773774 test_loss: 1.5812989234924317
epoch: 32 training_loss 1.5969156646728515 test_loss: 1.4364537239074706
epoch: 33 training_loss 1.6028460276126861 test_loss: 1.6773544311523438
epoch: 34 training_loss 1.552199810743332 test_loss: 1.6240758895874023
epoch: 35 training_loss 1.5589737296104431 test_loss: 1.6957141876220703
epoch: 36 training_loss 1.5192682647705078 test_loss: 1.5670296669006347
epoch: 37 training_loss 1.5289029133319856 test_loss: 1.621487808227539
epoch: 38 training_loss 1.47222674369812 test_loss: 1.4357186317443849
epoch: 39 training_loss 1.4424655258655548 test_loss: 1.5738185882568358
epoch: 40 training_loss 1.4345890867710114 test_loss: 1.5005204200744628
epoch: 41 training_loss 1.4388097441196441 test_loss: 1.4176095008850098
epoch: 42 training_loss 1.4250538182258605 test_loss: 1.5514771461486816
epoch: 43 training_loss 1.361632878780365 test_loss: 1.5037748336791992
epoch: 44 training_loss 1.4426541304588318 test_loss: 1.5047033309936524
epoch: 45 training_loss 1.4250170946121217 test_loss: 1.3600334167480468
epoch: 46 training_loss 1.3819010317325593 test_loss: 1.3118063926696777
epoch: 47 training_loss 1.3812150371074676 test_loss: 1.4083476066589355
epoch: 48 training_loss 1.4008547818660737 test_loss: 1.4093944549560546
epoch: 49 training_loss 1.3653017950057984 test_loss: 1.480631160736084
epoch: 50 training_loss 1.3535229909420012 test_loss: 1.5383891105651855
epoch: 51 training_loss 1.3373344147205353 test_loss: 1.4343822479248047
epoch: 52 training_loss 1.3647521340847015 test_loss: 1.3003512382507325
epoch: 53 training_loss 1.349585143327713 test_loss: 1.4116835594177246
epoch: 54 training_loss 1.3716032457351686 test_loss: 1.2588236808776856
epoch: 55 training_loss 1.3459689569473268 test_loss: 1.3143513679504395
epoch: 56 training_loss 1.3477696526050567 test_loss: 1.287852668762207
epoch: 57 training_loss 1.3017981731891632 test_loss: 1.2510085105895996
epoch: 58 training_loss 1.3019458520412446 test_loss: 1.188263511657715
epoch: 59 training_loss 1.259631591439247 test_loss: 1.217824649810791
epoch: 60 training_loss 1.2481706142425537 test_loss: 1.429399585723877
epoch: 61 training_loss 1.2573035556077956 test_loss: 1.3242663383483886
epoch: 62 training_loss 1.268801041841507 test_loss: 1.3207515716552733
epoch: 63 training_loss 1.2358049476146697 test_loss: 1.3379179954528808
epoch: 64 training_loss 1.2237685424089433 test_loss: 1.239127254486084
epoch: 65 training_loss 1.1969635534286498 test_loss: 1.23779239654541
epoch: 66 training_loss 1.227286873459816 test_loss: 1.2643081665039062
epoch: 67 training_loss 1.253272152543068 test_loss: 1.20924072265625
epoch: 68 training_loss 1.2353124415874481 test_loss: 1.3206903457641601
epoch: 69 training_loss 1.2365265989303589 test_loss: 1.2754326820373536
epoch: 70 training_loss 1.2099177432060242 test_loss: 1.2188066482543944
epoch: 71 training_loss 1.2737882816791535 test_loss: 1.1221940994262696
epoch: 72 training_loss 1.2395676147937775 test_loss: 1.412532901763916
epoch: 73 training_loss 1.1885756272077561 test_loss: 1.2640352249145508
epoch: 74 training_loss 1.227241957783699 test_loss: 1.2673344612121582
epoch: 75 training_loss 1.2123365890979767 test_loss: 1.3889302253723144
epoch: 76 training_loss 1.209158834218979 test_loss: 1.1340922355651855
epoch: 77 training_loss 1.2307257640361786 test_loss: 1.2384984016418457
epoch: 78 training_loss 1.233432975411415 test_loss: 1.2445256233215332
epoch: 79 training_loss 1.1848855024576188 test_loss: 1.3017268180847168
epoch: 80 training_loss 1.2057859128713608 test_loss: 1.24381685256958
epoch: 81 training_loss 1.157233949303627 test_loss: 1.1817241668701173
epoch: 82 training_loss 1.172349102497101 test_loss: 1.076345443725586
epoch: 83 training_loss 1.1618745481967927 test_loss: 1.1115402221679687
epoch: 84 training_loss 1.1783624297380448 test_loss: 1.1868220329284669
epoch: 85 training_loss 1.1642466419935227 test_loss: 1.1717588424682617
epoch: 86 training_loss 1.16841277718544 test_loss: 1.2048487663269043
epoch: 87 training_loss 1.1495342928171157 test_loss: 1.2008259773254395
epoch: 88 training_loss 1.1424442774057388 test_loss: 1.1858132362365723
epoch: 89 training_loss 1.160585069656372 test_loss: 1.1938735961914062
epoch: 90 training_loss 1.1462810558080674 test_loss: 1.1364039421081542
epoch: 91 training_loss 1.1448552161455154 test_loss: 1.1468433380126952
epoch: 92 training_loss 1.1506268221139908 test_loss: 1.093174934387207
epoch: 93 training_loss 1.1200821381807327 test_loss: 1.0992196083068848
epoch: 94 training_loss 1.1067503648996353 test_loss: 1.2084335327148437
epoch: 95 training_loss 1.1113981837034226 test_loss: 1.1257234573364259
epoch: 96 training_loss 1.1636633086204529 test_loss: 1.1599831581115723
epoch: 97 training_loss 1.124254630804062 test_loss: 1.0617542266845703
epoch: 98 training_loss 1.117346501350403 test_loss: 1.1236966133117676
epoch: 99 training_loss 1.109554997086525 test_loss: 1.1974909782409668
epoch: 100 training_loss 1.11018648147583 test_loss: 1.0585947990417481
epoch: 101 training_loss 1.0987840461730958 test_loss: 1.177958869934082
epoch: 102 training_loss 1.0867989993095397 test_loss: 1.0997559547424316
epoch: 103 training_loss 1.117811943292618 test_loss: 1.14288272857666
epoch: 104 training_loss 1.1293405777215957 test_loss: 1.1468893051147462
epoch: 105 training_loss 1.085977269411087 test_loss: 1.210786724090576
epoch: 106 training_loss 1.0825790131092072 test_loss: 1.0830720901489257
epoch: 107 training_loss 1.0980216479301452 test_loss: 1.1614659309387207
epoch: 108 training_loss 1.0867120623588562 test_loss: 1.1799715042114258
epoch: 109 training_loss 1.087061535716057 test_loss: 1.1049732208251952
epoch: 110 training_loss 1.0773409563302994 test_loss: 1.022437572479248
epoch: 111 training_loss 1.0851535487174988 test_loss: 1.0963257789611816
epoch: 112 training_loss 1.1284350448846816 test_loss: 1.2609625816345216
epoch: 113 training_loss 1.0969886356592178 test_loss: 1.1910154342651367
epoch: 114 training_loss 1.09374174952507 test_loss: 1.149239730834961
epoch: 115 training_loss 1.076854435801506 test_loss: 1.0586731910705567
epoch: 116 training_loss 1.1292158365249634 test_loss: 1.1021695137023926
epoch: 117 training_loss 1.0704940837621688 test_loss: 1.2317218780517578
epoch: 118 training_loss 1.0619699305295944 test_loss: 1.0766002655029296
epoch: 119 training_loss 1.0387308531999588 test_loss: 1.0792072296142579
epoch: 120 training_loss 1.0646752202510834 test_loss: 1.0692717552185058
epoch: 121 training_loss 1.0558132135868072 test_loss: 1.0192384719848633
epoch: 122 training_loss 1.0771393799781799 test_loss: 1.0682063102722168
epoch: 123 training_loss 1.049236844778061 test_loss: 1.1227346420288087
epoch: 124 training_loss 1.0600487905740739 test_loss: 1.0192469596862792
epoch: 125 training_loss 1.0409711498022078 test_loss: 1.0317455291748048
epoch: 126 training_loss 1.0245178508758546 test_loss: 1.0199532508850098
epoch: 127 training_loss 1.0837304550409317 test_loss: 1.0474750518798828
epoch: 128 training_loss 1.0362117391824723 test_loss: 1.0033140182495117
epoch: 129 training_loss 1.0682457846403122 test_loss: 1.1238142013549806
epoch: 130 training_loss 1.0846419954299926 test_loss: 1.1642608642578125
epoch: 131 training_loss 0.9963957673311233 test_loss: 1.119422435760498
epoch: 132 training_loss 1.0243866515159608 test_loss: 1.0938671112060547
epoch: 133 training_loss 1.0246821296215058 test_loss: 1.1629521369934082
epoch: 134 training_loss 1.02340267598629 test_loss: 1.0365610122680664
epoch: 135 training_loss 1.0142876279354096 test_loss: 1.0524017333984375
epoch: 136 training_loss 1.0305504292249679 test_loss: 1.028104591369629
epoch: 137 training_loss 1.0159405267238617 test_loss: 1.027320957183838
epoch: 138 training_loss 1.0197404342889786 test_loss: 1.0317599296569824
epoch: 139 training_loss 1.0374597680568696 test_loss: 1.0906924247741698
epoch: 140 training_loss 1.0188162714242934 test_loss: 1.0487820625305175
epoch: 141 training_loss 0.997782188653946 test_loss: 1.0761232376098633
epoch: 142 training_loss 1.013456814289093 test_loss: 1.0228237152099608
epoch: 143 training_loss 1.0275217109918595 test_loss: 0.9843684196472168
epoch: 144 training_loss 1.010171763896942 test_loss: 1.0502365112304688
epoch: 145 training_loss 1.0189046651124953 test_loss: 1.0166224479675292
epoch: 146 training_loss 1.0341783970594407 test_loss: 0.9531627655029297
epoch: 147 training_loss 1.0095073354244233 test_loss: 1.1190009117126465
epoch: 148 training_loss 0.9857615339756012 test_loss: 1.0778040885925293
epoch: 149 training_loss 1.002469735145569 test_loss: 0.9738070487976074
3823.6584955713506
episode: 0 training return: tensor(-13.0373, device='cuda:0')
episode: 1 training return: tensor(-16.9960, device='cuda:0')
episode: 2 training return: tensor(-56.9395, device='cuda:0')
episode: 3 training return: tensor(-5.3980, device='cuda:0')
epoch: 1 test_true_pfm: 3971.2759356898005 sim_pfm: -10.133677100811232
episode: 4 training return: tensor(-14.7248, device='cuda:0')
episode: 5 training return: tensor(-48.5359, device='cuda:0')
episode: 6 training return: tensor(-52.6821, device='cuda:0')
episode: 7 training return: tensor(36.6865, device='cuda:0')
epoch: 2 test_true_pfm: 3968.169813598341 sim_pfm: -58.696495663791815
episode: 8 training return: tensor(14.5160, device='cuda:0')
episode: 9 training return: tensor(7.7350, device='cuda:0')
episode: 10 training return: tensor(-29.1065, device='cuda:0')
episode: 11 training return: tensor(-28.7073, device='cuda:0')
epoch: 3 test_true_pfm: 3994.882083061577 sim_pfm: 4.089721394062508
episode: 12 training return: tensor(27.4319, device='cuda:0')
episode: 13 training return: tensor(-69.5610, device='cuda:0')
episode: 14 training return: tensor(-17.1758, device='cuda:0')
episode: 15 training return: tensor(-24.3857, device='cuda:0')
epoch: 4 test_true_pfm: 4021.593817416508 sim_pfm: -11.447266738124503
episode: 16 training return: tensor(16.6004, device='cuda:0')
episode: 17 training return: tensor(-3.6947, device='cuda:0')
episode: 18 training return: tensor(-956.4366, device='cuda:0')
episode: 19 training return: tensor(-0.0964, device='cuda:0')
epoch: 5 test_true_pfm: 3968.191103369784 sim_pfm: -278.7498348561833
episode: 20 training return: tensor(-32.4221, device='cuda:0')
episode: 21 training return: tensor(-969.7168, device='cuda:0')
episode: 22 training return: tensor(-909.4830, device='cuda:0')
episode: 23 training return: tensor(42.8196, device='cuda:0')
epoch: 6 test_true_pfm: 3956.594565839407 sim_pfm: 28.425619353026075
episode: 24 training return: tensor(51.8802, device='cuda:0')
episode: 25 training return: tensor(26.8291, device='cuda:0')
episode: 26 training return: tensor(52.4575, device='cuda:0')
episode: 27 training return: tensor(11.0806, device='cuda:0')
epoch: 7 test_true_pfm: 4027.773591293795 sim_pfm: -10.766091680037789
episode: 28 training return: tensor(26.8475, device='cuda:0')
episode: 29 training return: tensor(-8.1215, device='cuda:0')
episode: 30 training return: tensor(-29.2235, device='cuda:0')
episode: 31 training return: tensor(49.5900, device='cuda:0')
epoch: 8 test_true_pfm: 3979.3082327288016 sim_pfm: -1.182060297462158
episode: 32 training return: tensor(-953.3834, device='cuda:0')
episode: 33 training return: tensor(27.8714, device='cuda:0')
episode: 34 training return: tensor(0.1616, device='cuda:0')
episode: 35 training return: tensor(-24.1362, device='cuda:0')
epoch: 9 test_true_pfm: 4031.446086780447 sim_pfm: 18.150687385932542
episode: 36 training return: tensor(-11.4042, device='cuda:0')
episode: 37 training return: tensor(-6.4980, device='cuda:0')
episode: 38 training return: tensor(-14.3979, device='cuda:0')
episode: 39 training return: tensor(-37.5994, device='cuda:0')
epoch: 10 test_true_pfm: 3992.166971867886 sim_pfm: 32.95688572271805
episode: 40 training return: tensor(12.9170, device='cuda:0')
episode: 41 training return: tensor(-0.9126, device='cuda:0')
episode: 42 training return: tensor(-4.3693, device='cuda:0')
episode: 43 training return: tensor(-97.5073, device='cuda:0')
epoch: 11 test_true_pfm: 4008.0179751568962 sim_pfm: -7.821523925037279
episode: 44 training return: tensor(-9.6352, device='cuda:0')
episode: 45 training return: tensor(-10.5971, device='cuda:0')
episode: 46 training return: tensor(34.4937, device='cuda:0')
episode: 47 training return: tensor(7.7786, device='cuda:0')
epoch: 12 test_true_pfm: 2857.5018210284893 sim_pfm: -30.775979722636595
episode: 48 training return: tensor(29.6465, device='cuda:0')
episode: 49 training return: tensor(-96.9089, device='cuda:0')
episode: 50 training return: tensor(-10.3267, device='cuda:0')
episode: 51 training return: tensor(-5.6180, device='cuda:0')
epoch: 13 test_true_pfm: 3995.127910377841 sim_pfm: 9.229491980824
episode: 52 training return: tensor(59.6276, device='cuda:0')
episode: 53 training return: tensor(-8.7053, device='cuda:0')
episode: 54 training return: tensor(6.2786, device='cuda:0')
episode: 55 training return: tensor(-937.5865, device='cuda:0')
epoch: 14 test_true_pfm: 4023.229890122311 sim_pfm: 5.7258896054991055
episode: 56 training return: tensor(6.9925, device='cuda:0')
episode: 57 training return: tensor(-938.6154, device='cuda:0')
episode: 58 training return: tensor(-43.5502, device='cuda:0')
episode: 59 training return: tensor(21.2196, device='cuda:0')
epoch: 15 test_true_pfm: 4034.840248015684 sim_pfm: 39.38149173600444
episode: 60 training return: tensor(-920.9688, device='cuda:0')
episode: 61 training return: tensor(49.5103, device='cuda:0')
episode: 62 training return: tensor(4.1147, device='cuda:0')
episode: 63 training return: tensor(-20.6523, device='cuda:0')
epoch: 16 test_true_pfm: 4010.9619741501497 sim_pfm: 30.162934044676756
episode: 64 training return: tensor(10.1349, device='cuda:0')
episode: 65 training return: tensor(48.5527, device='cuda:0')
episode: 66 training return: tensor(-44.8326, device='cuda:0')
episode: 67 training return: tensor(-5.6967, device='cuda:0')
epoch: 17 test_true_pfm: 4037.7598350915778 sim_pfm: 36.2596849197192
episode: 68 training return: tensor(43.4844, device='cuda:0')
episode: 69 training return: tensor(49.9924, device='cuda:0')
episode: 70 training return: tensor(-2.2566, device='cuda:0')
episode: 71 training return: tensor(0.1327, device='cuda:0')
epoch: 18 test_true_pfm: 4049.8454719015085 sim_pfm: 63.431322394045615
episode: 72 training return: tensor(56.8637, device='cuda:0')
episode: 73 training return: tensor(5.2443, device='cuda:0')
episode: 74 training return: tensor(-38.8502, device='cuda:0')
episode: 75 training return: tensor(-56.0085, device='cuda:0')
epoch: 19 test_true_pfm: 4031.370082382773 sim_pfm: 54.858658972826866
episode: 76 training return: tensor(36.9784, device='cuda:0')
episode: 77 training return: tensor(-24.3682, device='cuda:0')
episode: 78 training return: tensor(-27.2073, device='cuda:0')
episode: 79 training return: tensor(-2.1654, device='cuda:0')
epoch: 20 test_true_pfm: 4012.501024157049 sim_pfm: 28.687022132236354
episode: 80 training return: tensor(40.9498, device='cuda:0')
episode: 81 training return: tensor(-22.3294, device='cuda:0')
episode: 82 training return: tensor(29.3840, device='cuda:0')
episode: 83 training return: tensor(-5.6736, device='cuda:0')
epoch: 21 test_true_pfm: 4035.9067299661924 sim_pfm: 3.779521762703856
episode: 84 training return: tensor(39.3976, device='cuda:0')
episode: 85 training return: tensor(-34.1747, device='cuda:0')
episode: 86 training return: tensor(43.7528, device='cuda:0')
episode: 87 training return: tensor(-34.7396, device='cuda:0')
epoch: 22 test_true_pfm: 3993.256530884168 sim_pfm: 35.48634031428568
episode: 88 training return: tensor(-50.8480, device='cuda:0')
episode: 89 training return: tensor(-24.7374, device='cuda:0')
episode: 90 training return: tensor(-9.3293, device='cuda:0')
episode: 91 training return: tensor(-21.2899, device='cuda:0')
epoch: 23 test_true_pfm: 4014.8247740455276 sim_pfm: -6.424049979133997
episode: 92 training return: tensor(-32.5091, device='cuda:0')
episode: 93 training return: tensor(-59.7184, device='cuda:0')
episode: 94 training return: tensor(21.6452, device='cuda:0')
episode: 95 training return: tensor(27.0532, device='cuda:0')
epoch: 24 test_true_pfm: 2817.917213452631 sim_pfm: -259.8743903740445
episode: 96 training return: tensor(15.2776, device='cuda:0')
episode: 97 training return: tensor(14.5759, device='cuda:0')
episode: 98 training return: tensor(34.5997, device='cuda:0')
episode: 99 training return: tensor(42.0216, device='cuda:0')
epoch: 25 test_true_pfm: 4028.5416701176014 sim_pfm: 20.435580168288045
episode: 100 training return: tensor(45.8958, device='cuda:0')
episode: 101 training return: tensor(32.7632, device='cuda:0')
episode: 102 training return: tensor(29.7892, device='cuda:0')
episode: 103 training return: tensor(-29.6667, device='cuda:0')
epoch: 26 test_true_pfm: 4047.789589167676 sim_pfm: 41.011982736216545
episode: 104 training return: tensor(-51.7165, device='cuda:0')
episode: 105 training return: tensor(49.9409, device='cuda:0')
episode: 106 training return: tensor(-14.6645, device='cuda:0')
episode: 107 training return: tensor(8.4770, device='cuda:0')
epoch: 27 test_true_pfm: 3994.2353565617063 sim_pfm: 22.918817831319757
episode: 108 training return: tensor(41.8417, device='cuda:0')
episode: 109 training return: tensor(-8.0910, device='cuda:0')
episode: 110 training return: tensor(1.6145, device='cuda:0')
episode: 111 training return: tensor(-10.3076, device='cuda:0')
epoch: 28 test_true_pfm: 3997.2861373463834 sim_pfm: -7.4050168159495415
episode: 112 training return: tensor(38.1594, device='cuda:0')
episode: 113 training return: tensor(-88.3099, device='cuda:0')
episode: 114 training return: tensor(-36.0802, device='cuda:0')
episode: 115 training return: tensor(8.4057, device='cuda:0')
epoch: 29 test_true_pfm: 3974.7031539178865 sim_pfm: 8.406737231018875
episode: 116 training return: tensor(7.2517, device='cuda:0')
episode: 117 training return: tensor(-6.6071, device='cuda:0')
episode: 118 training return: tensor(36.2221, device='cuda:0')
episode: 119 training return: tensor(-14.5237, device='cuda:0')
epoch: 30 test_true_pfm: 3986.417023946675 sim_pfm: 20.62573651408699
episode: 120 training return: tensor(-25.9346, device='cuda:0')
episode: 121 training return: tensor(-5.1858, device='cuda:0')
episode: 122 training return: tensor(23.3602, device='cuda:0')
episode: 123 training return: tensor(21.5999, device='cuda:0')
epoch: 31 test_true_pfm: 4011.049549086983 sim_pfm: 3.4780884216015693
episode: 124 training return: tensor(-45.7643, device='cuda:0')
episode: 125 training return: tensor(-3.4873, device='cuda:0')
episode: 126 training return: tensor(-21.6095, device='cuda:0')
episode: 127 training return: tensor(29.1828, device='cuda:0')
epoch: 32 test_true_pfm: 4017.9440318768616 sim_pfm: 7.233502231363673
episode: 128 training return: tensor(-8.4279, device='cuda:0')
episode: 129 training return: tensor(37.8734, device='cuda:0')
episode: 130 training return: tensor(-33.7105, device='cuda:0')
episode: 131 training return: tensor(-64.3002, device='cuda:0')
epoch: 33 test_true_pfm: 4040.2516643668696 sim_pfm: 5.770145253530548
episode: 132 training return: tensor(-34.2290, device='cuda:0')
episode: 133 training return: tensor(-72.5575, device='cuda:0')
episode: 134 training return: tensor(-109.8406, device='cuda:0')
episode: 135 training return: tensor(16.5437, device='cuda:0')
epoch: 34 test_true_pfm: 4041.280185585842 sim_pfm: 6.424155515346986
episode: 136 training return: tensor(-69.5123, device='cuda:0')
episode: 137 training return: tensor(17.1983, device='cuda:0')
episode: 138 training return: tensor(-754.8047, device='cuda:0')
episode: 139 training return: tensor(-960.6782, device='cuda:0')
epoch: 35 test_true_pfm: 4023.7242854628603 sim_pfm: 32.93959274207009
episode: 140 training return: tensor(17.7948, device='cuda:0')
episode: 141 training return: tensor(-67.0217, device='cuda:0')
episode: 142 training return: tensor(24.7745, device='cuda:0')
episode: 143 training return: tensor(-70.7554, device='cuda:0')
epoch: 36 test_true_pfm: 4012.753583770482 sim_pfm: -0.9905638667017532
episode: 144 training return: tensor(11.0532, device='cuda:0')
episode: 145 training return: tensor(-39.5838, device='cuda:0')
episode: 146 training return: tensor(20.5688, device='cuda:0')
episode: 147 training return: tensor(17.9624, device='cuda:0')
epoch: 37 test_true_pfm: 3973.0559706855597 sim_pfm: -15.284344260474123
episode: 148 training return: tensor(8.6588, device='cuda:0')
episode: 149 training return: tensor(-907.1116, device='cuda:0')
episode: 150 training return: tensor(2.9841, device='cuda:0')
episode: 151 training return: tensor(-0.0551, device='cuda:0')
epoch: 38 test_true_pfm: 3957.048726483334 sim_pfm: 7.423789381136885
episode: 152 training return: tensor(37.1312, device='cuda:0')
episode: 153 training return: tensor(-14.3538, device='cuda:0')
episode: 154 training return: tensor(-48.7154, device='cuda:0')
episode: 155 training return: tensor(-902.8404, device='cuda:0')
epoch: 39 test_true_pfm: 3986.6113278104544 sim_pfm: 4.060356964308691
episode: 156 training return: tensor(12.5252, device='cuda:0')
episode: 157 training return: tensor(39.1353, device='cuda:0')
episode: 158 training return: tensor(-62.4763, device='cuda:0')
episode: 159 training return: tensor(-34.9641, device='cuda:0')
epoch: 40 test_true_pfm: 2887.8864510584717 sim_pfm: -293.2617126557161
episode: 160 training return: tensor(-12.5422, device='cuda:0')
episode: 161 training return: tensor(-23.8947, device='cuda:0')
episode: 162 training return: tensor(-6.0779, device='cuda:0')
episode: 163 training return: tensor(40.5088, device='cuda:0')
epoch: 41 test_true_pfm: 4011.189663088999 sim_pfm: -3.1710323257235964
episode: 164 training return: tensor(10.0589, device='cuda:0')
episode: 165 training return: tensor(53.4866, device='cuda:0')
episode: 166 training return: tensor(-12.5256, device='cuda:0')
episode: 167 training return: tensor(51.2499, device='cuda:0')
epoch: 42 test_true_pfm: 3989.6645852124916 sim_pfm: 45.51431325976349
episode: 168 training return: tensor(-52.4864, device='cuda:0')
episode: 169 training return: tensor(-34.2597, device='cuda:0')
episode: 170 training return: tensor(-6.4166, device='cuda:0')
episode: 171 training return: tensor(-929.3527, device='cuda:0')
epoch: 43 test_true_pfm: 4013.4235249888056 sim_pfm: -1.7950506137955624
episode: 172 training return: tensor(41.1896, device='cuda:0')
episode: 173 training return: tensor(-40.0063, device='cuda:0')
episode: 174 training return: tensor(26.7791, device='cuda:0')
episode: 175 training return: tensor(-935.7629, device='cuda:0')
epoch: 44 test_true_pfm: 4042.8827648153965 sim_pfm: 51.02883685368579
episode: 176 training return: tensor(-3.0415, device='cuda:0')
episode: 177 training return: tensor(-87.6945, device='cuda:0')
episode: 178 training return: tensor(-865.5549, device='cuda:0')
episode: 179 training return: tensor(-109.6677, device='cuda:0')
epoch: 45 test_true_pfm: 4026.576132378579 sim_pfm: 15.471402298426256
episode: 180 training return: tensor(-73.0863, device='cuda:0')
episode: 181 training return: tensor(-12.2857, device='cuda:0')
episode: 182 training return: tensor(5.1102, device='cuda:0')
episode: 183 training return: tensor(55.3813, device='cuda:0')
epoch: 46 test_true_pfm: 4013.523357789482 sim_pfm: 17.685150115091044
episode: 184 training return: tensor(-26.5701, device='cuda:0')
episode: 185 training return: tensor(-33.1683, device='cuda:0')
episode: 186 training return: tensor(-628.0822, device='cuda:0')
episode: 187 training return: tensor(47.9118, device='cuda:0')
epoch: 47 test_true_pfm: 4021.5458297489336 sim_pfm: 16.290058969781967
episode: 188 training return: tensor(-64.8892, device='cuda:0')
episode: 189 training return: tensor(-18.8268, device='cuda:0')
episode: 190 training return: tensor(47.6725, device='cuda:0')
episode: 191 training return: tensor(-37.7254, device='cuda:0')
epoch: 48 test_true_pfm: 4001.514986938257 sim_pfm: -18.722228607647896
episode: 192 training return: tensor(30.1184, device='cuda:0')
episode: 193 training return: tensor(-40.3513, device='cuda:0')
episode: 194 training return: tensor(-23.5479, device='cuda:0')
episode: 195 training return: tensor(19.6115, device='cuda:0')
epoch: 49 test_true_pfm: 3983.050573777902 sim_pfm: 14.943318309359407
episode: 196 training return: tensor(1.8162, device='cuda:0')
episode: 197 training return: tensor(23.1757, device='cuda:0')
episode: 198 training return: tensor(-893.5460, device='cuda:0')
episode: 199 training return: tensor(-20.6165, device='cuda:0')
epoch: 50 test_true_pfm: 4021.039249617967 sim_pfm: 33.07821383324335
episode: 200 training return: tensor(-44.1565, device='cuda:0')
episode: 201 training return: tensor(-11.5956, device='cuda:0')
episode: 202 training return: tensor(-10.0013, device='cuda:0')
episode: 203 training return: tensor(17.8941, device='cuda:0')
epoch: 51 test_true_pfm: 4031.0234028983928 sim_pfm: 58.29870362993097
episode: 204 training return: tensor(32.8891, device='cuda:0')
episode: 205 training return: tensor(-53.9481, device='cuda:0')
episode: 206 training return: tensor(27.4661, device='cuda:0')
episode: 207 training return: tensor(54.2638, device='cuda:0')
epoch: 52 test_true_pfm: 4019.856969419096 sim_pfm: -244.64876845876765
episode: 208 training return: tensor(31.8501, device='cuda:0')
episode: 209 training return: tensor(-12.1103, device='cuda:0')
episode: 210 training return: tensor(0.8511, device='cuda:0')
episode: 211 training return: tensor(7.2458, device='cuda:0')
epoch: 53 test_true_pfm: 4000.5449220973337 sim_pfm: 18.215594652690925
episode: 212 training return: tensor(41.8557, device='cuda:0')
episode: 213 training return: tensor(-0.2896, device='cuda:0')
episode: 214 training return: tensor(4.0914, device='cuda:0')
episode: 215 training return: tensor(-42.2621, device='cuda:0')
epoch: 54 test_true_pfm: 4016.7721992828333 sim_pfm: 27.85519918636419
episode: 216 training return: tensor(-9.0807, device='cuda:0')
episode: 217 training return: tensor(18.4363, device='cuda:0')
episode: 218 training return: tensor(14.0471, device='cuda:0')
episode: 219 training return: tensor(-9.6611, device='cuda:0')
epoch: 55 test_true_pfm: 3994.2728573211193 sim_pfm: 4.51962780552761
episode: 220 training return: tensor(32.2642, device='cuda:0')
episode: 221 training return: tensor(-35.3845, device='cuda:0')
episode: 222 training return: tensor(8.0520, device='cuda:0')
episode: 223 training return: tensor(-30.2821, device='cuda:0')
epoch: 56 test_true_pfm: 4004.984967988128 sim_pfm: 25.79570361447016
episode: 224 training return: tensor(-11.6711, device='cuda:0')
episode: 225 training return: tensor(-21.2092, device='cuda:0')
episode: 226 training return: tensor(-45.6444, device='cuda:0')
episode: 227 training return: tensor(43.1026, device='cuda:0')
epoch: 57 test_true_pfm: 2818.3364705425606 sim_pfm: 3.114611976130012
episode: 228 training return: tensor(-28.4312, device='cuda:0')
episode: 229 training return: tensor(-29.2050, device='cuda:0')
episode: 230 training return: tensor(-0.9103, device='cuda:0')
episode: 231 training return: tensor(-54.7000, device='cuda:0')
epoch: 58 test_true_pfm: 4010.407469690643 sim_pfm: 26.714739394558517
episode: 232 training return: tensor(7.3730, device='cuda:0')
episode: 233 training return: tensor(29.2658, device='cuda:0')
episode: 234 training return: tensor(-33.2260, device='cuda:0')
episode: 235 training return: tensor(-25.9823, device='cuda:0')
epoch: 59 test_true_pfm: 4006.100235247166 sim_pfm: 34.836959553475026
episode: 236 training return: tensor(-59.1896, device='cuda:0')
episode: 237 training return: tensor(-24.6220, device='cuda:0')
episode: 238 training return: tensor(52.5586, device='cuda:0')
episode: 239 training return: tensor(30.8270, device='cuda:0')
epoch: 60 test_true_pfm: 3979.8577825658394 sim_pfm: 12.424676054273732
episode: 240 training return: tensor(55.0916, device='cuda:0')
episode: 241 training return: tensor(0.0515, device='cuda:0')
episode: 242 training return: tensor(-912.4821, device='cuda:0')
episode: 243 training return: tensor(48.6395, device='cuda:0')
epoch: 61 test_true_pfm: 3988.756127038338 sim_pfm: 3.480903279501945
episode: 244 training return: tensor(-7.8596, device='cuda:0')
episode: 245 training return: tensor(16.2408, device='cuda:0')
episode: 246 training return: tensor(60.6501, device='cuda:0')
episode: 247 training return: tensor(28.4900, device='cuda:0')
epoch: 62 test_true_pfm: 4007.0525298685548 sim_pfm: 4.169511797711796
episode: 248 training return: tensor(-25.8629, device='cuda:0')
episode: 249 training return: tensor(21.3318, device='cuda:0')
episode: 250 training return: tensor(4.6369, device='cuda:0')
episode: 251 training return: tensor(-39.9792, device='cuda:0')
epoch: 63 test_true_pfm: 3971.206590965461 sim_pfm: 14.017773013775392
episode: 252 training return: tensor(-24.6005, device='cuda:0')
episode: 253 training return: tensor(27.7831, device='cuda:0')
episode: 254 training return: tensor(-3.6814, device='cuda:0')
episode: 255 training return: tensor(-26.6080, device='cuda:0')
epoch: 64 test_true_pfm: 4000.6761942948306 sim_pfm: 27.214821921506275
episode: 256 training return: tensor(-11.4629, device='cuda:0')
episode: 257 training return: tensor(-0.2964, device='cuda:0')
episode: 258 training return: tensor(-83.6472, device='cuda:0')
episode: 259 training return: tensor(6.4394, device='cuda:0')
epoch: 65 test_true_pfm: 3971.6211209123126 sim_pfm: -26.652724927126354
episode: 260 training return: tensor(31.8093, device='cuda:0')
episode: 261 training return: tensor(-57.3855, device='cuda:0')
episode: 262 training return: tensor(9.9268, device='cuda:0')
episode: 263 training return: tensor(-35.3667, device='cuda:0')
epoch: 66 test_true_pfm: 3966.875089174948 sim_pfm: 9.505506781085083
episode: 264 training return: tensor(-46.8999, device='cuda:0')
episode: 265 training return: tensor(50.3265, device='cuda:0')
episode: 266 training return: tensor(-684.6097, device='cuda:0')
episode: 267 training return: tensor(5.4499, device='cuda:0')
epoch: 67 test_true_pfm: 4000.302704168566 sim_pfm: 21.857899119592428
episode: 268 training return: tensor(-26.1313, device='cuda:0')
episode: 269 training return: tensor(26.1181, device='cuda:0')
episode: 270 training return: tensor(-946.3991, device='cuda:0')
episode: 271 training return: tensor(-806.8960, device='cuda:0')
epoch: 68 test_true_pfm: 3991.6162637454763 sim_pfm: -31.16988332233935
episode: 272 training return: tensor(39.8083, device='cuda:0')
episode: 273 training return: tensor(-73.6138, device='cuda:0')
episode: 274 training return: tensor(18.9020, device='cuda:0')
episode: 275 training return: tensor(-0.2002, device='cuda:0')
epoch: 69 test_true_pfm: 3976.998269684734 sim_pfm: 15.059152508474654
episode: 276 training return: tensor(-30.3572, device='cuda:0')
episode: 277 training return: tensor(17.3790, device='cuda:0')
episode: 278 training return: tensor(-3.7575, device='cuda:0')
episode: 279 training return: tensor(21.8241, device='cuda:0')
epoch: 70 test_true_pfm: 3990.9804647420783 sim_pfm: 10.834696674428415
episode: 280 training return: tensor(-16.9946, device='cuda:0')
episode: 281 training return: tensor(25.3994, device='cuda:0')
episode: 282 training return: tensor(-53.0452, device='cuda:0')
episode: 283 training return: tensor(27.7840, device='cuda:0')
epoch: 71 test_true_pfm: 3991.6404901871897 sim_pfm: 21.57745541879558
episode: 284 training return: tensor(-22.5143, device='cuda:0')
episode: 285 training return: tensor(-916.6292, device='cuda:0')
episode: 286 training return: tensor(-22.5254, device='cuda:0')
episode: 287 training return: tensor(-27.0688, device='cuda:0')
epoch: 72 test_true_pfm: 3998.8496689084936 sim_pfm: -11.922455937077757
episode: 288 training return: tensor(-4.9803, device='cuda:0')
episode: 289 training return: tensor(-3.1918, device='cuda:0')
episode: 290 training return: tensor(-10.3490, device='cuda:0')
episode: 291 training return: tensor(-80.5174, device='cuda:0')
epoch: 73 test_true_pfm: 3968.076915542409 sim_pfm: -283.235006408504
episode: 292 training return: tensor(-17.7104, device='cuda:0')
episode: 293 training return: tensor(27.8628, device='cuda:0')
episode: 294 training return: tensor(-20.7634, device='cuda:0')
episode: 295 training return: tensor(-925.7140, device='cuda:0')
epoch: 74 test_true_pfm: 4023.6740919417275 sim_pfm: 23.22458976850612
episode: 296 training return: tensor(-6.9578, device='cuda:0')
episode: 297 training return: tensor(-34.1766, device='cuda:0')
episode: 298 training return: tensor(-70.5552, device='cuda:0')
episode: 299 training return: tensor(23.0819, device='cuda:0')
epoch: 75 test_true_pfm: 3993.4989051307093 sim_pfm: 3.6733080161502585
episode: 300 training return: tensor(-36.2936, device='cuda:0')
episode: 301 training return: tensor(23.8941, device='cuda:0')
episode: 302 training return: tensor(-33.6014, device='cuda:0')
episode: 303 training return: tensor(3.4355, device='cuda:0')
epoch: 76 test_true_pfm: 4002.390362568132 sim_pfm: 14.534430960629834
episode: 304 training return: tensor(-59.3505, device='cuda:0')
episode: 305 training return: tensor(-921.3983, device='cuda:0')
episode: 306 training return: tensor(16.6514, device='cuda:0')
episode: 307 training return: tensor(-52.6261, device='cuda:0')
epoch: 77 test_true_pfm: 3999.836382160138 sim_pfm: 48.12296915155215
episode: 308 training return: tensor(48.6547, device='cuda:0')
episode: 309 training return: tensor(41.2313, device='cuda:0')
episode: 310 training return: tensor(-7.0069, device='cuda:0')
episode: 311 training return: tensor(-11.9474, device='cuda:0')
epoch: 78 test_true_pfm: 4000.4845412689247 sim_pfm: 20.407734205694094
episode: 312 training return: tensor(22.2391, device='cuda:0')
episode: 313 training return: tensor(54.2194, device='cuda:0')
episode: 314 training return: tensor(-14.8495, device='cuda:0')
episode: 315 training return: tensor(-1.1852, device='cuda:0')
epoch: 79 test_true_pfm: 4047.644406841675 sim_pfm: 32.64594232884701
episode: 316 training return: tensor(3.2718, device='cuda:0')
episode: 317 training return: tensor(-23.0957, device='cuda:0')
episode: 318 training return: tensor(39.3608, device='cuda:0')
episode: 319 training return: tensor(22.2291, device='cuda:0')
epoch: 80 test_true_pfm: 3988.851591353045 sim_pfm: -29.051307724468643
episode: 320 training return: tensor(-14.2210, device='cuda:0')
episode: 321 training return: tensor(-827.3578, device='cuda:0')
episode: 322 training return: tensor(42.4728, device='cuda:0')
episode: 323 training return: tensor(-6.4587, device='cuda:0')
epoch: 81 test_true_pfm: 3964.9512907058966 sim_pfm: 7.90824567474192
episode: 324 training return: tensor(47.1333, device='cuda:0')
episode: 325 training return: tensor(30.2216, device='cuda:0')
episode: 326 training return: tensor(-899.4994, device='cuda:0')
episode: 327 training return: tensor(-58.2862, device='cuda:0')
epoch: 82 test_true_pfm: 3998.01415238413 sim_pfm: 8.4912556152946
episode: 328 training return: tensor(41.0687, device='cuda:0')
episode: 329 training return: tensor(38.1639, device='cuda:0')
episode: 330 training return: tensor(26.5386, device='cuda:0')
episode: 331 training return: tensor(26.8032, device='cuda:0')
epoch: 83 test_true_pfm: 2812.1700197692785 sim_pfm: -0.5197511388299366
episode: 332 training return: tensor(31.5839, device='cuda:0')
episode: 333 training return: tensor(31.6066, device='cuda:0')
episode: 334 training return: tensor(49.4781, device='cuda:0')
episode: 335 training return: tensor(-31.8390, device='cuda:0')
epoch: 84 test_true_pfm: 4016.644052285072 sim_pfm: 21.684140805349063
episode: 336 training return: tensor(25.5476, device='cuda:0')
episode: 337 training return: tensor(7.3452, device='cuda:0')
episode: 338 training return: tensor(-79.3106, device='cuda:0')
episode: 339 training return: tensor(-4.7652, device='cuda:0')
epoch: 85 test_true_pfm: 3971.212077137212 sim_pfm: 46.280413288758915
episode: 340 training return: tensor(25.2674, device='cuda:0')
episode: 341 training return: tensor(40.6930, device='cuda:0')
episode: 342 training return: tensor(-33.2105, device='cuda:0')
episode: 343 training return: tensor(17.1392, device='cuda:0')
epoch: 86 test_true_pfm: 3992.5163810209483 sim_pfm: 2.4749383762634047
episode: 344 training return: tensor(26.0687, device='cuda:0')
episode: 345 training return: tensor(29.4899, device='cuda:0')
episode: 346 training return: tensor(0.2598, device='cuda:0')
episode: 347 training return: tensor(23.7130, device='cuda:0')
epoch: 87 test_true_pfm: 4035.067864523154 sim_pfm: 28.253128401673166
episode: 348 training return: tensor(49.3391, device='cuda:0')
episode: 349 training return: tensor(-9.1814, device='cuda:0')
episode: 350 training return: tensor(-48.5067, device='cuda:0')
episode: 351 training return: tensor(52.7500, device='cuda:0')
epoch: 88 test_true_pfm: 4019.28512714396 sim_pfm: 20.35421101609245
episode: 352 training return: tensor(-35.2628, device='cuda:0')
episode: 353 training return: tensor(56.8909, device='cuda:0')
episode: 354 training return: tensor(15.7280, device='cuda:0')
episode: 355 training return: tensor(51.4225, device='cuda:0')
epoch: 89 test_true_pfm: 4002.217398206702 sim_pfm: 26.24694362959902
episode: 356 training return: tensor(37.8975, device='cuda:0')
episode: 357 training return: tensor(0.0387, device='cuda:0')
episode: 358 training return: tensor(56.0602, device='cuda:0')
episode: 359 training return: tensor(-34.9761, device='cuda:0')
epoch: 90 test_true_pfm: 3981.9333718180255 sim_pfm: -5.586900617917611
episode: 360 training return: tensor(26.0882, device='cuda:0')
episode: 361 training return: tensor(30.6383, device='cuda:0')
episode: 362 training return: tensor(-1.4804, device='cuda:0')
episode: 363 training return: tensor(43.4350, device='cuda:0')
epoch: 91 test_true_pfm: 4018.9126419888835 sim_pfm: 30.51816701188606
episode: 364 training return: tensor(40.8795, device='cuda:0')
episode: 365 training return: tensor(1.2033, device='cuda:0')
episode: 366 training return: tensor(2.3030, device='cuda:0')
episode: 367 training return: tensor(54.4329, device='cuda:0')
epoch: 92 test_true_pfm: 4049.646356593427 sim_pfm: 12.956837195515012
episode: 368 training return: tensor(7.1327, device='cuda:0')
episode: 369 training return: tensor(11.9009, device='cuda:0')
episode: 370 training return: tensor(27.0234, device='cuda:0')
episode: 371 training return: tensor(-64.8126, device='cuda:0')
epoch: 93 test_true_pfm: 3997.298351743297 sim_pfm: 25.895838306945127
episode: 372 training return: tensor(18.3019, device='cuda:0')
episode: 373 training return: tensor(-740.1119, device='cuda:0')
episode: 374 training return: tensor(21.1943, device='cuda:0')
episode: 375 training return: tensor(13.7527, device='cuda:0')
epoch: 94 test_true_pfm: 3980.300677201912 sim_pfm: 2.8329101724763555
episode: 376 training return: tensor(18.7884, device='cuda:0')
episode: 377 training return: tensor(50.5583, device='cuda:0')
episode: 378 training return: tensor(51.1479, device='cuda:0')
episode: 379 training return: tensor(-11.5510, device='cuda:0')
epoch: 95 test_true_pfm: 4038.6259797844905 sim_pfm: 12.374868988418408
episode: 380 training return: tensor(-33.2490, device='cuda:0')
episode: 381 training return: tensor(-24.8912, device='cuda:0')
episode: 382 training return: tensor(70.0927, device='cuda:0')
episode: 383 training return: tensor(26.6775, device='cuda:0')
epoch: 96 test_true_pfm: 4008.0244790013962 sim_pfm: 15.45329867041437
episode: 384 training return: tensor(39.3962, device='cuda:0')
episode: 385 training return: tensor(5.5846, device='cuda:0')
episode: 386 training return: tensor(67.5504, device='cuda:0')
episode: 387 training return: tensor(69.4113, device='cuda:0')
epoch: 97 test_true_pfm: 4025.7121751724494 sim_pfm: 34.129969089262886
episode: 388 training return: tensor(52.8499, device='cuda:0')
episode: 389 training return: tensor(48.2079, device='cuda:0')
episode: 390 training return: tensor(27.9196, device='cuda:0')
episode: 391 training return: tensor(2.5619, device='cuda:0')
epoch: 98 test_true_pfm: 4005.6586378077395 sim_pfm: 25.446189531716907
episode: 392 training return: tensor(23.6763, device='cuda:0')
episode: 393 training return: tensor(6.0220, device='cuda:0')
episode: 394 training return: tensor(6.0628, device='cuda:0')
episode: 395 training return: tensor(-11.3803, device='cuda:0')
epoch: 99 test_true_pfm: 3990.902326071048 sim_pfm: -12.19484792308261
episode: 396 training return: tensor(9.6782, device='cuda:0')
episode: 397 training return: tensor(48.4945, device='cuda:0')
episode: 398 training return: tensor(-25.4046, device='cuda:0')
episode: 399 training return: tensor(31.4692, device='cuda:0')
epoch: 100 test_true_pfm: 4034.052898536944 sim_pfm: 56.87638770104968
episode: 400 training return: tensor(34.8180, device='cuda:0')
episode: 401 training return: tensor(-56.7459, device='cuda:0')
episode: 402 training return: tensor(-2.2461, device='cuda:0')
episode: 403 training return: tensor(55.7931, device='cuda:0')
epoch: 101 test_true_pfm: 4037.42412973791 sim_pfm: 48.1819159128548
episode: 404 training return: tensor(29.4084, device='cuda:0')
episode: 405 training return: tensor(36.0519, device='cuda:0')
episode: 406 training return: tensor(-1.3897, device='cuda:0')
episode: 407 training return: tensor(-41.1048, device='cuda:0')
epoch: 102 test_true_pfm: 3967.826980819365 sim_pfm: 25.396101012534928
episode: 408 training return: tensor(2.5277, device='cuda:0')
episode: 409 training return: tensor(-14.6209, device='cuda:0')
episode: 410 training return: tensor(69.9757, device='cuda:0')
episode: 411 training return: tensor(25.3864, device='cuda:0')
epoch: 103 test_true_pfm: 4015.9104562519374 sim_pfm: 4.479537938711776
episode: 412 training return: tensor(25.0487, device='cuda:0')
episode: 413 training return: tensor(12.2181, device='cuda:0')
episode: 414 training return: tensor(61.2928, device='cuda:0')
episode: 415 training return: tensor(52.4308, device='cuda:0')
epoch: 104 test_true_pfm: 4027.704597201202 sim_pfm: 29.705332560115494
episode: 416 training return: tensor(-22.0163, device='cuda:0')
episode: 417 training return: tensor(24.2006, device='cuda:0')
episode: 418 training return: tensor(36.8624, device='cuda:0')
episode: 419 training return: tensor(62.6397, device='cuda:0')
epoch: 105 test_true_pfm: 4068.0034200917266 sim_pfm: 6.017264390412795
episode: 420 training return: tensor(14.0912, device='cuda:0')
episode: 421 training return: tensor(-3.2090, device='cuda:0')
episode: 422 training return: tensor(-8.7524, device='cuda:0')
episode: 423 training return: tensor(65.2537, device='cuda:0')
epoch: 106 test_true_pfm: 4014.292447921805 sim_pfm: 27.219179058321362
episode: 424 training return: tensor(29.6876, device='cuda:0')
episode: 425 training return: tensor(29.8152, device='cuda:0')
episode: 426 training return: tensor(48.2727, device='cuda:0')
episode: 427 training return: tensor(7.9183, device='cuda:0')
epoch: 107 test_true_pfm: 4067.888391741051 sim_pfm: 62.169406398306215
episode: 428 training return: tensor(44.2070, device='cuda:0')
episode: 429 training return: tensor(9.5885, device='cuda:0')
episode: 430 training return: tensor(35.1816, device='cuda:0')
episode: 431 training return: tensor(43.5639, device='cuda:0')
epoch: 108 test_true_pfm: 4017.9507629423206 sim_pfm: 42.821207604157586
episode: 432 training return: tensor(60.3037, device='cuda:0')
episode: 433 training return: tensor(-4.5448, device='cuda:0')
episode: 434 training return: tensor(7.7103, device='cuda:0')
episode: 435 training return: tensor(-44.2745, device='cuda:0')
epoch: 109 test_true_pfm: 4020.9471297321347 sim_pfm: 21.014271212761134
episode: 436 training return: tensor(30.8208, device='cuda:0')
episode: 437 training return: tensor(20.5204, device='cuda:0')
episode: 438 training return: tensor(-16.3213, device='cuda:0')
episode: 439 training return: tensor(19.3452, device='cuda:0')
epoch: 110 test_true_pfm: 4016.020568675256 sim_pfm: 48.840404030052014
episode: 440 training return: tensor(15.8697, device='cuda:0')
episode: 441 training return: tensor(55.6425, device='cuda:0')
episode: 442 training return: tensor(-920.8035, device='cuda:0')
episode: 443 training return: tensor(31.6762, device='cuda:0')
epoch: 111 test_true_pfm: 4000.588717392713 sim_pfm: 27.08805789061201
episode: 444 training return: tensor(53.9994, device='cuda:0')
episode: 445 training return: tensor(-11.7912, device='cuda:0')
episode: 446 training return: tensor(17.8649, device='cuda:0')
episode: 447 training return: tensor(5.2016, device='cuda:0')
epoch: 112 test_true_pfm: 4019.5258938090974 sim_pfm: -21.577035410048364
episode: 448 training return: tensor(20.3483, device='cuda:0')
episode: 449 training return: tensor(20.8346, device='cuda:0')
episode: 450 training return: tensor(8.5258, device='cuda:0')
episode: 451 training return: tensor(-44.9182, device='cuda:0')
epoch: 113 test_true_pfm: 4045.444517553724 sim_pfm: 36.70821979359607
episode: 452 training return: tensor(33.1781, device='cuda:0')
episode: 453 training return: tensor(31.6270, device='cuda:0')
episode: 454 training return: tensor(41.5730, device='cuda:0')
episode: 455 training return: tensor(-67.1259, device='cuda:0')
epoch: 114 test_true_pfm: 4054.558738365669 sim_pfm: 40.09426246241977
episode: 456 training return: tensor(58.5908, device='cuda:0')
episode: 457 training return: tensor(36.4315, device='cuda:0')
episode: 458 training return: tensor(68.9171, device='cuda:0')
episode: 459 training return: tensor(43.7304, device='cuda:0')
epoch: 115 test_true_pfm: 3988.8536081058205 sim_pfm: 44.38945115269356
episode: 460 training return: tensor(16.0835, device='cuda:0')
episode: 461 training return: tensor(73.1328, device='cuda:0')
episode: 462 training return: tensor(63.7704, device='cuda:0')
episode: 463 training return: tensor(44.2419, device='cuda:0')
epoch: 116 test_true_pfm: 4012.0580627930553 sim_pfm: 5.329462638745706
episode: 464 training return: tensor(-27.2897, device='cuda:0')
episode: 465 training return: tensor(28.0963, device='cuda:0')
episode: 466 training return: tensor(47.3344, device='cuda:0')
episode: 467 training return: tensor(49.4447, device='cuda:0')
epoch: 117 test_true_pfm: 4037.178061628736 sim_pfm: 29.606134666712023
episode: 468 training return: tensor(39.3521, device='cuda:0')
episode: 469 training return: tensor(-12.0920, device='cuda:0')
episode: 470 training return: tensor(69.3659, device='cuda:0')
episode: 471 training return: tensor(35.0264, device='cuda:0')
epoch: 118 test_true_pfm: 4045.5706641246684 sim_pfm: 25.54494983917296
episode: 472 training return: tensor(-3.8338, device='cuda:0')
episode: 473 training return: tensor(48.3435, device='cuda:0')
episode: 474 training return: tensor(45.6397, device='cuda:0')
episode: 475 training return: tensor(49.9370, device='cuda:0')
epoch: 119 test_true_pfm: 4038.001435251977 sim_pfm: 19.687616818303166
episode: 476 training return: tensor(36.8698, device='cuda:0')
episode: 477 training return: tensor(37.5822, device='cuda:0')
episode: 478 training return: tensor(15.6559, device='cuda:0')
episode: 479 training return: tensor(35.1167, device='cuda:0')
epoch: 120 test_true_pfm: 3999.7562668183505 sim_pfm: 3.777022867288906
episode: 480 training return: tensor(25.5719, device='cuda:0')
episode: 481 training return: tensor(2.7001, device='cuda:0')
episode: 482 training return: tensor(0.6448, device='cuda:0')
episode: 483 training return: tensor(5.7788, device='cuda:0')
epoch: 121 test_true_pfm: 4010.6777137938516 sim_pfm: 61.05704693247875
episode: 484 training return: tensor(11.4938, device='cuda:0')
episode: 485 training return: tensor(13.8285, device='cuda:0')
episode: 486 training return: tensor(-22.8824, device='cuda:0')
episode: 487 training return: tensor(-16.4243, device='cuda:0')
epoch: 122 test_true_pfm: 4059.066741728549 sim_pfm: 52.35502339344627
episode: 488 training return: tensor(-15.7432, device='cuda:0')
episode: 489 training return: tensor(45.7965, device='cuda:0')
episode: 490 training return: tensor(38.1976, device='cuda:0')
episode: 491 training return: tensor(52.8567, device='cuda:0')
epoch: 123 test_true_pfm: 4051.011662832049 sim_pfm: 45.03516039971146
episode: 492 training return: tensor(21.9701, device='cuda:0')
episode: 493 training return: tensor(-39.3131, device='cuda:0')
episode: 494 training return: tensor(-68.1774, device='cuda:0')
episode: 495 training return: tensor(55.3212, device='cuda:0')
epoch: 124 test_true_pfm: 4039.60195906915 sim_pfm: 66.20593295041665
episode: 496 training return: tensor(-15.7715, device='cuda:0')
episode: 497 training return: tensor(35.8145, device='cuda:0')
episode: 498 training return: tensor(46.2560, device='cuda:0')
episode: 499 training return: tensor(38.3560, device='cuda:0')
epoch: 125 test_true_pfm: 4018.2575347321376 sim_pfm: 40.3303347924666
episode: 500 training return: tensor(56.0423, device='cuda:0')
episode: 501 training return: tensor(19.2563, device='cuda:0')
episode: 502 training return: tensor(57.5800, device='cuda:0')
episode: 503 training return: tensor(19.8337, device='cuda:0')
epoch: 126 test_true_pfm: 4016.3138115900347 sim_pfm: 33.56583147090472
episode: 504 training return: tensor(28.8750, device='cuda:0')
episode: 505 training return: tensor(-69.5187, device='cuda:0')
episode: 506 training return: tensor(38.5606, device='cuda:0')
episode: 507 training return: tensor(45.1283, device='cuda:0')
epoch: 127 test_true_pfm: 4035.1643768781796 sim_pfm: 65.33934555806142
episode: 508 training return: tensor(-28.8958, device='cuda:0')
episode: 509 training return: tensor(35.9869, device='cuda:0')
episode: 510 training return: tensor(12.9058, device='cuda:0')
episode: 511 training return: tensor(20.8505, device='cuda:0')
epoch: 128 test_true_pfm: 4044.409157598655 sim_pfm: 60.704869333450915
episode: 512 training return: tensor(32.6695, device='cuda:0')
episode: 513 training return: tensor(60.9375, device='cuda:0')
episode: 514 training return: tensor(-6.1178, device='cuda:0')
episode: 515 training return: tensor(57.9095, device='cuda:0')
epoch: 129 test_true_pfm: 4049.261147284764 sim_pfm: 38.60950841278342
episode: 516 training return: tensor(23.6687, device='cuda:0')
episode: 517 training return: tensor(22.8219, device='cuda:0')
episode: 518 training return: tensor(-1.8896, device='cuda:0')
episode: 519 training return: tensor(47.6692, device='cuda:0')
epoch: 130 test_true_pfm: 4050.0717444540664 sim_pfm: 12.25236357767911
episode: 520 training return: tensor(56.2497, device='cuda:0')
episode: 521 training return: tensor(66.0830, device='cuda:0')
episode: 522 training return: tensor(63.5212, device='cuda:0')
episode: 523 training return: tensor(44.6983, device='cuda:0')
epoch: 131 test_true_pfm: 4038.7787814724966 sim_pfm: 34.325347203072546
episode: 524 training return: tensor(13.1218, device='cuda:0')
episode: 525 training return: tensor(42.8832, device='cuda:0')
episode: 526 training return: tensor(59.6721, device='cuda:0')
episode: 527 training return: tensor(20.9797, device='cuda:0')
epoch: 132 test_true_pfm: 4050.9585390304715 sim_pfm: 51.91545342017586
episode: 528 training return: tensor(-18.8428, device='cuda:0')
episode: 529 training return: tensor(-37.2949, device='cuda:0')
episode: 530 training return: tensor(54.8862, device='cuda:0')
episode: 531 training return: tensor(37.7898, device='cuda:0')
epoch: 133 test_true_pfm: 4004.1385523034455 sim_pfm: 30.58368618938645
episode: 532 training return: tensor(53.3907, device='cuda:0')
episode: 533 training return: tensor(26.2654, device='cuda:0')
episode: 534 training return: tensor(-19.1804, device='cuda:0')
episode: 535 training return: tensor(62.5513, device='cuda:0')
epoch: 134 test_true_pfm: 4017.3407670317833 sim_pfm: 31.25013383467255
episode: 536 training return: tensor(6.1312, device='cuda:0')
episode: 537 training return: tensor(47.9000, device='cuda:0')
episode: 538 training return: tensor(1.7745, device='cuda:0')
episode: 539 training return: tensor(-1.9689, device='cuda:0')
epoch: 135 test_true_pfm: 4065.209531395603 sim_pfm: 49.19635504902302
episode: 540 training return: tensor(55.6415, device='cuda:0')
episode: 541 training return: tensor(23.0228, device='cuda:0')
episode: 542 training return: tensor(12.3002, device='cuda:0')
episode: 543 training return: tensor(61.4717, device='cuda:0')
epoch: 136 test_true_pfm: 4029.3135402351963 sim_pfm: 60.61197521075761
episode: 544 training return: tensor(-16.3306, device='cuda:0')
episode: 545 training return: tensor(26.8287, device='cuda:0')
episode: 546 training return: tensor(62.6290, device='cuda:0')
episode: 547 training return: tensor(7.5538, device='cuda:0')
epoch: 137 test_true_pfm: 4025.7644346257434 sim_pfm: 10.916591755201807
episode: 548 training return: tensor(24.0976, device='cuda:0')
episode: 549 training return: tensor(24.8882, device='cuda:0')
episode: 550 training return: tensor(54.5612, device='cuda:0')
episode: 551 training return: tensor(8.7125, device='cuda:0')
epoch: 138 test_true_pfm: 4014.8302064996083 sim_pfm: 43.60992340135272
episode: 552 training return: tensor(23.4591, device='cuda:0')
episode: 553 training return: tensor(-24.2398, device='cuda:0')
episode: 554 training return: tensor(67.0778, device='cuda:0')
episode: 555 training return: tensor(38.0551, device='cuda:0')
epoch: 139 test_true_pfm: 4062.6612714347016 sim_pfm: 49.803037899779156
episode: 556 training return: tensor(61.6340, device='cuda:0')
episode: 557 training return: tensor(37.4180, device='cuda:0')
episode: 558 training return: tensor(0.3574, device='cuda:0')
episode: 559 training return: tensor(40.0105, device='cuda:0')
epoch: 140 test_true_pfm: 4019.275655792369 sim_pfm: 27.770423940053053
episode: 560 training return: tensor(52.9535, device='cuda:0')
episode: 561 training return: tensor(37.5280, device='cuda:0')
episode: 562 training return: tensor(64.5077, device='cuda:0')
episode: 563 training return: tensor(29.3966, device='cuda:0')
epoch: 141 test_true_pfm: 4041.6667459749883 sim_pfm: 30.948050577901693
episode: 564 training return: tensor(16.4556, device='cuda:0')
episode: 565 training return: tensor(60.3965, device='cuda:0')
episode: 566 training return: tensor(42.2753, device='cuda:0')
episode: 567 training return: tensor(37.7893, device='cuda:0')
epoch: 142 test_true_pfm: 4079.721522639325 sim_pfm: 55.15511705934963
episode: 568 training return: tensor(59.9589, device='cuda:0')
episode: 569 training return: tensor(32.4519, device='cuda:0')
episode: 570 training return: tensor(39.3787, device='cuda:0')
episode: 571 training return: tensor(70.5986, device='cuda:0')
epoch: 143 test_true_pfm: 4053.2904722235307 sim_pfm: 63.90382262718049
episode: 572 training return: tensor(43.0725, device='cuda:0')
episode: 573 training return: tensor(21.3007, device='cuda:0')
episode: 574 training return: tensor(-7.1614, device='cuda:0')
episode: 575 training return: tensor(51.6756, device='cuda:0')
epoch: 144 test_true_pfm: 3997.832694302844 sim_pfm: 16.417013747345965
episode: 576 training return: tensor(36.2290, device='cuda:0')
episode: 577 training return: tensor(54.6645, device='cuda:0')
episode: 578 training return: tensor(73.3500, device='cuda:0')
episode: 579 training return: tensor(23.4272, device='cuda:0')
epoch: 145 test_true_pfm: 4043.9934080193875 sim_pfm: 55.381258403494336
episode: 580 training return: tensor(37.8096, device='cuda:0')
episode: 581 training return: tensor(22.5412, device='cuda:0')
episode: 582 training return: tensor(-18.9161, device='cuda:0')
episode: 583 training return: tensor(-38.7629, device='cuda:0')
epoch: 146 test_true_pfm: 4039.2876861382006 sim_pfm: 42.00179312978677
episode: 584 training return: tensor(61.5833, device='cuda:0')
episode: 585 training return: tensor(35.7284, device='cuda:0')
episode: 586 training return: tensor(27.9010, device='cuda:0')
episode: 587 training return: tensor(54.5983, device='cuda:0')
epoch: 147 test_true_pfm: 4060.5262653174154 sim_pfm: 60.36310296741431
episode: 588 training return: tensor(40.3158, device='cuda:0')
episode: 589 training return: tensor(8.7552, device='cuda:0')
episode: 590 training return: tensor(30.8072, device='cuda:0')
episode: 591 training return: tensor(71.5713, device='cuda:0')
epoch: 148 test_true_pfm: 4037.0553814928044 sim_pfm: 13.851062580525953
episode: 592 training return: tensor(64.7190, device='cuda:0')
episode: 593 training return: tensor(44.3637, device='cuda:0')
episode: 594 training return: tensor(-49.9674, device='cuda:0')
episode: 595 training return: tensor(-11.4768, device='cuda:0')
epoch: 149 test_true_pfm: 4057.9956759096913 sim_pfm: 38.6065624995681
episode: 596 training return: tensor(45.5997, device='cuda:0')
episode: 597 training return: tensor(52.3070, device='cuda:0')
episode: 598 training return: tensor(66.0173, device='cuda:0')
episode: 599 training return: tensor(33.9965, device='cuda:0')
epoch: 150 test_true_pfm: 4058.7036393998824 sim_pfm: 49.35874568162641
