['--env', 'Hopper-v3', '--seed', '2']
epoch: 0 training_loss 0.2835392612963915 test_loss: 0.210729718208313
epoch: 1 training_loss 0.17417992800474166 test_loss: 0.16810253858566285
epoch: 2 training_loss 0.16559984147548676 test_loss: 0.16732903718948364
epoch: 3 training_loss 0.15434980414807797 test_loss: 0.15307705402374266
epoch: 4 training_loss 0.15308909010142088 test_loss: 0.14857906103134155
epoch: 5 training_loss 0.16224744316190481 test_loss: 0.17379167079925537
epoch: 6 training_loss 0.15023486752063037 test_loss: 0.16852582693099977
epoch: 7 training_loss 0.1553826516866684 test_loss: 0.15194753408432007
epoch: 8 training_loss 0.15348558340221644 test_loss: 0.16371885538101197
epoch: 9 training_loss 0.14688125923275946 test_loss: 0.1565121293067932
epoch: 10 training_loss 0.15174047216773034 test_loss: 0.14562588930130005
epoch: 11 training_loss 0.1473295770585537 test_loss: 0.1303965926170349
epoch: 12 training_loss 0.152122510895133 test_loss: 0.16364248991012573
epoch: 13 training_loss 0.14625255487859248 test_loss: 0.16214969158172607
epoch: 14 training_loss 0.14356522865593432 test_loss: 0.13398102521896363
epoch: 15 training_loss 0.14279653076082469 test_loss: 0.1482493042945862
epoch: 16 training_loss 0.14204130277037622 test_loss: 0.1301475167274475
epoch: 17 training_loss 0.13647835351526738 test_loss: 0.15059969425201417
epoch: 18 training_loss 0.14126885810866951 test_loss: 0.14200092554092408
epoch: 19 training_loss 0.14427652984857559 test_loss: 0.14113152027130127
epoch: 20 training_loss 0.13831693463027478 test_loss: 0.15149900913238526
epoch: 21 training_loss 0.13860345497727394 test_loss: 0.12155841588973999
epoch: 22 training_loss 0.14126533675938846 test_loss: 0.1384270429611206
epoch: 23 training_loss 0.1405710979178548 test_loss: 0.13901236057281494
epoch: 24 training_loss 0.13504944663494825 test_loss: 0.1619049310684204
epoch: 25 training_loss 0.13637913279235364 test_loss: 0.14428216218948364
epoch: 26 training_loss 0.15002928644418717 test_loss: 0.15724215507507325
epoch: 27 training_loss 0.14266339886933566 test_loss: 0.15029497146606446
epoch: 28 training_loss 0.13760113678872585 test_loss: 0.1433333396911621
epoch: 29 training_loss 0.13593199204653503 test_loss: 0.135956335067749
epoch: 30 training_loss 0.13979183442890644 test_loss: 0.1392878293991089
epoch: 31 training_loss 0.12865891244262456 test_loss: 0.16109392642974854
epoch: 32 training_loss 0.1452667973935604 test_loss: 0.1212816834449768
epoch: 33 training_loss 0.13002286966890098 test_loss: 0.14262640476226807
epoch: 34 training_loss 0.13868428733199833 test_loss: 0.15079530477523803
epoch: 35 training_loss 0.13849556628614665 test_loss: 0.15107232332229614
epoch: 36 training_loss 0.1355055809393525 test_loss: 0.13695414066314698
epoch: 37 training_loss 0.13065596096217633 test_loss: 0.16635513305664062
epoch: 38 training_loss 0.13173258181661368 test_loss: 0.15347851514816285
epoch: 39 training_loss 0.1389128451421857 test_loss: 0.1365652561187744
epoch: 40 training_loss 0.13425773371011018 test_loss: 0.128582763671875
epoch: 41 training_loss 0.13419826995581388 test_loss: 0.15403826236724855
epoch: 42 training_loss 0.14476474411785603 test_loss: 0.13894492387771606
epoch: 43 training_loss 0.13492169689387082 test_loss: 0.13521356582641603
epoch: 44 training_loss 0.14842263255268334 test_loss: 0.1343239426612854
epoch: 45 training_loss 0.13456321064382792 test_loss: 0.1405152916908264
epoch: 46 training_loss 0.1388968249037862 test_loss: 0.13800350427627564
epoch: 47 training_loss 0.1437704199552536 test_loss: 0.1633831739425659
epoch: 48 training_loss 0.13002319872379303 test_loss: 0.14670920372009277
epoch: 49 training_loss 0.14317340780049562 test_loss: 0.17634141445159912
epoch: 50 training_loss 0.1423627195507288 test_loss: 0.13877228498458863
epoch: 51 training_loss 0.13306971561163664 test_loss: 0.14768238067626954
epoch: 52 training_loss 0.13668301053345203 test_loss: 0.1433472752571106
epoch: 53 training_loss 0.13457998730242252 test_loss: 0.13053462505340577
epoch: 54 training_loss 0.13687408685684205 test_loss: 0.1223643660545349
epoch: 55 training_loss 0.1296100924909115 test_loss: 0.1308513402938843
epoch: 56 training_loss 0.13667570937424897 test_loss: 0.1505826234817505
epoch: 57 training_loss 0.12963556595146655 test_loss: 0.15597833395004274
epoch: 58 training_loss 0.13113037556409834 test_loss: 0.15358364582061768
epoch: 59 training_loss 0.1304861292243004 test_loss: 0.13421742916107177
epoch: 60 training_loss 0.1349995760805905 test_loss: 0.1413716197013855
epoch: 61 training_loss 0.13526453744620084 test_loss: 0.1258637547492981
epoch: 62 training_loss 0.13860906168818474 test_loss: 0.14903639554977416
epoch: 63 training_loss 0.134225078523159 test_loss: 0.15234190225601196
epoch: 64 training_loss 0.13490699913352727 test_loss: 0.14085395336151124
epoch: 65 training_loss 0.13831680260598658 test_loss: 0.13414052724838257
epoch: 66 training_loss 0.13624893244355918 test_loss: 0.1401732325553894
epoch: 67 training_loss 0.12531146256253123 test_loss: 0.11744266748428345
epoch: 68 training_loss 0.13672325771301985 test_loss: 0.12690372467041017
epoch: 69 training_loss 0.13600704522803425 test_loss: 0.14785218238830566
epoch: 70 training_loss 0.13724545173346997 test_loss: 0.13789710998535157
epoch: 71 training_loss 0.1411308788135648 test_loss: 0.12976845502853393
epoch: 72 training_loss 0.13530834555625915 test_loss: 0.12757543325424195
epoch: 73 training_loss 0.13218886479735376 test_loss: 0.1310553789138794
epoch: 74 training_loss 0.13606729589402675 test_loss: 0.1429304838180542
epoch: 75 training_loss 0.13620441768318414 test_loss: 0.12775195837020875
epoch: 76 training_loss 0.1420484198629856 test_loss: 0.12616050243377686
epoch: 77 training_loss 0.1373466382920742 test_loss: 0.13222376108169556
epoch: 78 training_loss 0.13225983623415233 test_loss: 0.11997501850128174
epoch: 79 training_loss 0.1307388499751687 test_loss: 0.14488521814346314
epoch: 80 training_loss 0.12972775124013425 test_loss: 0.14546663761138917
epoch: 81 training_loss 0.13233199544250965 test_loss: 0.14802721738815308
epoch: 82 training_loss 0.13433714713901282 test_loss: 0.13874887228012084
epoch: 83 training_loss 0.13092691875994206 test_loss: 0.1561000108718872
epoch: 84 training_loss 0.1284951054677367 test_loss: 0.12581616640090942
epoch: 85 training_loss 0.13240909311920404 test_loss: 0.17009855508804322
epoch: 86 training_loss 0.14338713824748994 test_loss: 0.1317886233329773
epoch: 87 training_loss 0.12981869768351317 test_loss: 0.13311104774475097
epoch: 88 training_loss 0.12657133765518666 test_loss: 0.13994214534759522
epoch: 89 training_loss 0.12671563398092986 test_loss: 0.12522779703140258
epoch: 90 training_loss 0.1374740683287382 test_loss: 0.16719657182693481
epoch: 91 training_loss 0.13542737554758788 test_loss: 0.12369350194931031
epoch: 92 training_loss 0.13587600097060204 test_loss: 0.1465848922729492
epoch: 93 training_loss 0.13406133126467468 test_loss: 0.13267958164215088
epoch: 94 training_loss 0.13117044050246476 test_loss: 0.12341442108154296
epoch: 95 training_loss 0.13207303375005722 test_loss: 0.16065666675567628
epoch: 96 training_loss 0.13642708260565997 test_loss: 0.11910457611083984
epoch: 97 training_loss 0.12737476274371148 test_loss: 0.1409286618232727
epoch: 98 training_loss 0.13125073730945588 test_loss: 0.14905480146408082
epoch: 99 training_loss 0.1364154537394643 test_loss: 0.13393701314926149
epoch: 100 training_loss 0.1340353399515152 test_loss: 0.14474635124206542
epoch: 101 training_loss 0.12928319077938796 test_loss: 0.14483405351638795
epoch: 102 training_loss 0.1285901288688183 test_loss: 0.11716097593307495
epoch: 103 training_loss 0.13315028853714467 test_loss: 0.13366533517837526
epoch: 104 training_loss 0.13784583497792482 test_loss: 0.12503464221954347
epoch: 105 training_loss 0.12557379230856897 test_loss: 0.1381003737449646
epoch: 106 training_loss 0.13072408381849526 test_loss: 0.14581043720245362
epoch: 107 training_loss 0.13734300803393126 test_loss: 0.1334267497062683
epoch: 108 training_loss 0.1411626624315977 test_loss: 0.1519503951072693
epoch: 109 training_loss 0.12995091203600168 test_loss: 0.15733206272125244
epoch: 110 training_loss 0.13958070242777468 test_loss: 0.1432333469390869
epoch: 111 training_loss 0.12905407067388297 test_loss: 0.1486557364463806
epoch: 112 training_loss 0.13370231673121452 test_loss: 0.14090092182159425
epoch: 113 training_loss 0.12977617926895618 test_loss: 0.134176504611969
epoch: 114 training_loss 0.1306039284542203 test_loss: 0.12164155244827271
epoch: 115 training_loss 0.12340193439275027 test_loss: 0.12712609767913818
epoch: 116 training_loss 0.13586483344435693 test_loss: 0.138766348361969
epoch: 117 training_loss 0.13603834241628646 test_loss: 0.1274745225906372
epoch: 118 training_loss 0.13183137826621533 test_loss: 0.13420039415359497
epoch: 119 training_loss 0.13473101876676083 test_loss: 0.11643227338790893
epoch: 120 training_loss 0.12995013531297445 test_loss: 0.12047417163848877
epoch: 121 training_loss 0.12985899895429612 test_loss: 0.1371253490447998
epoch: 122 training_loss 0.1377657310664654 test_loss: 0.12921561002731324
epoch: 123 training_loss 0.1354863243550062 test_loss: 0.13575531244277955
epoch: 124 training_loss 0.1277647314593196 test_loss: 0.14590305089950562
epoch: 125 training_loss 0.1370577861741185 test_loss: 0.13274840116500855
epoch: 126 training_loss 0.12844715699553488 test_loss: 0.14372948408126832
epoch: 127 training_loss 0.1305208234488964 test_loss: 0.1313481330871582
epoch: 128 training_loss 0.14228559609502553 test_loss: 0.1270667552947998
epoch: 129 training_loss 0.13347533360123634 test_loss: 0.1208489179611206
epoch: 130 training_loss 0.1387840200960636 test_loss: 0.13719791173934937
epoch: 131 training_loss 0.14299191724509 test_loss: 0.1759456992149353
epoch: 132 training_loss 0.1447868936508894 test_loss: 0.15064369440078734
epoch: 133 training_loss 0.1366985983774066 test_loss: 0.12606780529022216
epoch: 134 training_loss 0.1291711215302348 test_loss: 0.12320871353149414
epoch: 135 training_loss 0.14060636330395937 test_loss: 0.15046700239181518
epoch: 136 training_loss 0.13450714997947216 test_loss: 0.14250749349594116
epoch: 137 training_loss 0.1341759828105569 test_loss: 0.13430339097976685
epoch: 138 training_loss 0.12503566667437555 test_loss: 0.15217622518539428
epoch: 139 training_loss 0.13694166289642454 test_loss: 0.14441232681274413
epoch: 140 training_loss 0.1336601186171174 test_loss: 0.144295597076416
epoch: 141 training_loss 0.13089448578655719 test_loss: 0.12645589113235473
epoch: 142 training_loss 0.1283021568134427 test_loss: 0.14931095838546754
epoch: 143 training_loss 0.13085407584905626 test_loss: 0.12840174436569213
epoch: 144 training_loss 0.12958164267241956 test_loss: 0.13288887739181518
epoch: 145 training_loss 0.13927025757730008 test_loss: 0.1483202576637268
epoch: 146 training_loss 0.13456298189237714 test_loss: 0.141808819770813
epoch: 147 training_loss 0.1357410004362464 test_loss: 0.1332552671432495
epoch: 148 training_loss 0.12328175418078899 test_loss: 0.14258086681365967
epoch: 149 training_loss 0.13144533712416887 test_loss: 0.12735675573348998
epoch: 0 training_loss 7.577355937957764 test_loss: 4.38257064819336
epoch: 1 training_loss 3.3697843194007873 test_loss: 2.658927154541016
epoch: 2 training_loss 2.3139425039291384 test_loss: 2.0412067413330077
epoch: 3 training_loss 1.8438619732856751 test_loss: 1.6121627807617187
epoch: 4 training_loss 1.4994406747817992 test_loss: 1.439504337310791
epoch: 5 training_loss 1.3457347416877747 test_loss: 1.2671927452087401
epoch: 6 training_loss 1.2217633759975433 test_loss: 1.1579057693481445
epoch: 7 training_loss 1.118415938615799 test_loss: 1.043111228942871
epoch: 8 training_loss 1.077040412425995 test_loss: 1.0458205223083497
epoch: 9 training_loss 1.0346589636802674 test_loss: 1.089848518371582
epoch: 10 training_loss 0.9669331413507462 test_loss: 0.9388854026794433
epoch: 11 training_loss 0.9248613065481186 test_loss: 0.9226130485534668
epoch: 12 training_loss 0.9190923196077346 test_loss: 0.9076142311096191
epoch: 13 training_loss 0.889360545873642 test_loss: 0.9514917373657227
epoch: 14 training_loss 0.8712136262655258 test_loss: 0.8481942176818847
epoch: 15 training_loss 0.8342937397956848 test_loss: 0.7973787784576416
epoch: 16 training_loss 0.8206936132907867 test_loss: 0.8148536682128906
epoch: 17 training_loss 0.8091992276906967 test_loss: 0.8494993209838867
epoch: 18 training_loss 0.8035624051094055 test_loss: 0.7986661911010742
epoch: 19 training_loss 0.7964826041460037 test_loss: 0.8239667892456055
epoch: 20 training_loss 0.7834013289213181 test_loss: 0.7834586143493653
epoch: 21 training_loss 0.7584976416826248 test_loss: 0.7431187152862548
epoch: 22 training_loss 0.7452424281835556 test_loss: 0.7292939186096191
epoch: 23 training_loss 0.7398989528417588 test_loss: 0.7365379810333252
epoch: 24 training_loss 0.7303686136007309 test_loss: 0.7561088562011719
epoch: 25 training_loss 0.7178897196054459 test_loss: 0.7152828216552735
epoch: 26 training_loss 0.7179257798194886 test_loss: 0.702311897277832
epoch: 27 training_loss 0.7037392729520797 test_loss: 0.7072339534759522
epoch: 28 training_loss 0.6818128544092178 test_loss: 0.7262224674224853
epoch: 29 training_loss 0.6968797218799591 test_loss: 0.6944120407104493
epoch: 30 training_loss 0.6805390816926956 test_loss: 0.7202873229980469
epoch: 31 training_loss 0.6769985896348953 test_loss: 0.660765266418457
epoch: 32 training_loss 0.6711096632480621 test_loss: 0.6655572414398193
epoch: 33 training_loss 0.6633971858024598 test_loss: 0.6757690906524658
epoch: 34 training_loss 0.640688528418541 test_loss: 0.6785289287567139
epoch: 35 training_loss 0.6388536471128464 test_loss: 0.685687255859375
epoch: 36 training_loss 0.6489452600479126 test_loss: 0.6260140895843506
epoch: 37 training_loss 0.6349741625785827 test_loss: 0.6694595813751221
epoch: 38 training_loss 0.6521784120798111 test_loss: 0.6290528774261475
epoch: 39 training_loss 0.6300142115354538 test_loss: 0.6325961112976074
epoch: 40 training_loss 0.6194387322664261 test_loss: 0.6262417316436768
epoch: 41 training_loss 0.6235189783573151 test_loss: 0.5988831043243408
epoch: 42 training_loss 0.6175545340776444 test_loss: 0.6284695625305176
epoch: 43 training_loss 0.6155152654647827 test_loss: 0.7317890167236328
epoch: 44 training_loss 0.6321566367149353 test_loss: 0.6028250694274903
epoch: 45 training_loss 0.5955934190750122 test_loss: 0.6370458602905273
epoch: 46 training_loss 0.5972100710868835 test_loss: 0.6185903549194336
epoch: 47 training_loss 0.6093473726511002 test_loss: 0.5748549461364746
epoch: 48 training_loss 0.6094390630722046 test_loss: 0.606913709640503
epoch: 49 training_loss 0.5869381773471832 test_loss: 0.5897629737854004
epoch: 50 training_loss 0.6079401820898056 test_loss: 0.596971845626831
epoch: 51 training_loss 0.5847952562570572 test_loss: 0.5917770862579346
epoch: 52 training_loss 0.5920750498771667 test_loss: 0.572519588470459
epoch: 53 training_loss 0.5898205184936524 test_loss: 0.5801331043243408
epoch: 54 training_loss 0.5855658954381943 test_loss: 0.5675196170806884
epoch: 55 training_loss 0.5807290950417519 test_loss: 0.5886869907379151
epoch: 56 training_loss 0.5855544418096542 test_loss: 0.5876905441284179
epoch: 57 training_loss 0.5928255587816238 test_loss: 0.5548842906951904
epoch: 58 training_loss 0.5749147140979767 test_loss: 0.5662229061126709
epoch: 59 training_loss 0.5731214046478271 test_loss: 0.5937424182891846
epoch: 60 training_loss 0.5739304512739182 test_loss: 0.579693603515625
epoch: 61 training_loss 0.5597535705566407 test_loss: 0.567282247543335
epoch: 62 training_loss 0.5594465470314026 test_loss: 0.5744478225708007
epoch: 63 training_loss 0.5678665202856064 test_loss: 0.5567896366119385
epoch: 64 training_loss 0.5664599135518074 test_loss: 0.5675207614898682
epoch: 65 training_loss 0.5663206821680069 test_loss: 0.5639697551727295
epoch: 66 training_loss 0.5524898168444633 test_loss: 0.5696125507354737
epoch: 67 training_loss 0.5555524349212646 test_loss: 0.533937644958496
epoch: 68 training_loss 0.5456510916352272 test_loss: 0.5609259128570556
epoch: 69 training_loss 0.5485065564513206 test_loss: 0.5688693046569824
epoch: 70 training_loss 0.5458795887231827 test_loss: 0.5516140937805176
epoch: 71 training_loss 0.5465318730473518 test_loss: 0.5813857555389405
epoch: 72 training_loss 0.5452264314889907 test_loss: 0.5404938220977783
epoch: 73 training_loss 0.5620729640126229 test_loss: 0.5675261974334717
epoch: 74 training_loss 0.5318560492992401 test_loss: 0.5442489147186279
epoch: 75 training_loss 0.5399558171629906 test_loss: 0.5708210468292236
epoch: 76 training_loss 0.5395259386301041 test_loss: 0.5594019889831543
epoch: 77 training_loss 0.5375532081723213 test_loss: 0.5214841842651368
epoch: 78 training_loss 0.5456790399551391 test_loss: 0.571971082687378
epoch: 79 training_loss 0.5371997708082199 test_loss: 0.5493378162384033
epoch: 80 training_loss 0.53421021848917 test_loss: 0.5369351863861084
epoch: 81 training_loss 0.5385080575942993 test_loss: 0.5308614253997803
epoch: 82 training_loss 0.5223951870203019 test_loss: 0.5175510883331299
epoch: 83 training_loss 0.5314172717928887 test_loss: 0.5272618293762207
epoch: 84 training_loss 0.5242746669054031 test_loss: 0.5337181091308594
epoch: 85 training_loss 0.5324476969242096 test_loss: 0.5262115478515625
epoch: 86 training_loss 0.527924373447895 test_loss: 0.5320652484893799
epoch: 87 training_loss 0.5270980626344681 test_loss: 0.520141315460205
epoch: 88 training_loss 0.5254895657300949 test_loss: 0.5344050884246826
epoch: 89 training_loss 0.5279181754589081 test_loss: 0.5292218685150146
epoch: 90 training_loss 0.5251276180148124 test_loss: 0.5069554328918457
epoch: 91 training_loss 0.5274468633532524 test_loss: 0.5019229888916016
epoch: 92 training_loss 0.5235777923464775 test_loss: 0.5211291313171387
epoch: 93 training_loss 0.5161021345853806 test_loss: 0.5206366539001465
epoch: 94 training_loss 0.5125357276201248 test_loss: 0.5118760108947754
epoch: 95 training_loss 0.5250594735145568 test_loss: 0.521680212020874
epoch: 96 training_loss 0.5199931955337525 test_loss: 0.5235077857971191
epoch: 97 training_loss 0.5140227049589157 test_loss: 0.5318535804748535
epoch: 98 training_loss 0.5034429156780242 test_loss: 0.5368916988372803
epoch: 99 training_loss 0.5190297219157219 test_loss: 0.5103851318359375
epoch: 100 training_loss 0.5076011109352112 test_loss: 0.5231081485748291
epoch: 101 training_loss 0.49894807130098345 test_loss: 0.4821990966796875
epoch: 102 training_loss 0.5012195637822151 test_loss: 0.514061450958252
epoch: 103 training_loss 0.5074762853980065 test_loss: 0.5186431407928467
epoch: 104 training_loss 0.5061991500854492 test_loss: 0.5193048477172851
epoch: 105 training_loss 0.5063371959328652 test_loss: 0.5369020462036133
epoch: 106 training_loss 0.5104940864443779 test_loss: 0.5004723548889161
epoch: 107 training_loss 0.4985570496320724 test_loss: 0.4877347469329834
epoch: 108 training_loss 0.5047747674584389 test_loss: 0.5045076370239258
epoch: 109 training_loss 0.496427820622921 test_loss: 0.5072000026702881
epoch: 110 training_loss 0.4984818643331528 test_loss: 0.4915611267089844
epoch: 111 training_loss 0.4951422855257988 test_loss: 0.4944798469543457
epoch: 112 training_loss 0.5033790469169617 test_loss: 0.4859595775604248
epoch: 113 training_loss 0.49797071784734726 test_loss: 0.4895631313323975
epoch: 114 training_loss 0.4927479287981987 test_loss: 0.5039480209350586
epoch: 115 training_loss 0.493434274494648 test_loss: 0.4984330177307129
epoch: 116 training_loss 0.49626892179250714 test_loss: 0.4835012435913086
epoch: 117 training_loss 0.49153743237257 test_loss: 0.5065129280090332
epoch: 118 training_loss 0.4897557020187378 test_loss: 0.5077937126159668
epoch: 119 training_loss 0.49323412716388704 test_loss: 0.49692397117614745
epoch: 120 training_loss 0.49211714178323746 test_loss: 0.4788708209991455
epoch: 121 training_loss 0.4887722960114479 test_loss: 0.4905392646789551
epoch: 122 training_loss 0.4917577999830246 test_loss: 0.47808032035827636
epoch: 123 training_loss 0.48911388039588927 test_loss: 0.47727222442626954
epoch: 124 training_loss 0.48860391080379484 test_loss: 0.5141571521759033
epoch: 125 training_loss 0.48886962205171586 test_loss: 0.47182226181030273
epoch: 126 training_loss 0.4845136415958404 test_loss: 0.47945656776428225
epoch: 127 training_loss 0.4918169376254082 test_loss: 0.5340914249420166
epoch: 128 training_loss 0.49149385660886763 test_loss: 0.5080390930175781
epoch: 129 training_loss 0.48150178909301755 test_loss: 0.4807621955871582
epoch: 130 training_loss 0.4832129666209221 test_loss: 0.5116304397583008
epoch: 131 training_loss 0.48250231057405474 test_loss: 0.5034564971923828
epoch: 132 training_loss 0.4864531594514847 test_loss: 0.4782401084899902
epoch: 133 training_loss 0.472316133081913 test_loss: 0.4841440200805664
epoch: 134 training_loss 0.47714770704507825 test_loss: 0.5085958003997803
epoch: 135 training_loss 0.4772572100162506 test_loss: 0.48581652641296386
epoch: 136 training_loss 0.4883567154407501 test_loss: 0.4886516571044922
epoch: 137 training_loss 0.4776549032330513 test_loss: 0.4701037883758545
epoch: 138 training_loss 0.4734123092889786 test_loss: 0.48772430419921875
epoch: 139 training_loss 0.46613235861063 test_loss: 0.4760457515716553
epoch: 140 training_loss 0.48873655647039416 test_loss: 0.4680917263031006
epoch: 141 training_loss 0.48274869084358213 test_loss: 0.48984413146972655
epoch: 142 training_loss 0.48232321739196776 test_loss: 0.48362717628479
epoch: 143 training_loss 0.48568795412778853 test_loss: 0.46973819732666017
epoch: 144 training_loss 0.47757363587617874 test_loss: 0.5017114639282226
epoch: 145 training_loss 0.4722216394543648 test_loss: 0.48495020866394045
epoch: 146 training_loss 0.4750237214565277 test_loss: 0.5058414459228515
epoch: 147 training_loss 0.47253335773944855 test_loss: 0.4725033760070801
epoch: 148 training_loss 0.47565461426973343 test_loss: 0.51014404296875
epoch: 149 training_loss 0.47422989547252653 test_loss: 0.46868104934692384
3194.161597455298
episode: 0 training return: tensor(-2.1084, device='cuda:0')
episode: 1 training return: tensor(6.3906, device='cuda:0')
episode: 2 training return: tensor(-384.5103, device='cuda:0')
episode: 3 training return: tensor(0.1167, device='cuda:0')
epoch: 1 test_true_pfm: 2417.423894747297 sim_pfm: -141.2535507690627
episode: 4 training return: tensor(-690.3663, device='cuda:0')
episode: 5 training return: tensor(-268.0875, device='cuda:0')
episode: 6 training return: tensor(-427.3184, device='cuda:0')
episode: 7 training return: tensor(-690.2294, device='cuda:0')
epoch: 2 test_true_pfm: 1757.7917104908875 sim_pfm: -409.616570127313
episode: 8 training return: tensor(-618.6906, device='cuda:0')
episode: 9 training return: tensor(26.4195, device='cuda:0')
episode: 10 training return: tensor(-606.3634, device='cuda:0')
episode: 11 training return: tensor(-684.0757, device='cuda:0')
epoch: 3 test_true_pfm: 2640.707528377556 sim_pfm: -436.6950724470953
episode: 12 training return: tensor(-641.6526, device='cuda:0')
episode: 13 training return: tensor(-63.7848, device='cuda:0')
episode: 14 training return: tensor(-31.2094, device='cuda:0')
episode: 15 training return: tensor(-21.1391, device='cuda:0')
epoch: 4 test_true_pfm: 3167.503196893242 sim_pfm: -50.28196703853124
episode: 16 training return: tensor(-495.0833, device='cuda:0')
episode: 17 training return: tensor(-10.4510, device='cuda:0')
episode: 18 training return: tensor(-87.5969, device='cuda:0')
episode: 19 training return: tensor(-352.0597, device='cuda:0')
epoch: 5 test_true_pfm: 2878.5899524701417 sim_pfm: -172.3562108753637
episode: 20 training return: tensor(-57.5284, device='cuda:0')
episode: 21 training return: tensor(-415.4602, device='cuda:0')
episode: 22 training return: tensor(-422.3755, device='cuda:0')
episode: 23 training return: tensor(16.6686, device='cuda:0')
epoch: 6 test_true_pfm: 2436.328371617932 sim_pfm: -429.3599918485076
episode: 24 training return: tensor(-580.7958, device='cuda:0')
episode: 25 training return: tensor(-416.8287, device='cuda:0')
episode: 26 training return: tensor(-256.1157, device='cuda:0')
episode: 27 training return: tensor(-608.3795, device='cuda:0')
epoch: 7 test_true_pfm: 3191.6631198810173 sim_pfm: -117.39790629718725
episode: 28 training return: tensor(-654.4568, device='cuda:0')
episode: 29 training return: tensor(-629.1993, device='cuda:0')
episode: 30 training return: tensor(-560.2155, device='cuda:0')
episode: 31 training return: tensor(-623.9615, device='cuda:0')
epoch: 8 test_true_pfm: 2518.8894030213964 sim_pfm: -25.402659502540093
episode: 32 training return: tensor(-208.4469, device='cuda:0')
episode: 33 training return: tensor(-482.9685, device='cuda:0')
episode: 34 training return: tensor(-56.2464, device='cuda:0')
episode: 35 training return: tensor(-43.1226, device='cuda:0')
epoch: 9 test_true_pfm: 2478.4634647329153 sim_pfm: -444.16824376237736
episode: 36 training return: tensor(-30.9003, device='cuda:0')
episode: 37 training return: tensor(68.0616, device='cuda:0')
episode: 38 training return: tensor(-509.0845, device='cuda:0')
episode: 39 training return: tensor(-341.6259, device='cuda:0')
epoch: 10 test_true_pfm: 2621.178192951004 sim_pfm: -314.31056053887977
episode: 40 training return: tensor(-671.6130, device='cuda:0')
episode: 41 training return: tensor(-250.2074, device='cuda:0')
episode: 42 training return: tensor(41.7349, device='cuda:0')
episode: 43 training return: tensor(-688.2633, device='cuda:0')
epoch: 11 test_true_pfm: 3175.4592556258685 sim_pfm: -148.76050314563327
episode: 44 training return: tensor(-341.9706, device='cuda:0')
episode: 45 training return: tensor(-646.5616, device='cuda:0')
episode: 46 training return: tensor(0.9762, device='cuda:0')
episode: 47 training return: tensor(-489.8837, device='cuda:0')
epoch: 12 test_true_pfm: 2379.180190960677 sim_pfm: -63.768728238297626
episode: 48 training return: tensor(23.7818, device='cuda:0')
episode: 49 training return: tensor(-417.9964, device='cuda:0')
episode: 50 training return: tensor(-655.5684, device='cuda:0')
episode: 51 training return: tensor(-26.1774, device='cuda:0')
epoch: 13 test_true_pfm: 3233.5649143743162 sim_pfm: -78.72339294673293
episode: 52 training return: tensor(-1.5654, device='cuda:0')
episode: 53 training return: tensor(-396.2295, device='cuda:0')
episode: 54 training return: tensor(-379.1680, device='cuda:0')
episode: 55 training return: tensor(-498.6277, device='cuda:0')
epoch: 14 test_true_pfm: 3226.0465527545607 sim_pfm: 2.757751758724529
episode: 56 training return: tensor(-604.4523, device='cuda:0')
episode: 57 training return: tensor(-394.9446, device='cuda:0')
episode: 58 training return: tensor(-262.1493, device='cuda:0')
episode: 59 training return: tensor(-639.9328, device='cuda:0')
epoch: 15 test_true_pfm: 3230.091269282401 sim_pfm: 4.041947117172337
episode: 60 training return: tensor(-609.8735, device='cuda:0')
episode: 61 training return: tensor(-407.9636, device='cuda:0')
episode: 62 training return: tensor(-11.6060, device='cuda:0')
episode: 63 training return: tensor(55.4332, device='cuda:0')
epoch: 16 test_true_pfm: 2772.3028417239934 sim_pfm: 28.8420300425787
episode: 64 training return: tensor(-128.7015, device='cuda:0')
episode: 65 training return: tensor(11.9954, device='cuda:0')
episode: 66 training return: tensor(-417.3180, device='cuda:0')
episode: 67 training return: tensor(-44.2948, device='cuda:0')
epoch: 17 test_true_pfm: 3253.264815419692 sim_pfm: 10.86798698588973
episode: 68 training return: tensor(-340.8335, device='cuda:0')
episode: 69 training return: tensor(-2.9770, device='cuda:0')
episode: 70 training return: tensor(-403.0695, device='cuda:0')
episode: 71 training return: tensor(-3.5263, device='cuda:0')
epoch: 18 test_true_pfm: 3237.027745259586 sim_pfm: 27.73414312020759
episode: 72 training return: tensor(-15.9987, device='cuda:0')
episode: 73 training return: tensor(61.0018, device='cuda:0')
episode: 74 training return: tensor(29.6075, device='cuda:0')
episode: 75 training return: tensor(-43.4253, device='cuda:0')
epoch: 19 test_true_pfm: 3214.7425975678652 sim_pfm: 16.04217200887312
episode: 76 training return: tensor(17.1255, device='cuda:0')
episode: 77 training return: tensor(19.7639, device='cuda:0')
episode: 78 training return: tensor(-11.8376, device='cuda:0')
episode: 79 training return: tensor(50.0061, device='cuda:0')
epoch: 20 test_true_pfm: 3241.671130248616 sim_pfm: 3.386790394084528
episode: 80 training return: tensor(14.4092, device='cuda:0')
episode: 81 training return: tensor(-687.8094, device='cuda:0')
episode: 82 training return: tensor(11.6618, device='cuda:0')
episode: 83 training return: tensor(-17.4505, device='cuda:0')
epoch: 21 test_true_pfm: 3196.1924656763845 sim_pfm: -4.6843987433045795
episode: 84 training return: tensor(1.3648, device='cuda:0')
episode: 85 training return: tensor(4.0922, device='cuda:0')
episode: 86 training return: tensor(-346.4469, device='cuda:0')
episode: 87 training return: tensor(-24.2830, device='cuda:0')
epoch: 22 test_true_pfm: 3240.548153007025 sim_pfm: 38.39195095583758
episode: 88 training return: tensor(1.5331, device='cuda:0')
episode: 89 training return: tensor(-58.0830, device='cuda:0')
episode: 90 training return: tensor(-671.5341, device='cuda:0')
episode: 91 training return: tensor(63.9813, device='cuda:0')
epoch: 23 test_true_pfm: 3221.782670883036 sim_pfm: 15.39915093651507
episode: 92 training return: tensor(-89.8983, device='cuda:0')
episode: 93 training return: tensor(47.6124, device='cuda:0')
episode: 94 training return: tensor(-453.9835, device='cuda:0')
episode: 95 training return: tensor(-255.5934, device='cuda:0')
epoch: 24 test_true_pfm: 3226.7005030069427 sim_pfm: 30.072472798851475
episode: 96 training return: tensor(-307.4535, device='cuda:0')
episode: 97 training return: tensor(-274.3711, device='cuda:0')
episode: 98 training return: tensor(21.9891, device='cuda:0')
episode: 99 training return: tensor(-6.0644, device='cuda:0')
epoch: 25 test_true_pfm: 3223.1735304021145 sim_pfm: -13.410632326384075
episode: 100 training return: tensor(-626.8862, device='cuda:0')
episode: 101 training return: tensor(-18.7177, device='cuda:0')
episode: 102 training return: tensor(5.9018, device='cuda:0')
episode: 103 training return: tensor(2.2813, device='cuda:0')
epoch: 26 test_true_pfm: 3223.8816618114192 sim_pfm: 25.60371589421023
episode: 104 training return: tensor(24.6299, device='cuda:0')
episode: 105 training return: tensor(54.2117, device='cuda:0')
episode: 106 training return: tensor(-23.1663, device='cuda:0')
episode: 107 training return: tensor(-234.4038, device='cuda:0')
epoch: 27 test_true_pfm: 3232.1636346747287 sim_pfm: 24.269540182722267
episode: 108 training return: tensor(36.1937, device='cuda:0')
episode: 109 training return: tensor(-323.3563, device='cuda:0')
episode: 110 training return: tensor(-324.9804, device='cuda:0')
episode: 111 training return: tensor(8.6616, device='cuda:0')
epoch: 28 test_true_pfm: 3227.0843774577747 sim_pfm: 19.93734680468333
episode: 112 training return: tensor(7.5268, device='cuda:0')
episode: 113 training return: tensor(39.1266, device='cuda:0')
episode: 114 training return: tensor(26.8865, device='cuda:0')
episode: 115 training return: tensor(-37.8304, device='cuda:0')
epoch: 29 test_true_pfm: 3232.5082684179974 sim_pfm: 7.349879616677451
episode: 116 training return: tensor(-0.3456, device='cuda:0')
episode: 117 training return: tensor(-640.8645, device='cuda:0')
episode: 118 training return: tensor(-352.9635, device='cuda:0')
episode: 119 training return: tensor(-408.3772, device='cuda:0')
epoch: 30 test_true_pfm: 3213.336534153603 sim_pfm: 17.054596972855506
episode: 120 training return: tensor(-24.5066, device='cuda:0')
episode: 121 training return: tensor(10.3553, device='cuda:0')
episode: 122 training return: tensor(17.6823, device='cuda:0')
episode: 123 training return: tensor(-619.6627, device='cuda:0')
epoch: 31 test_true_pfm: 3247.63710076337 sim_pfm: 1.6480223052300669
episode: 124 training return: tensor(123.5792, device='cuda:0')
episode: 125 training return: tensor(22.9384, device='cuda:0')
episode: 126 training return: tensor(-20.6288, device='cuda:0')
episode: 127 training return: tensor(-672.7132, device='cuda:0')
epoch: 32 test_true_pfm: 3234.903174558525 sim_pfm: 19.234450403988983
episode: 128 training return: tensor(-1.1871, device='cuda:0')
episode: 129 training return: tensor(-414.0310, device='cuda:0')
episode: 130 training return: tensor(-304.8307, device='cuda:0')
episode: 131 training return: tensor(16.2335, device='cuda:0')
epoch: 33 test_true_pfm: 3228.1259204586736 sim_pfm: -4.197696203298013
episode: 132 training return: tensor(-696.1586, device='cuda:0')
episode: 133 training return: tensor(-442.8131, device='cuda:0')
episode: 134 training return: tensor(5.8446, device='cuda:0')
episode: 135 training return: tensor(17.9359, device='cuda:0')
epoch: 34 test_true_pfm: 3265.218182323773 sim_pfm: -13.497089602130776
episode: 136 training return: tensor(-614.6891, device='cuda:0')
episode: 137 training return: tensor(1.6262, device='cuda:0')
episode: 138 training return: tensor(-702.9396, device='cuda:0')
episode: 139 training return: tensor(-572.6954, device='cuda:0')
epoch: 35 test_true_pfm: 3235.5570473282496 sim_pfm: 25.956161555688595
episode: 140 training return: tensor(-621.1450, device='cuda:0')
episode: 141 training return: tensor(37.8942, device='cuda:0')
episode: 142 training return: tensor(-673.7294, device='cuda:0')
episode: 143 training return: tensor(14.0150, device='cuda:0')
epoch: 36 test_true_pfm: 3238.849798252692 sim_pfm: 21.25195534872667
episode: 144 training return: tensor(-93.0965, device='cuda:0')
episode: 145 training return: tensor(12.7262, device='cuda:0')
episode: 146 training return: tensor(1.0604, device='cuda:0')
episode: 147 training return: tensor(5.2581, device='cuda:0')
epoch: 37 test_true_pfm: 3239.8768866900905 sim_pfm: 2.085373126940491
episode: 148 training return: tensor(93.3198, device='cuda:0')
episode: 149 training return: tensor(-269.8355, device='cuda:0')
episode: 150 training return: tensor(-416.3580, device='cuda:0')
episode: 151 training return: tensor(-175.5077, device='cuda:0')
epoch: 38 test_true_pfm: 3248.4014830881574 sim_pfm: 26.255521073103107
episode: 152 training return: tensor(-3.6769, device='cuda:0')
episode: 153 training return: tensor(7.3740, device='cuda:0')
episode: 154 training return: tensor(-5.9903, device='cuda:0')
episode: 155 training return: tensor(-619.7960, device='cuda:0')
epoch: 39 test_true_pfm: 3252.1484225283148 sim_pfm: 23.717066845002893
episode: 156 training return: tensor(-32.7179, device='cuda:0')
episode: 157 training return: tensor(-216.5274, device='cuda:0')
episode: 158 training return: tensor(-597.0626, device='cuda:0')
episode: 159 training return: tensor(45.5091, device='cuda:0')
epoch: 40 test_true_pfm: 3234.828228173815 sim_pfm: 17.487972234608606
episode: 160 training return: tensor(-308.7365, device='cuda:0')
episode: 161 training return: tensor(-218.6318, device='cuda:0')
episode: 162 training return: tensor(-617.6313, device='cuda:0')
episode: 163 training return: tensor(-93.7464, device='cuda:0')
epoch: 41 test_true_pfm: 3247.898000246581 sim_pfm: 1.8497766708023846
episode: 164 training return: tensor(6.0072, device='cuda:0')
episode: 165 training return: tensor(29.1765, device='cuda:0')
episode: 166 training return: tensor(-488.6963, device='cuda:0')
episode: 167 training return: tensor(61.6163, device='cuda:0')
epoch: 42 test_true_pfm: 3204.2368689469936 sim_pfm: -4.78431021872287
episode: 168 training return: tensor(3.7496, device='cuda:0')
episode: 169 training return: tensor(60.9529, device='cuda:0')
episode: 170 training return: tensor(-412.0195, device='cuda:0')
episode: 171 training return: tensor(47.5040, device='cuda:0')
epoch: 43 test_true_pfm: 3250.2466295196373 sim_pfm: 40.412523453182075
episode: 172 training return: tensor(-6.6940, device='cuda:0')
episode: 173 training return: tensor(-638.8914, device='cuda:0')
episode: 174 training return: tensor(-670.4650, device='cuda:0')
episode: 175 training return: tensor(-676.0617, device='cuda:0')
epoch: 44 test_true_pfm: 3224.4998489789027 sim_pfm: 26.17599091793333
episode: 176 training return: tensor(-2.1228, device='cuda:0')
episode: 177 training return: tensor(-318.7794, device='cuda:0')
episode: 178 training return: tensor(-97.1912, device='cuda:0')
episode: 179 training return: tensor(30.5314, device='cuda:0')
epoch: 45 test_true_pfm: 3238.352547315603 sim_pfm: 16.76884755703698
episode: 180 training return: tensor(-425.4056, device='cuda:0')
episode: 181 training return: tensor(-416.5442, device='cuda:0')
episode: 182 training return: tensor(-589.5062, device='cuda:0')
episode: 183 training return: tensor(-428.2765, device='cuda:0')
epoch: 46 test_true_pfm: 3238.836907317365 sim_pfm: 9.065118966177883
episode: 184 training return: tensor(-671.5430, device='cuda:0')
episode: 185 training return: tensor(-673.3580, device='cuda:0')
episode: 186 training return: tensor(-683.7812, device='cuda:0')
episode: 187 training return: tensor(73.9734, device='cuda:0')
epoch: 47 test_true_pfm: 3238.4258884346323 sim_pfm: 21.88721055649997
episode: 188 training return: tensor(1.6025, device='cuda:0')
episode: 189 training return: tensor(-3.0169, device='cuda:0')
episode: 190 training return: tensor(-308.1780, device='cuda:0')
episode: 191 training return: tensor(-21.1603, device='cuda:0')
epoch: 48 test_true_pfm: 3229.2146101235144 sim_pfm: 13.87444296758622
episode: 192 training return: tensor(1.7255, device='cuda:0')
episode: 193 training return: tensor(69.3987, device='cuda:0')
episode: 194 training return: tensor(-415.7783, device='cuda:0')
episode: 195 training return: tensor(-569.4949, device='cuda:0')
epoch: 49 test_true_pfm: 3247.2875569693556 sim_pfm: 30.893395495630102
episode: 196 training return: tensor(-390.6653, device='cuda:0')
episode: 197 training return: tensor(-149.6504, device='cuda:0')
episode: 198 training return: tensor(108.1439, device='cuda:0')
episode: 199 training return: tensor(28.1728, device='cuda:0')
epoch: 50 test_true_pfm: 3228.9333924525467 sim_pfm: 7.558595834105897
episode: 200 training return: tensor(33.3706, device='cuda:0')
episode: 201 training return: tensor(-634.5750, device='cuda:0')
episode: 202 training return: tensor(-8.6538, device='cuda:0')
episode: 203 training return: tensor(34.8814, device='cuda:0')
epoch: 51 test_true_pfm: 3229.778628310552 sim_pfm: 20.281146187432267
episode: 204 training return: tensor(12.0823, device='cuda:0')
episode: 205 training return: tensor(4.3124, device='cuda:0')
episode: 206 training return: tensor(48.7187, device='cuda:0')
episode: 207 training return: tensor(31.7771, device='cuda:0')
epoch: 52 test_true_pfm: 3241.355297846329 sim_pfm: 15.516460588260088
episode: 208 training return: tensor(94.2127, device='cuda:0')
episode: 209 training return: tensor(-3.7224, device='cuda:0')
episode: 210 training return: tensor(22.3260, device='cuda:0')
episode: 211 training return: tensor(1.2538, device='cuda:0')
epoch: 53 test_true_pfm: 3237.9242261113172 sim_pfm: 15.301704726550573
episode: 212 training return: tensor(20.8191, device='cuda:0')
episode: 213 training return: tensor(25.7701, device='cuda:0')
episode: 214 training return: tensor(-404.2094, device='cuda:0')
episode: 215 training return: tensor(-5.5306, device='cuda:0')
epoch: 54 test_true_pfm: 3232.864091581205 sim_pfm: 9.14044808059892
episode: 216 training return: tensor(-13.5218, device='cuda:0')
episode: 217 training return: tensor(-129.0182, device='cuda:0')
episode: 218 training return: tensor(-1.2972, device='cuda:0')
episode: 219 training return: tensor(-308.6085, device='cuda:0')
epoch: 55 test_true_pfm: 3232.4196131509166 sim_pfm: 6.907261118583847
episode: 220 training return: tensor(-409.2283, device='cuda:0')
episode: 221 training return: tensor(-497.3257, device='cuda:0')
episode: 222 training return: tensor(-79.7678, device='cuda:0')
episode: 223 training return: tensor(-240.4505, device='cuda:0')
epoch: 56 test_true_pfm: 3261.4237375166194 sim_pfm: 10.652940404698407
episode: 224 training return: tensor(-337.0044, device='cuda:0')
episode: 225 training return: tensor(31.8837, device='cuda:0')
episode: 226 training return: tensor(13.8592, device='cuda:0')
episode: 227 training return: tensor(-4.4303, device='cuda:0')
epoch: 57 test_true_pfm: 3254.299796397119 sim_pfm: 38.41678788027881
episode: 228 training return: tensor(13.3695, device='cuda:0')
episode: 229 training return: tensor(-404.0378, device='cuda:0')
episode: 230 training return: tensor(-99.6223, device='cuda:0')
episode: 231 training return: tensor(-35.9471, device='cuda:0')
epoch: 58 test_true_pfm: 3227.873529447377 sim_pfm: 25.179157871558953
episode: 232 training return: tensor(-400.8283, device='cuda:0')
episode: 233 training return: tensor(-602.0863, device='cuda:0')
episode: 234 training return: tensor(-687.7663, device='cuda:0')
episode: 235 training return: tensor(25.8142, device='cuda:0')
epoch: 59 test_true_pfm: 2866.4938707534184 sim_pfm: 15.135842330055311
episode: 236 training return: tensor(-21.8965, device='cuda:0')
episode: 237 training return: tensor(9.8650, device='cuda:0')
episode: 238 training return: tensor(43.8462, device='cuda:0')
episode: 239 training return: tensor(25.9345, device='cuda:0')
epoch: 60 test_true_pfm: 3223.125576907168 sim_pfm: 14.260438920018109
episode: 240 training return: tensor(-678.6993, device='cuda:0')
episode: 241 training return: tensor(7.3026, device='cuda:0')
episode: 242 training return: tensor(9.7640, device='cuda:0')
episode: 243 training return: tensor(44.0559, device='cuda:0')
epoch: 61 test_true_pfm: 3248.0039919455544 sim_pfm: 28.648358867076848
episode: 244 training return: tensor(-124.9536, device='cuda:0')
episode: 245 training return: tensor(-673.6033, device='cuda:0')
episode: 246 training return: tensor(7.0354, device='cuda:0')
episode: 247 training return: tensor(-229.9589, device='cuda:0')
epoch: 62 test_true_pfm: 3245.469278335096 sim_pfm: 25.86913043691311
episode: 248 training return: tensor(-129.9846, device='cuda:0')
episode: 249 training return: tensor(44.8086, device='cuda:0')
episode: 250 training return: tensor(-15.3352, device='cuda:0')
episode: 251 training return: tensor(88.5629, device='cuda:0')
epoch: 63 test_true_pfm: 3098.2450372167264 sim_pfm: 15.123032536959121
episode: 252 training return: tensor(-675.0928, device='cuda:0')
episode: 253 training return: tensor(80.9638, device='cuda:0')
episode: 254 training return: tensor(6.8735, device='cuda:0')
episode: 255 training return: tensor(-395.2807, device='cuda:0')
epoch: 64 test_true_pfm: 3238.6312424073276 sim_pfm: 26.386048792822596
episode: 256 training return: tensor(-666.4106, device='cuda:0')
episode: 257 training return: tensor(-613.7590, device='cuda:0')
episode: 258 training return: tensor(33.9079, device='cuda:0')
episode: 259 training return: tensor(-116.6911, device='cuda:0')
epoch: 65 test_true_pfm: 3225.902956050523 sim_pfm: 16.075156139607618
episode: 260 training return: tensor(38.2657, device='cuda:0')
episode: 261 training return: tensor(-378.3419, device='cuda:0')
episode: 262 training return: tensor(-406.3753, device='cuda:0')
episode: 263 training return: tensor(-551.2636, device='cuda:0')
epoch: 66 test_true_pfm: 3226.6887515610924 sim_pfm: 10.590569009577544
episode: 264 training return: tensor(28.7063, device='cuda:0')
episode: 265 training return: tensor(-560.5455, device='cuda:0')
episode: 266 training return: tensor(-611.1740, device='cuda:0')
episode: 267 training return: tensor(3.3023, device='cuda:0')
epoch: 67 test_true_pfm: 2933.7707017166754 sim_pfm: 9.708776935236529
episode: 268 training return: tensor(-219.1108, device='cuda:0')
episode: 269 training return: tensor(6.2184, device='cuda:0')
episode: 270 training return: tensor(-236.1964, device='cuda:0')
episode: 271 training return: tensor(29.0042, device='cuda:0')
epoch: 68 test_true_pfm: 3236.462236844982 sim_pfm: 14.827513972549545
episode: 272 training return: tensor(-400.3967, device='cuda:0')
episode: 273 training return: tensor(-673.2656, device='cuda:0')
episode: 274 training return: tensor(-468.5695, device='cuda:0')
episode: 275 training return: tensor(-14.8398, device='cuda:0')
epoch: 69 test_true_pfm: 3273.147776427115 sim_pfm: 13.567201343985895
episode: 276 training return: tensor(-657.6523, device='cuda:0')
episode: 277 training return: tensor(-569.1507, device='cuda:0')
episode: 278 training return: tensor(19.8455, device='cuda:0')
episode: 279 training return: tensor(9.7828, device='cuda:0')
epoch: 70 test_true_pfm: 3257.5808378463485 sim_pfm: 16.44902483347687
episode: 280 training return: tensor(-22.0986, device='cuda:0')
episode: 281 training return: tensor(21.8192, device='cuda:0')
episode: 282 training return: tensor(-372.8555, device='cuda:0')
episode: 283 training return: tensor(-237.8989, device='cuda:0')
epoch: 71 test_true_pfm: 3243.684577254966 sim_pfm: 55.71141149534378
episode: 284 training return: tensor(-252.2383, device='cuda:0')
episode: 285 training return: tensor(35.8431, device='cuda:0')
episode: 286 training return: tensor(101.2179, device='cuda:0')
episode: 287 training return: tensor(12.2634, device='cuda:0')
epoch: 72 test_true_pfm: 3236.738341376846 sim_pfm: 16.808864772426507
episode: 288 training return: tensor(-48.5981, device='cuda:0')
episode: 289 training return: tensor(14.9596, device='cuda:0')
episode: 290 training return: tensor(13.9301, device='cuda:0')
episode: 291 training return: tensor(13.1809, device='cuda:0')
epoch: 73 test_true_pfm: 3236.9653233647446 sim_pfm: 14.503044182405574
episode: 292 training return: tensor(59.3099, device='cuda:0')
episode: 293 training return: tensor(31.1782, device='cuda:0')
episode: 294 training return: tensor(-573.5997, device='cuda:0')
episode: 295 training return: tensor(-523.6716, device='cuda:0')
epoch: 74 test_true_pfm: 3255.356856027083 sim_pfm: 26.36498192862685
episode: 296 training return: tensor(84.3930, device='cuda:0')
episode: 297 training return: tensor(-54.7780, device='cuda:0')
episode: 298 training return: tensor(-656.9412, device='cuda:0')
episode: 299 training return: tensor(-3.5925, device='cuda:0')
epoch: 75 test_true_pfm: 3247.0691982213775 sim_pfm: 36.359697600428866
episode: 300 training return: tensor(0.4431, device='cuda:0')
episode: 301 training return: tensor(28.3394, device='cuda:0')
episode: 302 training return: tensor(60.3423, device='cuda:0')
episode: 303 training return: tensor(-683.3727, device='cuda:0')
epoch: 76 test_true_pfm: 3246.71530018967 sim_pfm: 7.8462102716148365
episode: 304 training return: tensor(25.9374, device='cuda:0')
episode: 305 training return: tensor(-643.4539, device='cuda:0')
episode: 306 training return: tensor(60.3620, device='cuda:0')
episode: 307 training return: tensor(-34.7470, device='cuda:0')
epoch: 77 test_true_pfm: 3233.787403919479 sim_pfm: 25.48076940109604
episode: 308 training return: tensor(60.1646, device='cuda:0')
episode: 309 training return: tensor(33.3289, device='cuda:0')
episode: 310 training return: tensor(-637.1906, device='cuda:0')
episode: 311 training return: tensor(-168.9675, device='cuda:0')
epoch: 78 test_true_pfm: 3145.837483456433 sim_pfm: -26.64667812428282
episode: 312 training return: tensor(-325.6361, device='cuda:0')
episode: 313 training return: tensor(15.6091, device='cuda:0')
episode: 314 training return: tensor(-341.2159, device='cuda:0')
episode: 315 training return: tensor(11.5620, device='cuda:0')
epoch: 79 test_true_pfm: 3228.4360026831564 sim_pfm: 17.580177431654494
episode: 316 training return: tensor(-488.1063, device='cuda:0')
episode: 317 training return: tensor(123.6620, device='cuda:0')
episode: 318 training return: tensor(16.3493, device='cuda:0')
episode: 319 training return: tensor(-342.2443, device='cuda:0')
epoch: 80 test_true_pfm: 3249.229064698233 sim_pfm: 9.381171667715535
episode: 320 training return: tensor(46.0487, device='cuda:0')
episode: 321 training return: tensor(-100.8295, device='cuda:0')
episode: 322 training return: tensor(-651.9412, device='cuda:0')
episode: 323 training return: tensor(-11.3972, device='cuda:0')
epoch: 81 test_true_pfm: 3241.611785721378 sim_pfm: 21.150267468959402
episode: 324 training return: tensor(24.5582, device='cuda:0')
episode: 325 training return: tensor(-246.5751, device='cuda:0')
episode: 326 training return: tensor(65.7202, device='cuda:0')
episode: 327 training return: tensor(41.3595, device='cuda:0')
epoch: 82 test_true_pfm: 3265.4534105374055 sim_pfm: 12.681709502561716
episode: 328 training return: tensor(-5.3734, device='cuda:0')
episode: 329 training return: tensor(-676.8228, device='cuda:0')
episode: 330 training return: tensor(-656.4552, device='cuda:0')
episode: 331 training return: tensor(-381.3405, device='cuda:0')
epoch: 83 test_true_pfm: 3250.7269555839152 sim_pfm: 35.89908545663153
episode: 332 training return: tensor(-13.5217, device='cuda:0')
episode: 333 training return: tensor(30.5971, device='cuda:0')
episode: 334 training return: tensor(40.9999, device='cuda:0')
episode: 335 training return: tensor(10.7938, device='cuda:0')
epoch: 84 test_true_pfm: 3249.6839292781856 sim_pfm: 43.93864762840288
episode: 336 training return: tensor(53.0395, device='cuda:0')
episode: 337 training return: tensor(-68.4311, device='cuda:0')
episode: 338 training return: tensor(-147.3017, device='cuda:0')
episode: 339 training return: tensor(-147.0410, device='cuda:0')
epoch: 85 test_true_pfm: 3243.5299239878223 sim_pfm: 50.91624700229537
episode: 340 training return: tensor(6.2181, device='cuda:0')
episode: 341 training return: tensor(-389.3820, device='cuda:0')
episode: 342 training return: tensor(-297.4104, device='cuda:0')
episode: 343 training return: tensor(-646.5488, device='cuda:0')
epoch: 86 test_true_pfm: 3273.8043677334886 sim_pfm: 11.038027879141737
episode: 344 training return: tensor(-685.4302, device='cuda:0')
episode: 345 training return: tensor(-678.9839, device='cuda:0')
episode: 346 training return: tensor(113.0488, device='cuda:0')
episode: 347 training return: tensor(-111.5537, device='cuda:0')
epoch: 87 test_true_pfm: 3231.0898780734765 sim_pfm: 6.400150048701714
episode: 348 training return: tensor(-291.5766, device='cuda:0')
episode: 349 training return: tensor(43.2117, device='cuda:0')
episode: 350 training return: tensor(-99.0899, device='cuda:0')
episode: 351 training return: tensor(-2.8159, device='cuda:0')
epoch: 88 test_true_pfm: 3223.887237204776 sim_pfm: 40.79288845833313
episode: 352 training return: tensor(-613.0140, device='cuda:0')
episode: 353 training return: tensor(122.8245, device='cuda:0')
episode: 354 training return: tensor(24.7454, device='cuda:0')
episode: 355 training return: tensor(-16.5169, device='cuda:0')
epoch: 89 test_true_pfm: 3247.7796937266344 sim_pfm: 14.548495015352577
episode: 356 training return: tensor(-476.5669, device='cuda:0')
episode: 357 training return: tensor(-15.4526, device='cuda:0')
episode: 358 training return: tensor(-311.2647, device='cuda:0')
episode: 359 training return: tensor(55.6588, device='cuda:0')
epoch: 90 test_true_pfm: 3277.8626486691705 sim_pfm: 1.488517519746286
episode: 360 training return: tensor(-658.4808, device='cuda:0')
episode: 361 training return: tensor(-213.2933, device='cuda:0')
episode: 362 training return: tensor(-144.4848, device='cuda:0')
episode: 363 training return: tensor(46.3298, device='cuda:0')
epoch: 91 test_true_pfm: 3225.643898547672 sim_pfm: -9.890666130076474
episode: 364 training return: tensor(-595.2392, device='cuda:0')
episode: 365 training return: tensor(-676.1660, device='cuda:0')
episode: 366 training return: tensor(-25.1476, device='cuda:0')
episode: 367 training return: tensor(12.0756, device='cuda:0')
epoch: 92 test_true_pfm: 2562.2692589458984 sim_pfm: -82.89608449549026
episode: 368 training return: tensor(-227.8625, device='cuda:0')
episode: 369 training return: tensor(28.2877, device='cuda:0')
episode: 370 training return: tensor(-495.9611, device='cuda:0')
episode: 371 training return: tensor(-25.6290, device='cuda:0')
epoch: 93 test_true_pfm: 2779.08341755458 sim_pfm: -76.09844669129234
episode: 372 training return: tensor(-14.4479, device='cuda:0')
episode: 373 training return: tensor(-11.6420, device='cuda:0')
episode: 374 training return: tensor(-408.6456, device='cuda:0')
episode: 375 training return: tensor(-507.1635, device='cuda:0')
epoch: 94 test_true_pfm: 3231.8070076417953 sim_pfm: 26.1956154583216
episode: 376 training return: tensor(16.9411, device='cuda:0')
episode: 377 training return: tensor(-627.7412, device='cuda:0')
episode: 378 training return: tensor(-637.8896, device='cuda:0')
episode: 379 training return: tensor(-383.6314, device='cuda:0')
epoch: 95 test_true_pfm: 3128.828986768896 sim_pfm: 49.579338750064686
episode: 380 training return: tensor(-413.3795, device='cuda:0')
episode: 381 training return: tensor(27.0411, device='cuda:0')
episode: 382 training return: tensor(-639.0183, device='cuda:0')
episode: 383 training return: tensor(100.0675, device='cuda:0')
epoch: 96 test_true_pfm: 3254.4210965692414 sim_pfm: 40.360494411715386
episode: 384 training return: tensor(-9.0242, device='cuda:0')
episode: 385 training return: tensor(-18.3573, device='cuda:0')
episode: 386 training return: tensor(-409.9319, device='cuda:0')
episode: 387 training return: tensor(-601.1876, device='cuda:0')
epoch: 97 test_true_pfm: 3139.7551077341727 sim_pfm: 30.627825424802722
episode: 388 training return: tensor(79.5731, device='cuda:0')
episode: 389 training return: tensor(69.5407, device='cuda:0')
episode: 390 training return: tensor(14.9229, device='cuda:0')
episode: 391 training return: tensor(5.1302, device='cuda:0')
epoch: 98 test_true_pfm: 3241.376032406973 sim_pfm: 19.551248083250055
episode: 392 training return: tensor(-385.9679, device='cuda:0')
episode: 393 training return: tensor(-2.3870, device='cuda:0')
episode: 394 training return: tensor(-49.6126, device='cuda:0')
episode: 395 training return: tensor(-416.4805, device='cuda:0')
epoch: 99 test_true_pfm: 1928.710776068631 sim_pfm: -115.4705905095713
episode: 396 training return: tensor(18.3280, device='cuda:0')
episode: 397 training return: tensor(-291.8145, device='cuda:0')
episode: 398 training return: tensor(69.3058, device='cuda:0')
episode: 399 training return: tensor(-569.4739, device='cuda:0')
epoch: 100 test_true_pfm: 3234.6308882657754 sim_pfm: -0.9905401576349201
episode: 400 training return: tensor(23.3748, device='cuda:0')
episode: 401 training return: tensor(32.5465, device='cuda:0')
episode: 402 training return: tensor(-675.3574, device='cuda:0')
episode: 403 training return: tensor(86.4768, device='cuda:0')
epoch: 101 test_true_pfm: 3288.967063982504 sim_pfm: -138.57700867483314
episode: 404 training return: tensor(106.1436, device='cuda:0')
episode: 405 training return: tensor(-685.2040, device='cuda:0')
episode: 406 training return: tensor(27.4639, device='cuda:0')
episode: 407 training return: tensor(-519.5140, device='cuda:0')
epoch: 102 test_true_pfm: 3274.7613511148916 sim_pfm: 63.867205341754016
episode: 408 training return: tensor(139.6582, device='cuda:0')
episode: 409 training return: tensor(-674.8795, device='cuda:0')
episode: 410 training return: tensor(68.7884, device='cuda:0')
episode: 411 training return: tensor(-672.5383, device='cuda:0')
epoch: 103 test_true_pfm: 2767.3681468427353 sim_pfm: 57.63572980010455
episode: 412 training return: tensor(68.6670, device='cuda:0')
episode: 413 training return: tensor(103.3765, device='cuda:0')
episode: 414 training return: tensor(-57.8377, device='cuda:0')
episode: 415 training return: tensor(-510.8638, device='cuda:0')
epoch: 104 test_true_pfm: 2919.148067602284 sim_pfm: 60.645056522102095
episode: 416 training return: tensor(-467.4809, device='cuda:0')
episode: 417 training return: tensor(23.5157, device='cuda:0')
episode: 418 training return: tensor(-562.8650, device='cuda:0')
episode: 419 training return: tensor(-612.3966, device='cuda:0')
epoch: 105 test_true_pfm: 2724.0935075945786 sim_pfm: 4.757625822288294
episode: 420 training return: tensor(-4.2009, device='cuda:0')
episode: 421 training return: tensor(-615.6089, device='cuda:0')
episode: 422 training return: tensor(-588.1977, device='cuda:0')
episode: 423 training return: tensor(-154.8744, device='cuda:0')
epoch: 106 test_true_pfm: 3244.762292349136 sim_pfm: 15.446231550032584
episode: 424 training return: tensor(13.9673, device='cuda:0')
episode: 425 training return: tensor(26.7759, device='cuda:0')
episode: 426 training return: tensor(25.7555, device='cuda:0')
episode: 427 training return: tensor(15.2144, device='cuda:0')
epoch: 107 test_true_pfm: 2752.128883085095 sim_pfm: -111.89869077206822
episode: 428 training return: tensor(4.3775, device='cuda:0')
episode: 429 training return: tensor(-583.5955, device='cuda:0')
episode: 430 training return: tensor(-128.7373, device='cuda:0')
episode: 431 training return: tensor(-657.0341, device='cuda:0')
epoch: 108 test_true_pfm: 3256.502959633926 sim_pfm: 53.65833817536865
episode: 432 training return: tensor(-605.6071, device='cuda:0')
episode: 433 training return: tensor(-679.0474, device='cuda:0')
episode: 434 training return: tensor(60.4684, device='cuda:0')
episode: 435 training return: tensor(54.5451, device='cuda:0')
epoch: 109 test_true_pfm: 3254.779834653238 sim_pfm: 44.249246097557865
episode: 436 training return: tensor(30.1803, device='cuda:0')
episode: 437 training return: tensor(-142.0958, device='cuda:0')
episode: 438 training return: tensor(-170.5151, device='cuda:0')
episode: 439 training return: tensor(-609.7615, device='cuda:0')
epoch: 110 test_true_pfm: 2350.907798747319 sim_pfm: -380.1892014764987
episode: 440 training return: tensor(-72.0485, device='cuda:0')
episode: 441 training return: tensor(9.9641, device='cuda:0')
episode: 442 training return: tensor(24.2309, device='cuda:0')
episode: 443 training return: tensor(-71.7937, device='cuda:0')
epoch: 111 test_true_pfm: 3232.541074425935 sim_pfm: 49.848739553039195
episode: 444 training return: tensor(-16.2100, device='cuda:0')
episode: 445 training return: tensor(48.9403, device='cuda:0')
episode: 446 training return: tensor(-239.8894, device='cuda:0')
episode: 447 training return: tensor(-636.5809, device='cuda:0')
epoch: 112 test_true_pfm: 2626.561399990572 sim_pfm: -29.618541401267674
episode: 448 training return: tensor(36.4756, device='cuda:0')
episode: 449 training return: tensor(-117.2206, device='cuda:0')
episode: 450 training return: tensor(-207.8856, device='cuda:0')
episode: 451 training return: tensor(-610.2434, device='cuda:0')
epoch: 113 test_true_pfm: 2960.749915928073 sim_pfm: 18.751299448971015
episode: 452 training return: tensor(-422.7238, device='cuda:0')
episode: 453 training return: tensor(2.1885, device='cuda:0')
episode: 454 training return: tensor(-329.1564, device='cuda:0')
episode: 455 training return: tensor(-392.4881, device='cuda:0')
epoch: 114 test_true_pfm: 3226.0688482107585 sim_pfm: 44.77035914503116
episode: 456 training return: tensor(80.5457, device='cuda:0')
episode: 457 training return: tensor(-562.5997, device='cuda:0')
episode: 458 training return: tensor(-411.1513, device='cuda:0')
episode: 459 training return: tensor(-340.6478, device='cuda:0')
epoch: 115 test_true_pfm: 3265.88640334551 sim_pfm: 69.94172288564732
episode: 460 training return: tensor(-1.8083, device='cuda:0')
episode: 461 training return: tensor(21.8042, device='cuda:0')
episode: 462 training return: tensor(-500.1793, device='cuda:0')
episode: 463 training return: tensor(-401.4102, device='cuda:0')
epoch: 116 test_true_pfm: 3272.416983436275 sim_pfm: 39.26128384889065
episode: 464 training return: tensor(10.5859, device='cuda:0')
episode: 465 training return: tensor(-613.1976, device='cuda:0')
episode: 466 training return: tensor(42.7717, device='cuda:0')
episode: 467 training return: tensor(-3.1079, device='cuda:0')
epoch: 117 test_true_pfm: 3250.1106469763768 sim_pfm: 52.95824111097803
episode: 468 training return: tensor(36.8781, device='cuda:0')
episode: 469 training return: tensor(-243.2520, device='cuda:0')
episode: 470 training return: tensor(-592.3906, device='cuda:0')
episode: 471 training return: tensor(122.4616, device='cuda:0')
epoch: 118 test_true_pfm: 3266.1206499726322 sim_pfm: 41.08139474071019
episode: 472 training return: tensor(-633.9127, device='cuda:0')
episode: 473 training return: tensor(-579.3485, device='cuda:0')
episode: 474 training return: tensor(43.4003, device='cuda:0')
episode: 475 training return: tensor(16.2448, device='cuda:0')
epoch: 119 test_true_pfm: 2365.2652488600593 sim_pfm: 41.65984825984924
episode: 476 training return: tensor(-403.0789, device='cuda:0')
episode: 477 training return: tensor(19.1889, device='cuda:0')
episode: 478 training return: tensor(2.4279, device='cuda:0')
episode: 479 training return: tensor(-677.0427, device='cuda:0')
epoch: 120 test_true_pfm: 3254.46771621434 sim_pfm: 28.926076068610808
episode: 480 training return: tensor(-496.1110, device='cuda:0')
episode: 481 training return: tensor(-654.8761, device='cuda:0')
episode: 482 training return: tensor(-338.6760, device='cuda:0')
episode: 483 training return: tensor(-326.7099, device='cuda:0')
epoch: 121 test_true_pfm: 3266.9664842303737 sim_pfm: 23.172562302730512
episode: 484 training return: tensor(-237.4559, device='cuda:0')
episode: 485 training return: tensor(-429.9464, device='cuda:0')
episode: 486 training return: tensor(51.7567, device='cuda:0')
episode: 487 training return: tensor(-70.7352, device='cuda:0')
epoch: 122 test_true_pfm: 3262.9340982379035 sim_pfm: 57.90008661783455
episode: 488 training return: tensor(-561.2331, device='cuda:0')
episode: 489 training return: tensor(-89.3885, device='cuda:0')
episode: 490 training return: tensor(-152.6434, device='cuda:0')
episode: 491 training return: tensor(19.8514, device='cuda:0')
epoch: 123 test_true_pfm: 3266.563070911101 sim_pfm: 31.004786659536574
episode: 492 training return: tensor(-418.1235, device='cuda:0')
episode: 493 training return: tensor(-393.0565, device='cuda:0')
episode: 494 training return: tensor(-418.3347, device='cuda:0')
episode: 495 training return: tensor(-145.3148, device='cuda:0')
epoch: 124 test_true_pfm: 3230.2141950984733 sim_pfm: 4.36539421704947
episode: 496 training return: tensor(-519.1970, device='cuda:0')
episode: 497 training return: tensor(-4.7912, device='cuda:0')
episode: 498 training return: tensor(-653.3740, device='cuda:0')
episode: 499 training return: tensor(-402.5852, device='cuda:0')
epoch: 125 test_true_pfm: 3250.9556988705626 sim_pfm: 33.96281691823
episode: 500 training return: tensor(-185.4533, device='cuda:0')
episode: 501 training return: tensor(-571.4114, device='cuda:0')
episode: 502 training return: tensor(-639.8107, device='cuda:0')
episode: 503 training return: tensor(-79.4176, device='cuda:0')
epoch: 126 test_true_pfm: 3283.6676874891145 sim_pfm: 57.03519207262434
episode: 504 training return: tensor(32.1036, device='cuda:0')
episode: 505 training return: tensor(-480.9178, device='cuda:0')
episode: 506 training return: tensor(-401.8214, device='cuda:0')
episode: 507 training return: tensor(-416.2475, device='cuda:0')
epoch: 127 test_true_pfm: 2527.678205753169 sim_pfm: -78.37694502579204
episode: 508 training return: tensor(-58.4553, device='cuda:0')
episode: 509 training return: tensor(32.8345, device='cuda:0')
episode: 510 training return: tensor(-468.1198, device='cuda:0')
episode: 511 training return: tensor(88.9361, device='cuda:0')
epoch: 128 test_true_pfm: 2974.991906284869 sim_pfm: -100.48230573810481
episode: 512 training return: tensor(-334.1467, device='cuda:0')
episode: 513 training return: tensor(140.7076, device='cuda:0')
episode: 514 training return: tensor(-589.3560, device='cuda:0')
episode: 515 training return: tensor(14.4042, device='cuda:0')
epoch: 129 test_true_pfm: 2746.7162262822385 sim_pfm: 48.62367591737226
episode: 516 training return: tensor(28.3153, device='cuda:0')
episode: 517 training return: tensor(-351.8379, device='cuda:0')
episode: 518 training return: tensor(52.2291, device='cuda:0')
episode: 519 training return: tensor(2.4677, device='cuda:0')
epoch: 130 test_true_pfm: 2982.5965307421043 sim_pfm: 51.792370016803034
episode: 520 training return: tensor(-134.4471, device='cuda:0')
episode: 521 training return: tensor(-664.6912, device='cuda:0')
episode: 522 training return: tensor(-607.4300, device='cuda:0')
episode: 523 training return: tensor(-657.9033, device='cuda:0')
epoch: 131 test_true_pfm: 2221.889595476116 sim_pfm: 83.95041337518099
episode: 524 training return: tensor(-84.7972, device='cuda:0')
episode: 525 training return: tensor(105.2969, device='cuda:0')
episode: 526 training return: tensor(-3.3710, device='cuda:0')
episode: 527 training return: tensor(1.7925, device='cuda:0')
epoch: 132 test_true_pfm: 3152.07584608527 sim_pfm: 23.560147819962975
episode: 528 training return: tensor(48.6868, device='cuda:0')
episode: 529 training return: tensor(-678.4892, device='cuda:0')
episode: 530 training return: tensor(19.3384, device='cuda:0')
episode: 531 training return: tensor(-669.0598, device='cuda:0')
epoch: 133 test_true_pfm: 3167.0312369851326 sim_pfm: 53.256483155768365
episode: 532 training return: tensor(-476.0245, device='cuda:0')
episode: 533 training return: tensor(73.7447, device='cuda:0')
episode: 534 training return: tensor(-332.5583, device='cuda:0')
episode: 535 training return: tensor(36.2336, device='cuda:0')
epoch: 134 test_true_pfm: 3127.961209990201 sim_pfm: 6.189144469147625
episode: 536 training return: tensor(-410.0291, device='cuda:0')
episode: 537 training return: tensor(-22.1184, device='cuda:0')
episode: 538 training return: tensor(-364.7915, device='cuda:0')
episode: 539 training return: tensor(39.1400, device='cuda:0')
epoch: 135 test_true_pfm: 3291.144561034754 sim_pfm: 59.13164407490209
episode: 540 training return: tensor(29.4508, device='cuda:0')
episode: 541 training return: tensor(-589.8036, device='cuda:0')
episode: 542 training return: tensor(-585.8471, device='cuda:0')
episode: 543 training return: tensor(91.7205, device='cuda:0')
epoch: 136 test_true_pfm: 2688.851002819658 sim_pfm: 39.58612324883385
episode: 544 training return: tensor(-504.6118, device='cuda:0')
episode: 545 training return: tensor(-1.3057, device='cuda:0')
episode: 546 training return: tensor(-404.5962, device='cuda:0')
episode: 547 training return: tensor(-467.2955, device='cuda:0')
epoch: 137 test_true_pfm: 3246.8614503896574 sim_pfm: 15.341292931048278
episode: 548 training return: tensor(-180.6556, device='cuda:0')
episode: 549 training return: tensor(-355.0824, device='cuda:0')
episode: 550 training return: tensor(-6.5747, device='cuda:0')
episode: 551 training return: tensor(-649.5161, device='cuda:0')
epoch: 138 test_true_pfm: 3282.5882748768704 sim_pfm: -184.88319367092723
episode: 552 training return: tensor(-16.2469, device='cuda:0')
episode: 553 training return: tensor(-587.2402, device='cuda:0')
episode: 554 training return: tensor(-687.1622, device='cuda:0')
episode: 555 training return: tensor(62.7607, device='cuda:0')
epoch: 139 test_true_pfm: 2984.090852724989 sim_pfm: -66.58031888028684
episode: 556 training return: tensor(-75.8870, device='cuda:0')
episode: 557 training return: tensor(-612.4600, device='cuda:0')
episode: 558 training return: tensor(-673.5822, device='cuda:0')
episode: 559 training return: tensor(-516.7646, device='cuda:0')
epoch: 140 test_true_pfm: 3216.2523502642475 sim_pfm: -4.82256025216581
episode: 560 training return: tensor(-594.0400, device='cuda:0')
episode: 561 training return: tensor(-589.0402, device='cuda:0')
episode: 562 training return: tensor(99.9613, device='cuda:0')
episode: 563 training return: tensor(-323.6393, device='cuda:0')
epoch: 141 test_true_pfm: 3271.048309295115 sim_pfm: -115.46384059394283
episode: 564 training return: tensor(73.8107, device='cuda:0')
episode: 565 training return: tensor(-172.0427, device='cuda:0')
episode: 566 training return: tensor(-115.4435, device='cuda:0')
episode: 567 training return: tensor(-411.0118, device='cuda:0')
epoch: 142 test_true_pfm: 2314.459612625247 sim_pfm: -129.37456337657446
episode: 568 training return: tensor(-433.3242, device='cuda:0')
episode: 569 training return: tensor(-231.1893, device='cuda:0')
episode: 570 training return: tensor(-69.6869, device='cuda:0')
episode: 571 training return: tensor(-688.1819, device='cuda:0')
epoch: 143 test_true_pfm: 2743.7830298621047 sim_pfm: 26.810505638141574
episode: 572 training return: tensor(-336.4742, device='cuda:0')
episode: 573 training return: tensor(-498.4421, device='cuda:0')
episode: 574 training return: tensor(-325.4029, device='cuda:0')
episode: 575 training return: tensor(-596.2284, device='cuda:0')
epoch: 144 test_true_pfm: 3252.1385872617684 sim_pfm: 44.62738258064686
episode: 576 training return: tensor(-683.6568, device='cuda:0')
episode: 577 training return: tensor(-655.6284, device='cuda:0')
episode: 578 training return: tensor(-587.2081, device='cuda:0')
episode: 579 training return: tensor(86.1590, device='cuda:0')
epoch: 145 test_true_pfm: 2362.706902222566 sim_pfm: -227.99749542377927
episode: 580 training return: tensor(-337.9801, device='cuda:0')
episode: 581 training return: tensor(5.0017, device='cuda:0')
episode: 582 training return: tensor(124.8661, device='cuda:0')
episode: 583 training return: tensor(-362.0045, device='cuda:0')
epoch: 146 test_true_pfm: 3223.1634896369706 sim_pfm: 71.47425831790315
episode: 584 training return: tensor(-561.2165, device='cuda:0')
episode: 585 training return: tensor(-490.2098, device='cuda:0')
episode: 586 training return: tensor(-403.7126, device='cuda:0')
episode: 587 training return: tensor(120.7282, device='cuda:0')
epoch: 147 test_true_pfm: 2944.940885556291 sim_pfm: -68.65219620071973
episode: 588 training return: tensor(-486.5195, device='cuda:0')
episode: 589 training return: tensor(16.4110, device='cuda:0')
episode: 590 training return: tensor(-536.8147, device='cuda:0')
episode: 591 training return: tensor(-682.3318, device='cuda:0')
epoch: 148 test_true_pfm: 3272.4342815926507 sim_pfm: -77.57776580315355
episode: 592 training return: tensor(-383.8735, device='cuda:0')
episode: 593 training return: tensor(-371.6015, device='cuda:0')
episode: 594 training return: tensor(88.4582, device='cuda:0')
episode: 595 training return: tensor(-626.5172, device='cuda:0')
epoch: 149 test_true_pfm: 1342.5513648976548 sim_pfm: -449.415653867162
episode: 596 training return: tensor(-415.0275, device='cuda:0')
episode: 597 training return: tensor(57.8442, device='cuda:0')
episode: 598 training return: tensor(69.2059, device='cuda:0')
episode: 599 training return: tensor(18.3079, device='cuda:0')
epoch: 150 test_true_pfm: 3258.684585350846 sim_pfm: 14.08654618675549
