['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '7', '--data', '100000', '--regu', '0.2']
epoch: 0 training_loss 0.30832467779517175 test_loss: 0.20704755783081055
epoch: 1 training_loss 0.18046960286796093 test_loss: 0.14692813158035278
epoch: 2 training_loss 0.1504828631132841 test_loss: 0.1554482698440552
epoch: 3 training_loss 0.13814059570431708 test_loss: 0.12975419759750367
epoch: 4 training_loss 0.1278168461844325 test_loss: 0.1267237663269043
epoch: 5 training_loss 0.1307987515628338 test_loss: 0.13558878898620605
epoch: 6 training_loss 0.12211834918707609 test_loss: 0.13448489904403688
epoch: 7 training_loss 0.12158796634525061 test_loss: 0.14373302459716797
epoch: 8 training_loss 0.11940208684653043 test_loss: 0.1139703631401062
epoch: 9 training_loss 0.1194978516548872 test_loss: 0.10582542419433594
epoch: 10 training_loss 0.10780929137021303 test_loss: 0.10726305246353149
epoch: 11 training_loss 0.12138632914051413 test_loss: 0.11232132911682129
epoch: 12 training_loss 0.11010060094296932 test_loss: 0.12145346403121948
epoch: 13 training_loss 0.11648255135864019 test_loss: 0.1068803310394287
epoch: 14 training_loss 0.11206291018053889 test_loss: 0.1256717324256897
epoch: 15 training_loss 0.11347784608602524 test_loss: 0.107345712184906
epoch: 16 training_loss 0.11835893135517836 test_loss: 0.10031065940856934
epoch: 17 training_loss 0.11555905625224114 test_loss: 0.1328797698020935
epoch: 18 training_loss 0.11171376053243876 test_loss: 0.1107212781906128
epoch: 19 training_loss 0.10682042619213462 test_loss: 0.1008684515953064
epoch: 20 training_loss 0.11340357478708028 test_loss: 0.12115747928619384
epoch: 21 training_loss 0.10228095756843686 test_loss: 0.1361190438270569
epoch: 22 training_loss 0.11152016660198569 test_loss: 0.12432597875595093
epoch: 23 training_loss 0.10856521677225828 test_loss: 0.09289770126342774
epoch: 24 training_loss 0.11009363371878862 test_loss: 0.11787972450256348
epoch: 25 training_loss 0.10805534735321999 test_loss: 0.10241814851760864
epoch: 26 training_loss 0.10060706194490195 test_loss: 0.1127804160118103
epoch: 27 training_loss 0.11494273256510495 test_loss: 0.11284847259521484
epoch: 28 training_loss 0.11269829634577036 test_loss: 0.12539539337158204
epoch: 29 training_loss 0.11719800926744937 test_loss: 0.14000144004821777
epoch: 30 training_loss 0.10736848058179022 test_loss: 0.10036475658416748
epoch: 31 training_loss 0.10575057726353407 test_loss: 0.12425503730773926
epoch: 32 training_loss 0.10948032781481742 test_loss: 0.12154583930969239
epoch: 33 training_loss 0.11408828165382147 test_loss: 0.11379410028457641
epoch: 34 training_loss 0.1012635263800621 test_loss: 0.09689709544181824
epoch: 35 training_loss 0.09898596808314324 test_loss: 0.10974944829940796
epoch: 36 training_loss 0.11037800617516041 test_loss: 0.1271503448486328
epoch: 37 training_loss 0.10833076192066074 test_loss: 0.09869555234909058
epoch: 38 training_loss 0.10833080035634339 test_loss: 0.11348860263824463
epoch: 39 training_loss 0.11479155039414764 test_loss: 0.11628413200378418
epoch: 40 training_loss 0.10191810734570027 test_loss: 0.11047872304916381
epoch: 41 training_loss 0.1145914505980909 test_loss: 0.10509759187698364
epoch: 42 training_loss 0.10620924109593034 test_loss: 0.11369274854660034
epoch: 43 training_loss 0.11294366165995598 test_loss: 0.1031537413597107
epoch: 44 training_loss 0.105516172144562 test_loss: 0.0883399248123169
epoch: 45 training_loss 0.10811906646937132 test_loss: 0.10233330726623535
epoch: 46 training_loss 0.10387291956692934 test_loss: 0.11134601831436157
epoch: 47 training_loss 0.1047258274257183 test_loss: 0.12087302207946778
epoch: 48 training_loss 0.10605746369808912 test_loss: 0.09893699288368225
epoch: 49 training_loss 0.11187275972217321 test_loss: 0.10373753309249878
epoch: 50 training_loss 0.10143658336251975 test_loss: 0.11942170858383179
epoch: 51 training_loss 0.11121034320443869 test_loss: 0.10158034563064575
epoch: 52 training_loss 0.10610896237194538 test_loss: 0.1169488787651062
epoch: 53 training_loss 0.10412130180746317 test_loss: 0.1312796950340271
epoch: 54 training_loss 0.11117172248661518 test_loss: 0.1041562557220459
epoch: 55 training_loss 0.10170436747372151 test_loss: 0.10651657581329346
epoch: 56 training_loss 0.10721178978681564 test_loss: 0.10515172481536865
epoch: 57 training_loss 0.10316301042214036 test_loss: 0.10410135984420776
epoch: 58 training_loss 0.10679105129092932 test_loss: 0.09851387739181519
epoch: 59 training_loss 0.10307559139095247 test_loss: 0.11456938982009887
epoch: 60 training_loss 0.10777416434139013 test_loss: 0.10028668642044067
epoch: 61 training_loss 0.10751603281125427 test_loss: 0.12029566764831542
epoch: 62 training_loss 0.10291370946913958 test_loss: 0.12721140384674073
epoch: 63 training_loss 0.10750615175813437 test_loss: 0.0929760217666626
epoch: 64 training_loss 0.1079329059459269 test_loss: 0.08594070076942444
epoch: 65 training_loss 0.11325335025787353 test_loss: 0.10705480575561524
epoch: 66 training_loss 0.10602965798228979 test_loss: 0.10516811609268188
epoch: 67 training_loss 0.10784462196752428 test_loss: 0.09834704399108887
epoch: 68 training_loss 0.10266386575996876 test_loss: 0.11729327440261841
epoch: 69 training_loss 0.09998178482055664 test_loss: 0.11401419639587403
epoch: 70 training_loss 0.10040586797520518 test_loss: 0.10198763608932496
epoch: 71 training_loss 0.11149457365274429 test_loss: 0.12281324863433837
epoch: 72 training_loss 0.10908767588436603 test_loss: 0.10374454259872437
epoch: 73 training_loss 0.10494007917121052 test_loss: 0.10260554552078247
epoch: 74 training_loss 0.1107883427478373 test_loss: 0.11488571166992187
epoch: 75 training_loss 0.10992082387208939 test_loss: 0.10022668838500977
epoch: 76 training_loss 0.09829500734806061 test_loss: 0.0986519694328308
epoch: 77 training_loss 0.10684190200641751 test_loss: 0.08846753239631652
epoch: 78 training_loss 0.10571565560996532 test_loss: 0.09535366296768188
epoch: 79 training_loss 0.108131053596735 test_loss: 0.12115786075592042
epoch: 80 training_loss 0.09661826733499765 test_loss: 0.11076438426971436
epoch: 81 training_loss 0.10727584086358548 test_loss: 0.10454034805297852
epoch: 82 training_loss 0.1061261840350926 test_loss: 0.09093948006629944
epoch: 83 training_loss 0.1074376616254449 test_loss: 0.10603576898574829
epoch: 84 training_loss 0.10378163535147905 test_loss: 0.1014866828918457
epoch: 85 training_loss 0.10180275635793805 test_loss: 0.13068928718566894
epoch: 86 training_loss 0.10400459486991168 test_loss: 0.10173952579498291
epoch: 87 training_loss 0.09794086238369346 test_loss: 0.10175201892852784
epoch: 88 training_loss 0.10738258531317114 test_loss: 0.10860626697540283
epoch: 89 training_loss 0.09983056623488665 test_loss: 0.1199649691581726
epoch: 90 training_loss 0.10535656295716762 test_loss: 0.11022695302963256
epoch: 91 training_loss 0.10585490372031928 test_loss: 0.10566848516464233
epoch: 92 training_loss 0.10484931569546461 test_loss: 0.1036860466003418
epoch: 93 training_loss 0.09986854929476977 test_loss: 0.09939695000648499
epoch: 94 training_loss 0.10598836086690426 test_loss: 0.09997299313545227
epoch: 95 training_loss 0.1083613109216094 test_loss: 0.09889674782752991
epoch: 96 training_loss 0.09416184447705746 test_loss: 0.09586294889450073
epoch: 97 training_loss 0.10608736326918006 test_loss: 0.10595673322677612
epoch: 98 training_loss 0.10091687768697738 test_loss: 0.11253151893615723
epoch: 99 training_loss 0.1029480779543519 test_loss: 0.1088895320892334
epoch: 100 training_loss 0.09918372001498937 test_loss: 0.10945228338241578
epoch: 101 training_loss 0.10631788337603211 test_loss: 0.11323275566101074
epoch: 102 training_loss 0.09961922019720078 test_loss: 0.10192898511886597
epoch: 103 training_loss 0.1038655336946249 test_loss: 0.10899548530578614
epoch: 104 training_loss 0.10861290151253343 test_loss: 0.09159277677536011
epoch: 105 training_loss 0.10752136688679456 test_loss: 0.10273399353027343
epoch: 106 training_loss 0.09190829735249281 test_loss: 0.10728497505187988
epoch: 107 training_loss 0.1079889278486371 test_loss: 0.1226554274559021
epoch: 108 training_loss 0.10364454228430986 test_loss: 0.0982084572315216
epoch: 109 training_loss 0.11218703333288431 test_loss: 0.10099838972091675
epoch: 110 training_loss 0.10595293968915939 test_loss: 0.09955689907073975
epoch: 111 training_loss 0.11524484006687999 test_loss: 0.11075530052185059
epoch: 112 training_loss 0.1092139757424593 test_loss: 0.10290229320526123
epoch: 113 training_loss 0.10620691208168864 test_loss: 0.09814727902412415
epoch: 114 training_loss 0.10303488602861761 test_loss: 0.10563867092132569
epoch: 115 training_loss 0.10363683624193072 test_loss: 0.10917707681655883
epoch: 116 training_loss 0.11144129673019051 test_loss: 0.10824174880981445
epoch: 117 training_loss 0.09910092143341899 test_loss: 0.11670012474060058
epoch: 118 training_loss 0.09554461034014822 test_loss: 0.12160202264785766
epoch: 119 training_loss 0.10660442650318146 test_loss: 0.0943880021572113
epoch: 120 training_loss 0.10402249526232481 test_loss: 0.12040401697158813
epoch: 121 training_loss 0.1019105302169919 test_loss: 0.1060904622077942
epoch: 122 training_loss 0.11414391424506903 test_loss: 0.11596795320510864
epoch: 123 training_loss 0.1109906217828393 test_loss: 0.10627379417419433
epoch: 124 training_loss 0.10674415420740843 test_loss: 0.11757041215896606
epoch: 125 training_loss 0.10712884806096554 test_loss: 0.11017630100250245
epoch: 126 training_loss 0.10399214580655097 test_loss: 0.09359979033470153
epoch: 127 training_loss 0.10207560461014509 test_loss: 0.10656265020370484
epoch: 128 training_loss 0.1046535661816597 test_loss: 0.0947355568408966
epoch: 129 training_loss 0.10220791725441813 test_loss: 0.09876797795295715
epoch: 130 training_loss 0.10747332520782947 test_loss: 0.09208730459213257
epoch: 131 training_loss 0.10137134078890085 test_loss: 0.10784775018692017
epoch: 132 training_loss 0.10361086424440145 test_loss: 0.10491687059402466
epoch: 133 training_loss 0.09767809867858887 test_loss: 0.10811777114868164
epoch: 134 training_loss 0.10316203848458827 test_loss: 0.1021049976348877
epoch: 135 training_loss 0.09910757586359978 test_loss: 0.11344962120056153
epoch: 136 training_loss 0.10330353729426861 test_loss: 0.11060227155685425
epoch: 137 training_loss 0.0960670425184071 test_loss: 0.10631974935531616
epoch: 138 training_loss 0.1088705575093627 test_loss: 0.10616581439971924
epoch: 139 training_loss 0.09638339065015317 test_loss: 0.09755933284759521
epoch: 140 training_loss 0.10338823841884733 test_loss: 0.10622900724411011
epoch: 141 training_loss 0.10472915075719356 test_loss: 0.09315223693847656
epoch: 142 training_loss 0.11154491882771253 test_loss: 0.11960675716400146
epoch: 143 training_loss 0.10047557037323714 test_loss: 0.1037206768989563
epoch: 144 training_loss 0.10379468686878682 test_loss: 0.10756680965423585
epoch: 145 training_loss 0.10010576776228845 test_loss: 0.09244373440742493
epoch: 146 training_loss 0.10452907914295792 test_loss: 0.09010481238365173
epoch: 147 training_loss 0.10852268047630786 test_loss: 0.10052480697631835
epoch: 148 training_loss 0.0973366892337799 test_loss: 0.09256662130355835
epoch: 149 training_loss 0.10033216681331396 test_loss: 0.11270341873168946
epoch: 0 training_loss 54.10380865097046 test_loss: 26.118756103515626
epoch: 1 training_loss 18.944807367324827 test_loss: 15.359652709960937
epoch: 2 training_loss 13.526458024978638 test_loss: 11.861551666259766
epoch: 3 training_loss 10.791687231063843 test_loss: 9.5866455078125
epoch: 4 training_loss 8.820090169906615 test_loss: 8.168408966064453
epoch: 5 training_loss 7.587865672111511 test_loss: 7.139065551757812
epoch: 6 training_loss 6.620388722419738 test_loss: 6.179052734375
epoch: 7 training_loss 5.834147424697876 test_loss: 5.7603096008300785
epoch: 8 training_loss 5.354054403305054 test_loss: 4.9783283233642575
epoch: 9 training_loss 4.793255486488342 test_loss: 4.509869384765625
epoch: 10 training_loss 4.505256721973419 test_loss: 4.29694709777832
epoch: 11 training_loss 4.089091637134552 test_loss: 3.9933330535888674
epoch: 12 training_loss 4.036738197803498 test_loss: 3.904753875732422
epoch: 13 training_loss 3.7319402742385863 test_loss: 3.7663124084472654
epoch: 14 training_loss 3.5442820048332213 test_loss: 3.665949249267578
epoch: 15 training_loss 3.5625823831558225 test_loss: 3.413036346435547
epoch: 16 training_loss 3.3818470072746276 test_loss: 3.4662132263183594
epoch: 17 training_loss 3.314719166755676 test_loss: 3.192705535888672
epoch: 18 training_loss 3.077066147327423 test_loss: 3.3698345184326173
epoch: 19 training_loss 3.029468514919281 test_loss: 3.0189123153686523
epoch: 20 training_loss 2.992632167339325 test_loss: 3.124180030822754
epoch: 21 training_loss 2.9084243535995484 test_loss: 2.9688179016113283
epoch: 22 training_loss 2.850625853538513 test_loss: 2.7464811325073244
epoch: 23 training_loss 2.829617874622345 test_loss: 2.839433288574219
epoch: 24 training_loss 2.6931881952285766 test_loss: 2.8208972930908205
epoch: 25 training_loss 2.679301691055298 test_loss: 2.5940895080566406
epoch: 26 training_loss 2.6307841396331786 test_loss: 2.5256967544555664
epoch: 27 training_loss 2.6121970224380493 test_loss: 2.5455623626708985
epoch: 28 training_loss 2.468967776298523 test_loss: 2.516267776489258
epoch: 29 training_loss 2.520103852748871 test_loss: 2.4365718841552733
epoch: 30 training_loss 2.5110896110534666 test_loss: 2.436422348022461
epoch: 31 training_loss 2.4353405046463013 test_loss: 2.514214515686035
epoch: 32 training_loss 2.428132698535919 test_loss: 2.3933380126953123
epoch: 33 training_loss 2.3264664721488955 test_loss: 2.2639739990234373
epoch: 34 training_loss 2.2975870287418365 test_loss: 2.477799987792969
epoch: 35 training_loss 2.3596753919124605 test_loss: 2.2889951705932616
epoch: 36 training_loss 2.3078471171855925 test_loss: 2.2561357498168944
epoch: 37 training_loss 2.2551357984542846 test_loss: 2.371788215637207
epoch: 38 training_loss 2.2377601754665375 test_loss: 2.1645151138305665
epoch: 39 training_loss 2.206399600505829 test_loss: 2.164226531982422
epoch: 40 training_loss 2.1731364870071412 test_loss: 2.26027774810791
epoch: 41 training_loss 2.126603219509125 test_loss: 2.189783477783203
epoch: 42 training_loss 2.139631726741791 test_loss: 2.1326271057128907
epoch: 43 training_loss 2.145258378982544 test_loss: 2.1264036178588865
epoch: 44 training_loss 2.10691969037056 test_loss: 2.2234521865844727
epoch: 45 training_loss 2.070044301748276 test_loss: 2.117698860168457
epoch: 46 training_loss 2.040388196706772 test_loss: 2.0968175888061524
epoch: 47 training_loss 2.0914118885993958 test_loss: 2.0191013336181642
epoch: 48 training_loss 2.024228972196579 test_loss: 1.992665672302246
epoch: 49 training_loss 2.0298916375637055 test_loss: 2.021718406677246
epoch: 50 training_loss 2.010742552280426 test_loss: 2.039716911315918
epoch: 51 training_loss 1.9899706530570984 test_loss: 1.9406793594360352
epoch: 52 training_loss 1.9478342342376709 test_loss: 2.014988327026367
epoch: 53 training_loss 1.9423179721832275 test_loss: 1.9049016952514648
epoch: 54 training_loss 1.9584250783920287 test_loss: 1.9542528152465821
epoch: 55 training_loss 1.951450765132904 test_loss: 1.990443801879883
epoch: 56 training_loss 1.9117667114734649 test_loss: 2.0110260009765626
epoch: 57 training_loss 1.9452551865577699 test_loss: 1.9382965087890625
epoch: 58 training_loss 1.9035611963272094 test_loss: 1.9322343826293946
epoch: 59 training_loss 1.885687118768692 test_loss: 1.9740144729614257
epoch: 60 training_loss 1.8738594722747803 test_loss: 1.8761226654052734
epoch: 61 training_loss 1.87241095662117 test_loss: 1.8559715270996093
epoch: 62 training_loss 1.85656672000885 test_loss: 1.8429315567016602
epoch: 63 training_loss 1.8275947320461272 test_loss: 1.835377311706543
epoch: 64 training_loss 1.836490545272827 test_loss: 1.9640705108642578
epoch: 65 training_loss 1.790826153755188 test_loss: 1.8457696914672852
epoch: 66 training_loss 1.8681031739711762 test_loss: 1.951003074645996
epoch: 67 training_loss 1.8079181122779846 test_loss: 1.7602935791015626
epoch: 68 training_loss 1.8283498358726502 test_loss: 1.842302131652832
epoch: 69 training_loss 1.7984002542495727 test_loss: 1.7710540771484375
epoch: 70 training_loss 1.7796688020229339 test_loss: 1.8372806549072265
epoch: 71 training_loss 1.774976898431778 test_loss: 1.9790878295898438
epoch: 72 training_loss 1.7838779807090759 test_loss: 1.7507818222045899
epoch: 73 training_loss 1.7458050787448882 test_loss: 1.8136075973510741
epoch: 74 training_loss 1.7661926364898681 test_loss: 1.8806793212890625
epoch: 75 training_loss 1.7671256935596467 test_loss: 1.7996248245239257
epoch: 76 training_loss 1.7241245877742768 test_loss: 1.712250328063965
epoch: 77 training_loss 1.7571231162548064 test_loss: 1.768185806274414
epoch: 78 training_loss 1.7090955257415772 test_loss: 1.7066360473632813
epoch: 79 training_loss 1.7246091592311859 test_loss: 1.7517391204833985
epoch: 80 training_loss 1.7031826174259186 test_loss: 1.7624902725219727
epoch: 81 training_loss 1.7536217427253724 test_loss: 1.707148551940918
epoch: 82 training_loss 1.6999859404563904 test_loss: 1.7097288131713868
epoch: 83 training_loss 1.741742125749588 test_loss: 1.7388206481933595
epoch: 84 training_loss 1.696617110967636 test_loss: 1.6575119018554687
epoch: 85 training_loss 1.6967774391174317 test_loss: 1.7326515197753907
epoch: 86 training_loss 1.6816688191890716 test_loss: 1.686595344543457
epoch: 87 training_loss 1.6636170268058776 test_loss: 1.7032363891601563
epoch: 88 training_loss 1.6840611374378205 test_loss: 1.760784912109375
epoch: 89 training_loss 1.6615548717975617 test_loss: 1.637347412109375
epoch: 90 training_loss 1.668323233127594 test_loss: 1.6640863418579102
epoch: 91 training_loss 1.6683494782447814 test_loss: 1.688631248474121
epoch: 92 training_loss 1.6641728794574737 test_loss: 1.6867767333984376
epoch: 93 training_loss 1.653923486471176 test_loss: 1.6025497436523437
epoch: 94 training_loss 1.663706098794937 test_loss: 1.6816446304321289
epoch: 95 training_loss 1.681810646057129 test_loss: 1.7198013305664062
epoch: 96 training_loss 1.6332174611091614 test_loss: 1.6103933334350586
epoch: 97 training_loss 1.6283970963954926 test_loss: 1.606574249267578
epoch: 98 training_loss 1.632612920999527 test_loss: 1.643315315246582
epoch: 99 training_loss 1.6173018395900727 test_loss: 1.6425243377685548
epoch: 100 training_loss 1.6709850072860717 test_loss: 1.5905258178710937
epoch: 101 training_loss 1.6198724377155305 test_loss: 1.6731832504272461
epoch: 102 training_loss 1.6002379190921783 test_loss: 1.6450477600097657
epoch: 103 training_loss 1.6017525935173034 test_loss: 1.6785648345947266
epoch: 104 training_loss 1.6012922573089599 test_loss: 1.6342500686645507
epoch: 105 training_loss 1.6061625611782073 test_loss: 1.681087112426758
epoch: 106 training_loss 1.629963583946228 test_loss: 1.649622917175293
epoch: 107 training_loss 1.55944540143013 test_loss: 1.6018531799316407
epoch: 108 training_loss 1.5788202047348023 test_loss: 1.5998780250549316
epoch: 109 training_loss 1.593341771364212 test_loss: 1.6311553955078124
epoch: 110 training_loss 1.595800631046295 test_loss: 1.6541114807128907
epoch: 111 training_loss 1.5622785151004792 test_loss: 1.6125186920166015
epoch: 112 training_loss 1.5800701582431793 test_loss: 1.608180046081543
epoch: 113 training_loss 1.5691498398780823 test_loss: 1.5816585540771484
epoch: 114 training_loss 1.565274692773819 test_loss: 1.5878786087036132
epoch: 115 training_loss 1.5599713945388793 test_loss: 1.644403839111328
epoch: 116 training_loss 1.5406841599941254 test_loss: 1.5314958572387696
epoch: 117 training_loss 1.5786135482788086 test_loss: 1.5196555137634278
epoch: 118 training_loss 1.577062063217163 test_loss: 1.5789024353027343
epoch: 119 training_loss 1.5653513550758362 test_loss: 1.5830960273742676
epoch: 120 training_loss 1.5695931088924409 test_loss: 1.584755802154541
epoch: 121 training_loss 1.5979675734043122 test_loss: 1.542825698852539
epoch: 122 training_loss 1.5537938582897186 test_loss: 1.5486811637878417
epoch: 123 training_loss 1.542758218050003 test_loss: 1.5879117965698242
epoch: 124 training_loss 1.5462826311588287 test_loss: 1.583017921447754
epoch: 125 training_loss 1.533693895339966 test_loss: 1.577747917175293
epoch: 126 training_loss 1.5279759991168975 test_loss: 1.5507682800292968
epoch: 127 training_loss 1.5267362737655639 test_loss: 1.534450626373291
epoch: 128 training_loss 1.517553926706314 test_loss: 1.5434250831604004
epoch: 129 training_loss 1.5175903964042663 test_loss: 1.5067148208618164
epoch: 130 training_loss 1.527770949602127 test_loss: 1.6417951583862305
epoch: 131 training_loss 1.5154870128631592 test_loss: 1.5566652297973633
epoch: 132 training_loss 1.5276585984230042 test_loss: 1.5003904342651366
epoch: 133 training_loss 1.50677925825119 test_loss: 1.5681503295898438
epoch: 134 training_loss 1.493303325176239 test_loss: 1.545248317718506
epoch: 135 training_loss 1.5030763280391692 test_loss: 1.565865707397461
epoch: 136 training_loss 1.5044960534572602 test_loss: 1.535239315032959
epoch: 137 training_loss 1.4863175535202027 test_loss: 1.6028364181518555
epoch: 138 training_loss 1.4871701085567475 test_loss: 1.5044675827026368
epoch: 139 training_loss 1.4985236787796021 test_loss: 1.510096263885498
epoch: 140 training_loss 1.5085568273067473 test_loss: 1.5005719184875488
epoch: 141 training_loss 1.507374804019928 test_loss: 1.4844965934753418
epoch: 142 training_loss 1.5204145991802216 test_loss: 1.4894062995910644
epoch: 143 training_loss 1.50204971909523 test_loss: 1.4989133834838868
epoch: 144 training_loss 1.490917031764984 test_loss: 1.4912990570068358
epoch: 145 training_loss 1.4970953595638274 test_loss: 1.5098641395568848
epoch: 146 training_loss 1.4823536968231201 test_loss: 1.532804584503174
epoch: 147 training_loss 1.4791675770282746 test_loss: 1.468153190612793
epoch: 148 training_loss 1.472641842365265 test_loss: 1.519247341156006
epoch: 149 training_loss 1.4565068256855012 test_loss: 1.4979517936706543
5090.891025792227
episode: 0 training return: tensor(-283.2713, device='cuda:0')
episode: 1 training return: tensor(-287.3763, device='cuda:0')
episode: 2 training return: tensor(-241.7322, device='cuda:0')
episode: 3 training return: tensor(-238.0228, device='cuda:0')
epoch: 1 test_true_pfm: 4910.065215472448 sim_pfm: 5.659387280136191
episode: 4 training return: tensor(-286.6093, device='cuda:0')
episode: 5 training return: tensor(-213.6693, device='cuda:0')
episode: 6 training return: tensor(-163.2529, device='cuda:0')
episode: 7 training return: tensor(-247.0482, device='cuda:0')
epoch: 2 test_true_pfm: 5036.67143886185 sim_pfm: -126.44028380171706
episode: 8 training return: tensor(-197.7937, device='cuda:0')
episode: 9 training return: tensor(-228.5213, device='cuda:0')
episode: 10 training return: tensor(-266.4408, device='cuda:0')
episode: 11 training return: tensor(-284.8887, device='cuda:0')
epoch: 3 test_true_pfm: 5001.325380912187 sim_pfm: 25.1478843287332
episode: 12 training return: tensor(-326.1780, device='cuda:0')
episode: 13 training return: tensor(-228.3772, device='cuda:0')
episode: 14 training return: tensor(-263.6625, device='cuda:0')
episode: 15 training return: tensor(-250.8430, device='cuda:0')
epoch: 4 test_true_pfm: 5180.206419340429 sim_pfm: -115.86789825817687
episode: 16 training return: tensor(-347.9822, device='cuda:0')
episode: 17 training return: tensor(-210.8318, device='cuda:0')
episode: 18 training return: tensor(-142.8163, device='cuda:0')
episode: 19 training return: tensor(-292.2200, device='cuda:0')
epoch: 5 test_true_pfm: 5225.050390973358 sim_pfm: 13.73505284592587
episode: 20 training return: tensor(-272.2686, device='cuda:0')
episode: 21 training return: tensor(-174.4619, device='cuda:0')
episode: 22 training return: tensor(-127.6380, device='cuda:0')
episode: 23 training return: tensor(-101.5528, device='cuda:0')
epoch: 6 test_true_pfm: 5096.184098500894 sim_pfm: -26.88641505907678
episode: 24 training return: tensor(-279.3410, device='cuda:0')
episode: 25 training return: tensor(-260.5735, device='cuda:0')
episode: 26 training return: tensor(-286.5705, device='cuda:0')
episode: 27 training return: tensor(-846.0105, device='cuda:0')
epoch: 7 test_true_pfm: 5177.732365000247 sim_pfm: 19.977533934793126
episode: 28 training return: tensor(-109.4918, device='cuda:0')
episode: 29 training return: tensor(-177.3135, device='cuda:0')
episode: 30 training return: tensor(-240.3293, device='cuda:0')
episode: 31 training return: tensor(-86.1957, device='cuda:0')
epoch: 8 test_true_pfm: 5209.651318235249 sim_pfm: -22.65405419069187
episode: 32 training return: tensor(-216.0583, device='cuda:0')
episode: 33 training return: tensor(-105.2169, device='cuda:0')
episode: 34 training return: tensor(-74.1604, device='cuda:0')
episode: 35 training return: tensor(-40.6468, device='cuda:0')
epoch: 9 test_true_pfm: 5132.627825525488 sim_pfm: 10.612512206367683
episode: 36 training return: tensor(-171.2758, device='cuda:0')
episode: 37 training return: tensor(-130.7282, device='cuda:0')
episode: 38 training return: tensor(-75.5477, device='cuda:0')
episode: 39 training return: tensor(-103.3050, device='cuda:0')
epoch: 10 test_true_pfm: 5232.301709361477 sim_pfm: 75.16121648585734
episode: 40 training return: tensor(-96.3261, device='cuda:0')
episode: 41 training return: tensor(-217.2307, device='cuda:0')
episode: 42 training return: tensor(-50.1509, device='cuda:0')
episode: 43 training return: tensor(-158.0090, device='cuda:0')
epoch: 11 test_true_pfm: 5406.936463417586 sim_pfm: 117.06355224798124
episode: 44 training return: tensor(-103.2943, device='cuda:0')
episode: 45 training return: tensor(-47.7452, device='cuda:0')
episode: 46 training return: tensor(-85.0767, device='cuda:0')
episode: 47 training return: tensor(5.6160, device='cuda:0')
epoch: 12 test_true_pfm: 5299.790514609943 sim_pfm: 75.91847305124004
episode: 48 training return: tensor(-55.9946, device='cuda:0')
episode: 49 training return: tensor(-188.6144, device='cuda:0')
episode: 50 training return: tensor(-98.2977, device='cuda:0')
episode: 51 training return: tensor(197.7829, device='cuda:0')
epoch: 13 test_true_pfm: 5289.260533803575 sim_pfm: 121.01925729537227
episode: 52 training return: tensor(-32.7018, device='cuda:0')
episode: 53 training return: tensor(-44.8799, device='cuda:0')
episode: 54 training return: tensor(-38.7635, device='cuda:0')
episode: 55 training return: tensor(-30.4174, device='cuda:0')
epoch: 14 test_true_pfm: 5314.549671642756 sim_pfm: 157.91203480481636
episode: 56 training return: tensor(-100.4474, device='cuda:0')
episode: 57 training return: tensor(-18.5352, device='cuda:0')
episode: 58 training return: tensor(-28.8880, device='cuda:0')
episode: 59 training return: tensor(43.3420, device='cuda:0')
epoch: 15 test_true_pfm: 5375.334973843262 sim_pfm: 113.27451642730739
episode: 60 training return: tensor(-73.6685, device='cuda:0')
episode: 61 training return: tensor(-2.8242, device='cuda:0')
episode: 62 training return: tensor(-62.5276, device='cuda:0')
episode: 63 training return: tensor(-119.2746, device='cuda:0')
epoch: 16 test_true_pfm: 5445.758683175554 sim_pfm: 106.61914111476896
episode: 64 training return: tensor(71.3258, device='cuda:0')
episode: 65 training return: tensor(67.6967, device='cuda:0')
episode: 66 training return: tensor(-3.1282, device='cuda:0')
episode: 67 training return: tensor(149.5535, device='cuda:0')
epoch: 17 test_true_pfm: 5462.046425838979 sim_pfm: 216.08793833495778
episode: 68 training return: tensor(46.0940, device='cuda:0')
episode: 69 training return: tensor(-72.7243, device='cuda:0')
episode: 70 training return: tensor(133.3150, device='cuda:0')
episode: 71 training return: tensor(42.7455, device='cuda:0')
epoch: 18 test_true_pfm: 5472.217584273897 sim_pfm: 236.55526855525872
episode: 72 training return: tensor(-37.8586, device='cuda:0')
episode: 73 training return: tensor(110.7795, device='cuda:0')
episode: 74 training return: tensor(1.8820, device='cuda:0')
episode: 75 training return: tensor(97.4560, device='cuda:0')
epoch: 19 test_true_pfm: 5404.335477187653 sim_pfm: 158.81413027624754
episode: 76 training return: tensor(21.9826, device='cuda:0')
episode: 77 training return: tensor(-65.9980, device='cuda:0')
episode: 78 training return: tensor(111.5513, device='cuda:0')
episode: 79 training return: tensor(159.8560, device='cuda:0')
epoch: 20 test_true_pfm: 5530.673502772314 sim_pfm: 221.81054731603945
episode: 80 training return: tensor(132.0349, device='cuda:0')
episode: 81 training return: tensor(91.1551, device='cuda:0')
episode: 82 training return: tensor(39.6319, device='cuda:0')
episode: 83 training return: tensor(56.8344, device='cuda:0')
epoch: 21 test_true_pfm: 5522.445434441892 sim_pfm: 202.2449629593951
episode: 84 training return: tensor(65.8133, device='cuda:0')
episode: 85 training return: tensor(-17.6020, device='cuda:0')
episode: 86 training return: tensor(144.8200, device='cuda:0')
episode: 87 training return: tensor(145.3896, device='cuda:0')
epoch: 22 test_true_pfm: 5589.145184044973 sim_pfm: 228.7424188221339
episode: 88 training return: tensor(105.4106, device='cuda:0')
episode: 89 training return: tensor(-18.7552, device='cuda:0')
episode: 90 training return: tensor(109.6780, device='cuda:0')
episode: 91 training return: tensor(43.6789, device='cuda:0')
epoch: 23 test_true_pfm: 5636.729540517025 sim_pfm: 283.49085158716963
episode: 92 training return: tensor(118.6511, device='cuda:0')
episode: 93 training return: tensor(-27.5583, device='cuda:0')
episode: 94 training return: tensor(103.4379, device='cuda:0')
episode: 95 training return: tensor(170.6051, device='cuda:0')
epoch: 24 test_true_pfm: 5569.622392484404 sim_pfm: 283.3597460326273
episode: 96 training return: tensor(123.9138, device='cuda:0')
episode: 97 training return: tensor(152.6526, device='cuda:0')
episode: 98 training return: tensor(163.1637, device='cuda:0')
episode: 99 training return: tensor(158.6558, device='cuda:0')
epoch: 25 test_true_pfm: 5699.539019072111 sim_pfm: 247.12695037724916
episode: 100 training return: tensor(99.1816, device='cuda:0')
episode: 101 training return: tensor(138.3703, device='cuda:0')
episode: 102 training return: tensor(220.3022, device='cuda:0')
episode: 103 training return: tensor(120.9960, device='cuda:0')
epoch: 26 test_true_pfm: 5606.537682419661 sim_pfm: 281.75449108618585
episode: 104 training return: tensor(140.2234, device='cuda:0')
episode: 105 training return: tensor(165.3311, device='cuda:0')
episode: 106 training return: tensor(232.3662, device='cuda:0')
episode: 107 training return: tensor(143.5502, device='cuda:0')
epoch: 27 test_true_pfm: 5586.558520325428 sim_pfm: 341.81600384035846
episode: 108 training return: tensor(208.1856, device='cuda:0')
episode: 109 training return: tensor(266.5322, device='cuda:0')
episode: 110 training return: tensor(197.8509, device='cuda:0')
episode: 111 training return: tensor(276.2279, device='cuda:0')
epoch: 28 test_true_pfm: 5590.11296777082 sim_pfm: 344.82676239310723
episode: 112 training return: tensor(226.5421, device='cuda:0')
episode: 113 training return: tensor(225.5796, device='cuda:0')
episode: 114 training return: tensor(241.1034, device='cuda:0')
episode: 115 training return: tensor(90.0658, device='cuda:0')
epoch: 29 test_true_pfm: 5750.307598476877 sim_pfm: 311.63630653184373
episode: 116 training return: tensor(129.5379, device='cuda:0')
episode: 117 training return: tensor(124.0166, device='cuda:0')
episode: 118 training return: tensor(170.1505, device='cuda:0')
episode: 119 training return: tensor(288.5208, device='cuda:0')
epoch: 30 test_true_pfm: 5720.644971889585 sim_pfm: 376.5111878661749
episode: 120 training return: tensor(179.6703, device='cuda:0')
episode: 121 training return: tensor(240.1655, device='cuda:0')
episode: 122 training return: tensor(240.7857, device='cuda:0')
episode: 123 training return: tensor(249.8707, device='cuda:0')
epoch: 31 test_true_pfm: 5711.742079403888 sim_pfm: 347.11053192025673
episode: 124 training return: tensor(218.7149, device='cuda:0')
episode: 125 training return: tensor(96.4445, device='cuda:0')
episode: 126 training return: tensor(196.4247, device='cuda:0')
episode: 127 training return: tensor(178.9413, device='cuda:0')
epoch: 32 test_true_pfm: 5775.907692311995 sim_pfm: 331.4767094424945
episode: 128 training return: tensor(235.5036, device='cuda:0')
episode: 129 training return: tensor(225.8105, device='cuda:0')
episode: 130 training return: tensor(181.5485, device='cuda:0')
episode: 131 training return: tensor(265.8784, device='cuda:0')
epoch: 33 test_true_pfm: 5783.827422647978 sim_pfm: 373.10911276956904
episode: 132 training return: tensor(146.3236, device='cuda:0')
episode: 133 training return: tensor(308.9593, device='cuda:0')
episode: 134 training return: tensor(236.1692, device='cuda:0')
episode: 135 training return: tensor(210.8152, device='cuda:0')
epoch: 34 test_true_pfm: 5667.535060107509 sim_pfm: 321.53474930832937
episode: 136 training return: tensor(232.7446, device='cuda:0')
episode: 137 training return: tensor(230.7403, device='cuda:0')
episode: 138 training return: tensor(226.7843, device='cuda:0')
episode: 139 training return: tensor(349.6194, device='cuda:0')
epoch: 35 test_true_pfm: 5747.893052320992 sim_pfm: 381.1900213410845
episode: 140 training return: tensor(253.7195, device='cuda:0')
episode: 141 training return: tensor(299.5335, device='cuda:0')
episode: 142 training return: tensor(68.7378, device='cuda:0')
episode: 143 training return: tensor(260.6237, device='cuda:0')
epoch: 36 test_true_pfm: 5906.785550448271 sim_pfm: 410.3844882916892
episode: 144 training return: tensor(218.2608, device='cuda:0')
episode: 145 training return: tensor(226.3219, device='cuda:0')
episode: 146 training return: tensor(187.1946, device='cuda:0')
episode: 147 training return: tensor(212.1632, device='cuda:0')
epoch: 37 test_true_pfm: 5864.105501474124 sim_pfm: 413.4643841209666
episode: 148 training return: tensor(257.5011, device='cuda:0')
episode: 149 training return: tensor(201.2552, device='cuda:0')
episode: 150 training return: tensor(217.5333, device='cuda:0')
episode: 151 training return: tensor(177.2178, device='cuda:0')
epoch: 38 test_true_pfm: 5816.796406850332 sim_pfm: 437.09676315361867
episode: 152 training return: tensor(222.5421, device='cuda:0')
episode: 153 training return: tensor(64.0303, device='cuda:0')
episode: 154 training return: tensor(261.5159, device='cuda:0')
episode: 155 training return: tensor(277.7670, device='cuda:0')
epoch: 39 test_true_pfm: 5794.126213923151 sim_pfm: 401.86237071749446
episode: 156 training return: tensor(191.2301, device='cuda:0')
episode: 157 training return: tensor(221.7169, device='cuda:0')
episode: 158 training return: tensor(374.7553, device='cuda:0')
episode: 159 training return: tensor(300.7698, device='cuda:0')
epoch: 40 test_true_pfm: 5812.08463487343 sim_pfm: 420.3666100183812
episode: 160 training return: tensor(317.6246, device='cuda:0')
episode: 161 training return: tensor(210.9018, device='cuda:0')
episode: 162 training return: tensor(278.8545, device='cuda:0')
episode: 163 training return: tensor(224.6396, device='cuda:0')
epoch: 41 test_true_pfm: 5806.207617386201 sim_pfm: 444.6828572018033
episode: 164 training return: tensor(189.5957, device='cuda:0')
episode: 165 training return: tensor(162.6354, device='cuda:0')
episode: 166 training return: tensor(299.2611, device='cuda:0')
episode: 167 training return: tensor(164.2371, device='cuda:0')
epoch: 42 test_true_pfm: 5816.415430660084 sim_pfm: 454.03576732613146
episode: 168 training return: tensor(124.8754, device='cuda:0')
episode: 169 training return: tensor(320.8410, device='cuda:0')
episode: 170 training return: tensor(202.0112, device='cuda:0')
episode: 171 training return: tensor(119.6754, device='cuda:0')
epoch: 43 test_true_pfm: 5836.947702007645 sim_pfm: 454.90395645946654
episode: 172 training return: tensor(270.3315, device='cuda:0')
episode: 173 training return: tensor(355.8131, device='cuda:0')
episode: 174 training return: tensor(224.8971, device='cuda:0')
episode: 175 training return: tensor(325.5469, device='cuda:0')
epoch: 44 test_true_pfm: 5911.208450742346 sim_pfm: 459.5315358440469
episode: 176 training return: tensor(317.2205, device='cuda:0')
episode: 177 training return: tensor(153.4406, device='cuda:0')
episode: 178 training return: tensor(305.0046, device='cuda:0')
episode: 179 training return: tensor(315.6351, device='cuda:0')
epoch: 45 test_true_pfm: 5893.601249136769 sim_pfm: 437.3452242915325
episode: 180 training return: tensor(241.7753, device='cuda:0')
episode: 181 training return: tensor(363.0184, device='cuda:0')
episode: 182 training return: tensor(338.8179, device='cuda:0')
episode: 183 training return: tensor(299.3665, device='cuda:0')
epoch: 46 test_true_pfm: 5925.28726749207 sim_pfm: 476.1218249652884
episode: 184 training return: tensor(335.8058, device='cuda:0')
episode: 185 training return: tensor(302.3904, device='cuda:0')
episode: 186 training return: tensor(279.5643, device='cuda:0')
episode: 187 training return: tensor(294.8046, device='cuda:0')
epoch: 47 test_true_pfm: 5788.9806634584675 sim_pfm: 449.78301452674594
episode: 188 training return: tensor(297.2641, device='cuda:0')
episode: 189 training return: tensor(286.4951, device='cuda:0')
episode: 190 training return: tensor(310.5123, device='cuda:0')
episode: 191 training return: tensor(336.3833, device='cuda:0')
epoch: 48 test_true_pfm: 5880.779243045094 sim_pfm: 463.37524688153644
episode: 192 training return: tensor(298.0916, device='cuda:0')
episode: 193 training return: tensor(275.3503, device='cuda:0')
episode: 194 training return: tensor(390.8043, device='cuda:0')
episode: 195 training return: tensor(309.6968, device='cuda:0')
epoch: 49 test_true_pfm: 5955.330393691223 sim_pfm: 395.5286304440427
episode: 196 training return: tensor(278.4769, device='cuda:0')
episode: 197 training return: tensor(281.4581, device='cuda:0')
episode: 198 training return: tensor(363.0761, device='cuda:0')
episode: 199 training return: tensor(318.7944, device='cuda:0')
epoch: 50 test_true_pfm: 5882.025020813201 sim_pfm: 494.33112620038446
episode: 200 training return: tensor(305.3707, device='cuda:0')
episode: 201 training return: tensor(222.8292, device='cuda:0')
episode: 202 training return: tensor(270.2396, device='cuda:0')
episode: 203 training return: tensor(283.8012, device='cuda:0')
epoch: 51 test_true_pfm: 5930.334069741778 sim_pfm: 461.0997422916892
episode: 204 training return: tensor(352.3653, device='cuda:0')
episode: 205 training return: tensor(284.7062, device='cuda:0')
episode: 206 training return: tensor(285.1799, device='cuda:0')
episode: 207 training return: tensor(360.8005, device='cuda:0')
epoch: 52 test_true_pfm: 5903.159276960373 sim_pfm: 496.0601972754036
episode: 208 training return: tensor(309.6960, device='cuda:0')
episode: 209 training return: tensor(360.5400, device='cuda:0')
episode: 210 training return: tensor(318.7810, device='cuda:0')
episode: 211 training return: tensor(378.2404, device='cuda:0')
epoch: 53 test_true_pfm: 5905.925278565138 sim_pfm: 430.4461623230697
episode: 212 training return: tensor(337.2649, device='cuda:0')
episode: 213 training return: tensor(309.1268, device='cuda:0')
episode: 214 training return: tensor(318.1962, device='cuda:0')
episode: 215 training return: tensor(321.3661, device='cuda:0')
epoch: 54 test_true_pfm: 5949.173435572877 sim_pfm: 488.9415358348051
episode: 216 training return: tensor(288.1358, device='cuda:0')
episode: 217 training return: tensor(259.9832, device='cuda:0')
episode: 218 training return: tensor(268.1372, device='cuda:0')
episode: 219 training return: tensor(339.7830, device='cuda:0')
epoch: 55 test_true_pfm: 5967.3360961781 sim_pfm: 487.9379422023776
episode: 220 training return: tensor(403.8892, device='cuda:0')
episode: 221 training return: tensor(313.4094, device='cuda:0')
episode: 222 training return: tensor(297.9647, device='cuda:0')
episode: 223 training return: tensor(367.1635, device='cuda:0')
epoch: 56 test_true_pfm: 5899.505237322363 sim_pfm: 455.53168318526394
episode: 224 training return: tensor(382.9607, device='cuda:0')
episode: 225 training return: tensor(371.9532, device='cuda:0')
episode: 226 training return: tensor(356.1479, device='cuda:0')
episode: 227 training return: tensor(380.5503, device='cuda:0')
epoch: 57 test_true_pfm: 5896.523372448205 sim_pfm: 493.58564947429113
episode: 228 training return: tensor(358.6484, device='cuda:0')
episode: 229 training return: tensor(353.4979, device='cuda:0')
episode: 230 training return: tensor(385.3038, device='cuda:0')
episode: 231 training return: tensor(306.1577, device='cuda:0')
epoch: 58 test_true_pfm: 5962.846786345489 sim_pfm: 478.1141680969158
episode: 232 training return: tensor(298.8984, device='cuda:0')
episode: 233 training return: tensor(340.3102, device='cuda:0')
episode: 234 training return: tensor(367.1512, device='cuda:0')
episode: 235 training return: tensor(370.7793, device='cuda:0')
epoch: 59 test_true_pfm: 5995.14659479033 sim_pfm: 502.95637644591625
episode: 236 training return: tensor(381.9645, device='cuda:0')
episode: 237 training return: tensor(389.4418, device='cuda:0')
episode: 238 training return: tensor(355.2977, device='cuda:0')
episode: 239 training return: tensor(286.7386, device='cuda:0')
epoch: 60 test_true_pfm: 5959.496324018521 sim_pfm: 510.02316775320406
episode: 240 training return: tensor(350.3391, device='cuda:0')
episode: 241 training return: tensor(377.3237, device='cuda:0')
episode: 242 training return: tensor(358.8062, device='cuda:0')
episode: 243 training return: tensor(337.5594, device='cuda:0')
epoch: 61 test_true_pfm: 5991.091426028267 sim_pfm: 510.633564942594
episode: 244 training return: tensor(365.6417, device='cuda:0')
episode: 245 training return: tensor(352.1031, device='cuda:0')
episode: 246 training return: tensor(313.3747, device='cuda:0')
episode: 247 training return: tensor(310.8463, device='cuda:0')
epoch: 62 test_true_pfm: 5963.812394202708 sim_pfm: 496.17272219530423
episode: 248 training return: tensor(313.1372, device='cuda:0')
episode: 249 training return: tensor(426.4082, device='cuda:0')
episode: 250 training return: tensor(383.8002, device='cuda:0')
episode: 251 training return: tensor(323.4483, device='cuda:0')
epoch: 63 test_true_pfm: 5934.435708911348 sim_pfm: 490.1217989000531
episode: 252 training return: tensor(392.0538, device='cuda:0')
episode: 253 training return: tensor(413.9912, device='cuda:0')
episode: 254 training return: tensor(258.3192, device='cuda:0')
episode: 255 training return: tensor(356.0106, device='cuda:0')
epoch: 64 test_true_pfm: 5929.5215345077995 sim_pfm: 467.12538773126045
episode: 256 training return: tensor(398.8432, device='cuda:0')
episode: 257 training return: tensor(333.9987, device='cuda:0')
episode: 258 training return: tensor(314.0927, device='cuda:0')
episode: 259 training return: tensor(419.8463, device='cuda:0')
epoch: 65 test_true_pfm: 5995.799816077924 sim_pfm: 496.15555669177166
episode: 260 training return: tensor(372.3874, device='cuda:0')
episode: 261 training return: tensor(376.0635, device='cuda:0')
episode: 262 training return: tensor(376.4774, device='cuda:0')
episode: 263 training return: tensor(454.2527, device='cuda:0')
epoch: 66 test_true_pfm: 6000.110214127849 sim_pfm: 514.8670787798861
episode: 264 training return: tensor(371.4977, device='cuda:0')
episode: 265 training return: tensor(378.3228, device='cuda:0')
episode: 266 training return: tensor(375.7043, device='cuda:0')
episode: 267 training return: tensor(366.8700, device='cuda:0')
epoch: 67 test_true_pfm: 6041.2862544707505 sim_pfm: 544.9734365170394
episode: 268 training return: tensor(364.4850, device='cuda:0')
episode: 269 training return: tensor(364.1627, device='cuda:0')
episode: 270 training return: tensor(208.9892, device='cuda:0')
episode: 271 training return: tensor(384.4641, device='cuda:0')
epoch: 68 test_true_pfm: 5975.648036636903 sim_pfm: 514.1443622244211
episode: 272 training return: tensor(378.1140, device='cuda:0')
episode: 273 training return: tensor(428.6068, device='cuda:0')
episode: 274 training return: tensor(371.7934, device='cuda:0')
episode: 275 training return: tensor(332.6117, device='cuda:0')
epoch: 69 test_true_pfm: 5991.889610936259 sim_pfm: 525.7460108273372
episode: 276 training return: tensor(400.7150, device='cuda:0')
episode: 277 training return: tensor(337.9154, device='cuda:0')
episode: 278 training return: tensor(333.4258, device='cuda:0')
episode: 279 training return: tensor(323.5333, device='cuda:0')
epoch: 70 test_true_pfm: 6008.460339298552 sim_pfm: 509.53886951482855
episode: 280 training return: tensor(370.1385, device='cuda:0')
episode: 281 training return: tensor(419.4455, device='cuda:0')
episode: 282 training return: tensor(359.4694, device='cuda:0')
episode: 283 training return: tensor(353.9283, device='cuda:0')
epoch: 71 test_true_pfm: 5967.204749818407 sim_pfm: 480.18588666963234
episode: 284 training return: tensor(408.1476, device='cuda:0')
episode: 285 training return: tensor(306.9226, device='cuda:0')
episode: 286 training return: tensor(358.5951, device='cuda:0')
episode: 287 training return: tensor(293.0433, device='cuda:0')
epoch: 72 test_true_pfm: 6017.208767804464 sim_pfm: 536.2314752490978
episode: 288 training return: tensor(353.4242, device='cuda:0')
episode: 289 training return: tensor(329.5940, device='cuda:0')
episode: 290 training return: tensor(339.6686, device='cuda:0')
episode: 291 training return: tensor(331.9219, device='cuda:0')
epoch: 73 test_true_pfm: 6040.03459401229 sim_pfm: 544.7521898468646
episode: 292 training return: tensor(442.2293, device='cuda:0')
episode: 293 training return: tensor(391.1345, device='cuda:0')
episode: 294 training return: tensor(362.1579, device='cuda:0')
episode: 295 training return: tensor(340.0173, device='cuda:0')
epoch: 74 test_true_pfm: 6078.735144584504 sim_pfm: 532.2345055733362
episode: 296 training return: tensor(411.7435, device='cuda:0')
episode: 297 training return: tensor(264.5080, device='cuda:0')
episode: 298 training return: tensor(307.3075, device='cuda:0')
episode: 299 training return: tensor(413.4746, device='cuda:0')
epoch: 75 test_true_pfm: 5959.236357471814 sim_pfm: 554.6003570709339
episode: 300 training return: tensor(394.3696, device='cuda:0')
episode: 301 training return: tensor(405.7792, device='cuda:0')
episode: 302 training return: tensor(387.7139, device='cuda:0')
episode: 303 training return: tensor(319.8071, device='cuda:0')
epoch: 76 test_true_pfm: 6064.663659199387 sim_pfm: 561.1842164456417
episode: 304 training return: tensor(366.9755, device='cuda:0')
episode: 305 training return: tensor(360.9948, device='cuda:0')
episode: 306 training return: tensor(383.5026, device='cuda:0')
episode: 307 training return: tensor(404.4929, device='cuda:0')
epoch: 77 test_true_pfm: 5991.072781139464 sim_pfm: 536.4362528844116
episode: 308 training return: tensor(443.6613, device='cuda:0')
episode: 309 training return: tensor(298.0770, device='cuda:0')
episode: 310 training return: tensor(418.3937, device='cuda:0')
episode: 311 training return: tensor(376.4816, device='cuda:0')
epoch: 78 test_true_pfm: 6003.125582469419 sim_pfm: 531.7999532957716
episode: 312 training return: tensor(405.7498, device='cuda:0')
episode: 313 training return: tensor(326.0143, device='cuda:0')
episode: 314 training return: tensor(405.8995, device='cuda:0')
episode: 315 training return: tensor(395.5803, device='cuda:0')
epoch: 79 test_true_pfm: 6021.437669710645 sim_pfm: 546.3090665686759
episode: 316 training return: tensor(394.0827, device='cuda:0')
episode: 317 training return: tensor(387.5005, device='cuda:0')
episode: 318 training return: tensor(478.6206, device='cuda:0')
episode: 319 training return: tensor(318.3357, device='cuda:0')
epoch: 80 test_true_pfm: 6020.116854629766 sim_pfm: 514.3888115694281
episode: 320 training return: tensor(420.3472, device='cuda:0')
episode: 321 training return: tensor(412.2437, device='cuda:0')
episode: 322 training return: tensor(381.4925, device='cuda:0')
episode: 323 training return: tensor(364.1360, device='cuda:0')
epoch: 81 test_true_pfm: 6041.810041607395 sim_pfm: 526.8022190586198
episode: 324 training return: tensor(480.3999, device='cuda:0')
episode: 325 training return: tensor(486.0929, device='cuda:0')
episode: 326 training return: tensor(390.1970, device='cuda:0')
episode: 327 training return: tensor(356.1371, device='cuda:0')
epoch: 82 test_true_pfm: 6040.2881944161245 sim_pfm: 558.6547757388713
episode: 328 training return: tensor(410.3480, device='cuda:0')
episode: 329 training return: tensor(342.6739, device='cuda:0')
episode: 330 training return: tensor(394.0297, device='cuda:0')
episode: 331 training return: tensor(420.5593, device='cuda:0')
epoch: 83 test_true_pfm: 5984.260941801487 sim_pfm: 528.1644454871033
episode: 332 training return: tensor(421.5708, device='cuda:0')
episode: 333 training return: tensor(432.1786, device='cuda:0')
episode: 334 training return: tensor(390.2012, device='cuda:0')
episode: 335 training return: tensor(315.4669, device='cuda:0')
epoch: 84 test_true_pfm: 6082.56721718395 sim_pfm: 548.6171713479076
episode: 336 training return: tensor(422.8678, device='cuda:0')
episode: 337 training return: tensor(381.6551, device='cuda:0')
episode: 338 training return: tensor(383.3618, device='cuda:0')
episode: 339 training return: tensor(324.1186, device='cuda:0')
epoch: 85 test_true_pfm: 6035.532454369349 sim_pfm: 520.9201669360822
episode: 340 training return: tensor(391.9753, device='cuda:0')
episode: 341 training return: tensor(391.7936, device='cuda:0')
episode: 342 training return: tensor(382.7185, device='cuda:0')
episode: 343 training return: tensor(391.0948, device='cuda:0')
epoch: 86 test_true_pfm: 6017.627532970485 sim_pfm: 554.4986953630092
episode: 344 training return: tensor(422.9320, device='cuda:0')
episode: 345 training return: tensor(399.1921, device='cuda:0')
episode: 346 training return: tensor(470.6234, device='cuda:0')
episode: 347 training return: tensor(359.9870, device='cuda:0')
epoch: 87 test_true_pfm: 5986.621571368287 sim_pfm: 538.2121364663471
episode: 348 training return: tensor(341.7688, device='cuda:0')
episode: 349 training return: tensor(445.3520, device='cuda:0')
episode: 350 training return: tensor(387.9321, device='cuda:0')
episode: 351 training return: tensor(365.4992, device='cuda:0')
epoch: 88 test_true_pfm: 6001.515809831689 sim_pfm: 563.4892187679458
episode: 352 training return: tensor(432.3347, device='cuda:0')
episode: 353 training return: tensor(454.3226, device='cuda:0')
episode: 354 training return: tensor(381.1815, device='cuda:0')
episode: 355 training return: tensor(377.0212, device='cuda:0')
epoch: 89 test_true_pfm: 6006.712096899238 sim_pfm: 561.9073668116083
episode: 356 training return: tensor(381.9473, device='cuda:0')
episode: 357 training return: tensor(449.6153, device='cuda:0')
episode: 358 training return: tensor(404.8069, device='cuda:0')
episode: 359 training return: tensor(414.4127, device='cuda:0')
epoch: 90 test_true_pfm: 5997.372487993484 sim_pfm: 514.6933555597789
episode: 360 training return: tensor(408.8951, device='cuda:0')
episode: 361 training return: tensor(457.1019, device='cuda:0')
episode: 362 training return: tensor(398.7151, device='cuda:0')
episode: 363 training return: tensor(379.0664, device='cuda:0')
epoch: 91 test_true_pfm: 6002.593089980003 sim_pfm: 516.7531216222948
episode: 364 training return: tensor(435.4335, device='cuda:0')
episode: 365 training return: tensor(413.9177, device='cuda:0')
episode: 366 training return: tensor(466.0678, device='cuda:0')
episode: 367 training return: tensor(397.6193, device='cuda:0')
epoch: 92 test_true_pfm: 6099.582350432378 sim_pfm: 581.1040148815373
episode: 368 training return: tensor(451.9823, device='cuda:0')
episode: 369 training return: tensor(444.4066, device='cuda:0')
episode: 370 training return: tensor(416.2845, device='cuda:0')
episode: 371 training return: tensor(496.9557, device='cuda:0')
epoch: 93 test_true_pfm: 6044.451770916553 sim_pfm: 574.2359260606657
episode: 372 training return: tensor(400.6268, device='cuda:0')
episode: 373 training return: tensor(436.1714, device='cuda:0')
episode: 374 training return: tensor(427.1956, device='cuda:0')
episode: 375 training return: tensor(414.7618, device='cuda:0')
epoch: 94 test_true_pfm: 6106.687188 sim_pfm: 562.2358939661257
episode: 376 training return: tensor(477.5287, device='cuda:0')
episode: 377 training return: tensor(410.6088, device='cuda:0')
episode: 378 training return: tensor(336.1884, device='cuda:0')
episode: 379 training return: tensor(353.4021, device='cuda:0')
epoch: 95 test_true_pfm: 6079.248448345189 sim_pfm: 522.5665366218697
episode: 380 training return: tensor(429.1933, device='cuda:0')
episode: 381 training return: tensor(341.9565, device='cuda:0')
episode: 382 training return: tensor(447.7974, device='cuda:0')
episode: 383 training return: tensor(522.8522, device='cuda:0')
epoch: 96 test_true_pfm: 6034.626117286239 sim_pfm: 537.0807332850624
episode: 384 training return: tensor(365.7603, device='cuda:0')
episode: 385 training return: tensor(458.3273, device='cuda:0')
episode: 386 training return: tensor(419.1893, device='cuda:0')
episode: 387 training return: tensor(417.1543, device='cuda:0')
epoch: 97 test_true_pfm: 6033.170208338837 sim_pfm: 552.2045596264361
episode: 388 training return: tensor(425.6042, device='cuda:0')
episode: 389 training return: tensor(299.6970, device='cuda:0')
episode: 390 training return: tensor(432.0927, device='cuda:0')
episode: 391 training return: tensor(456.4797, device='cuda:0')
epoch: 98 test_true_pfm: 6038.4600610994285 sim_pfm: 560.3979733812739
episode: 392 training return: tensor(418.9593, device='cuda:0')
episode: 393 training return: tensor(412.0867, device='cuda:0')
episode: 394 training return: tensor(444.3206, device='cuda:0')
episode: 395 training return: tensor(389.9027, device='cuda:0')
epoch: 99 test_true_pfm: 6090.095734712287 sim_pfm: 570.16734268461
episode: 396 training return: tensor(360.1590, device='cuda:0')
episode: 397 training return: tensor(393.6344, device='cuda:0')
episode: 398 training return: tensor(394.4908, device='cuda:0')
episode: 399 training return: tensor(389.7065, device='cuda:0')
epoch: 100 test_true_pfm: 6098.42564751667 sim_pfm: 559.7225997095617
episode: 400 training return: tensor(435.4673, device='cuda:0')
episode: 401 training return: tensor(457.9689, device='cuda:0')
episode: 402 training return: tensor(349.4475, device='cuda:0')
episode: 403 training return: tensor(356.3104, device='cuda:0')
epoch: 101 test_true_pfm: 6035.484374945929 sim_pfm: 574.8762018515845
episode: 404 training return: tensor(394.7035, device='cuda:0')
episode: 405 training return: tensor(437.0248, device='cuda:0')
episode: 406 training return: tensor(454.3635, device='cuda:0')
episode: 407 training return: tensor(457.3005, device='cuda:0')
epoch: 102 test_true_pfm: 6054.158615578058 sim_pfm: 550.8490133970239
episode: 408 training return: tensor(465.1262, device='cuda:0')
episode: 409 training return: tensor(441.1997, device='cuda:0')
episode: 410 training return: tensor(421.5080, device='cuda:0')
episode: 411 training return: tensor(411.7673, device='cuda:0')
epoch: 103 test_true_pfm: 6122.305154318222 sim_pfm: 575.1434989245996
episode: 412 training return: tensor(391.7510, device='cuda:0')
episode: 413 training return: tensor(446.0460, device='cuda:0')
episode: 414 training return: tensor(413.3706, device='cuda:0')
episode: 415 training return: tensor(341.9580, device='cuda:0')
epoch: 104 test_true_pfm: 6027.347354257611 sim_pfm: 577.3985488321632
episode: 416 training return: tensor(441.0405, device='cuda:0')
episode: 417 training return: tensor(463.7399, device='cuda:0')
episode: 418 training return: tensor(387.4454, device='cuda:0')
episode: 419 training return: tensor(431.1216, device='cuda:0')
epoch: 105 test_true_pfm: 6036.237581750544 sim_pfm: 582.8563504992829
episode: 420 training return: tensor(452.0363, device='cuda:0')
episode: 421 training return: tensor(387.0801, device='cuda:0')
episode: 422 training return: tensor(469.0618, device='cuda:0')
episode: 423 training return: tensor(472.5042, device='cuda:0')
epoch: 106 test_true_pfm: 6051.627482816737 sim_pfm: 551.6791844642818
episode: 424 training return: tensor(406.0725, device='cuda:0')
episode: 425 training return: tensor(430.9846, device='cuda:0')
episode: 426 training return: tensor(405.0332, device='cuda:0')
episode: 427 training return: tensor(440.0074, device='cuda:0')
epoch: 107 test_true_pfm: 6086.860306835874 sim_pfm: 571.5502781170266
episode: 428 training return: tensor(393.8977, device='cuda:0')
episode: 429 training return: tensor(464.2463, device='cuda:0')
episode: 430 training return: tensor(499.0887, device='cuda:0')
episode: 431 training return: tensor(447.7475, device='cuda:0')
epoch: 108 test_true_pfm: 6084.542980382853 sim_pfm: 553.3982138108404
episode: 432 training return: tensor(442.5104, device='cuda:0')
episode: 433 training return: tensor(478.3784, device='cuda:0')
episode: 434 training return: tensor(385.9575, device='cuda:0')
episode: 435 training return: tensor(475.2121, device='cuda:0')
epoch: 109 test_true_pfm: 6094.207906493694 sim_pfm: 580.8149517003234
episode: 436 training return: tensor(490.1262, device='cuda:0')
episode: 437 training return: tensor(415.8398, device='cuda:0')
episode: 438 training return: tensor(404.8098, device='cuda:0')
episode: 439 training return: tensor(467.4675, device='cuda:0')
epoch: 110 test_true_pfm: 6107.059970857555 sim_pfm: 570.6844343788495
episode: 440 training return: tensor(414.9736, device='cuda:0')
episode: 441 training return: tensor(369.2049, device='cuda:0')
episode: 442 training return: tensor(343.4395, device='cuda:0')
episode: 443 training return: tensor(413.9174, device='cuda:0')
epoch: 111 test_true_pfm: 6055.762534412468 sim_pfm: 556.7759026362231
episode: 444 training return: tensor(425.7733, device='cuda:0')
episode: 445 training return: tensor(412.1771, device='cuda:0')
episode: 446 training return: tensor(364.0080, device='cuda:0')
episode: 447 training return: tensor(459.1479, device='cuda:0')
epoch: 112 test_true_pfm: 6145.606303960335 sim_pfm: 575.0552571964994
episode: 448 training return: tensor(448.9014, device='cuda:0')
episode: 449 training return: tensor(461.6424, device='cuda:0')
episode: 450 training return: tensor(450.2255, device='cuda:0')
episode: 451 training return: tensor(503.7646, device='cuda:0')
epoch: 113 test_true_pfm: 6033.29575330506 sim_pfm: 558.9949836509768
episode: 452 training return: tensor(384.6185, device='cuda:0')
episode: 453 training return: tensor(449.5001, device='cuda:0')
episode: 454 training return: tensor(468.8673, device='cuda:0')
episode: 455 training return: tensor(462.0381, device='cuda:0')
epoch: 114 test_true_pfm: 6073.098837681813 sim_pfm: 533.0068609887676
episode: 456 training return: tensor(440.8468, device='cuda:0')
episode: 457 training return: tensor(455.0106, device='cuda:0')
episode: 458 training return: tensor(460.3302, device='cuda:0')
episode: 459 training return: tensor(419.1259, device='cuda:0')
epoch: 115 test_true_pfm: 6174.823050633695 sim_pfm: 608.1640100601362
episode: 460 training return: tensor(444.2902, device='cuda:0')
episode: 461 training return: tensor(460.9471, device='cuda:0')
episode: 462 training return: tensor(371.1622, device='cuda:0')
episode: 463 training return: tensor(420.3751, device='cuda:0')
epoch: 116 test_true_pfm: 6122.369018681631 sim_pfm: 566.8794611944662
episode: 464 training return: tensor(472.4966, device='cuda:0')
episode: 465 training return: tensor(434.3616, device='cuda:0')
episode: 466 training return: tensor(442.9169, device='cuda:0')
episode: 467 training return: tensor(463.1679, device='cuda:0')
epoch: 117 test_true_pfm: 6024.5405542180915 sim_pfm: 565.4457133914208
episode: 468 training return: tensor(369.8520, device='cuda:0')
episode: 469 training return: tensor(466.2772, device='cuda:0')
episode: 470 training return: tensor(423.9321, device='cuda:0')
episode: 471 training return: tensor(439.3339, device='cuda:0')
epoch: 118 test_true_pfm: 6066.635759182737 sim_pfm: 586.4733264570241
episode: 472 training return: tensor(511.8942, device='cuda:0')
episode: 473 training return: tensor(380.4991, device='cuda:0')
episode: 474 training return: tensor(437.8090, device='cuda:0')
episode: 475 training return: tensor(434.7518, device='cuda:0')
epoch: 119 test_true_pfm: 6062.268256363858 sim_pfm: 570.9826617402529
episode: 476 training return: tensor(443.8529, device='cuda:0')
episode: 477 training return: tensor(479.8450, device='cuda:0')
episode: 478 training return: tensor(460.0438, device='cuda:0')
episode: 479 training return: tensor(477.8784, device='cuda:0')
epoch: 120 test_true_pfm: 6063.975106804987 sim_pfm: 525.8789292109819
episode: 480 training return: tensor(426.8419, device='cuda:0')
episode: 481 training return: tensor(337.0682, device='cuda:0')
episode: 482 training return: tensor(428.3593, device='cuda:0')
episode: 483 training return: tensor(417.1941, device='cuda:0')
epoch: 121 test_true_pfm: 6107.642979952154 sim_pfm: 604.7205966291561
episode: 484 training return: tensor(526.0598, device='cuda:0')
episode: 485 training return: tensor(439.8910, device='cuda:0')
episode: 486 training return: tensor(450.6278, device='cuda:0')
episode: 487 training return: tensor(375.8466, device='cuda:0')
epoch: 122 test_true_pfm: 6102.508592987237 sim_pfm: 581.7843771075131
episode: 488 training return: tensor(367.2952, device='cuda:0')
episode: 489 training return: tensor(466.8112, device='cuda:0')
episode: 490 training return: tensor(422.6380, device='cuda:0')
episode: 491 training return: tensor(426.2949, device='cuda:0')
epoch: 123 test_true_pfm: 6110.913490036072 sim_pfm: 589.750701415042
episode: 492 training return: tensor(458.8510, device='cuda:0')
episode: 493 training return: tensor(461.9580, device='cuda:0')
episode: 494 training return: tensor(476.4753, device='cuda:0')
episode: 495 training return: tensor(413.6287, device='cuda:0')
epoch: 124 test_true_pfm: 6076.740105459737 sim_pfm: 570.0471593566375
episode: 496 training return: tensor(474.8736, device='cuda:0')
episode: 497 training return: tensor(429.1503, device='cuda:0')
episode: 498 training return: tensor(426.0310, device='cuda:0')
episode: 499 training return: tensor(456.9097, device='cuda:0')
epoch: 125 test_true_pfm: 6035.968826008295 sim_pfm: 570.8374468610855
episode: 500 training return: tensor(398.6998, device='cuda:0')
episode: 501 training return: tensor(409.4422, device='cuda:0')
episode: 502 training return: tensor(481.0593, device='cuda:0')
episode: 503 training return: tensor(471.1065, device='cuda:0')
epoch: 126 test_true_pfm: 6126.812927863958 sim_pfm: 604.8129844784077
episode: 504 training return: tensor(419.6867, device='cuda:0')
episode: 505 training return: tensor(362.0378, device='cuda:0')
episode: 506 training return: tensor(418.2802, device='cuda:0')
episode: 507 training return: tensor(491.8133, device='cuda:0')
epoch: 127 test_true_pfm: 6090.066938424517 sim_pfm: 581.548287370378
episode: 508 training return: tensor(451.2825, device='cuda:0')
episode: 509 training return: tensor(459.6603, device='cuda:0')
episode: 510 training return: tensor(504.9545, device='cuda:0')
episode: 511 training return: tensor(491.6086, device='cuda:0')
epoch: 128 test_true_pfm: 6096.775878005424 sim_pfm: 607.4023234297347
episode: 512 training return: tensor(527.4450, device='cuda:0')
episode: 513 training return: tensor(485.1425, device='cuda:0')
episode: 514 training return: tensor(473.8472, device='cuda:0')
episode: 515 training return: tensor(481.5140, device='cuda:0')
epoch: 129 test_true_pfm: 6143.420822235607 sim_pfm: 619.08509299388
episode: 516 training return: tensor(436.2935, device='cuda:0')
episode: 517 training return: tensor(359.5388, device='cuda:0')
episode: 518 training return: tensor(261.5748, device='cuda:0')
episode: 519 training return: tensor(497.8799, device='cuda:0')
epoch: 130 test_true_pfm: 6115.952888692249 sim_pfm: 617.375677398794
episode: 520 training return: tensor(478.9463, device='cuda:0')
episode: 521 training return: tensor(496.9224, device='cuda:0')
episode: 522 training return: tensor(418.0397, device='cuda:0')
episode: 523 training return: tensor(457.6495, device='cuda:0')
epoch: 131 test_true_pfm: 6074.333956329588 sim_pfm: 574.6429546472306
episode: 524 training return: tensor(424.7051, device='cuda:0')
episode: 525 training return: tensor(472.4648, device='cuda:0')
episode: 526 training return: tensor(467.7869, device='cuda:0')
episode: 527 training return: tensor(434.8470, device='cuda:0')
epoch: 132 test_true_pfm: 6171.160954612351 sim_pfm: 602.2390360434074
episode: 528 training return: tensor(493.0111, device='cuda:0')
episode: 529 training return: tensor(488.9099, device='cuda:0')
episode: 530 training return: tensor(505.4647, device='cuda:0')
episode: 531 training return: tensor(496.3414, device='cuda:0')
epoch: 133 test_true_pfm: 6152.290484136806 sim_pfm: 548.8662147603076
episode: 532 training return: tensor(502.0081, device='cuda:0')
episode: 533 training return: tensor(378.1588, device='cuda:0')
episode: 534 training return: tensor(465.6106, device='cuda:0')
episode: 535 training return: tensor(451.0281, device='cuda:0')
epoch: 134 test_true_pfm: 6176.570719991004 sim_pfm: 598.825632231698
episode: 536 training return: tensor(467.7376, device='cuda:0')
episode: 537 training return: tensor(436.1947, device='cuda:0')
episode: 538 training return: tensor(508.7101, device='cuda:0')
episode: 539 training return: tensor(447.2018, device='cuda:0')
epoch: 135 test_true_pfm: 6117.144562762312 sim_pfm: 588.4854396365894
episode: 540 training return: tensor(467.0862, device='cuda:0')
episode: 541 training return: tensor(425.2215, device='cuda:0')
episode: 542 training return: tensor(423.2534, device='cuda:0')
episode: 543 training return: tensor(530.9871, device='cuda:0')
epoch: 136 test_true_pfm: 6099.337982831587 sim_pfm: 592.1903467001781
episode: 544 training return: tensor(417.1770, device='cuda:0')
episode: 545 training return: tensor(433.0767, device='cuda:0')
episode: 546 training return: tensor(482.2960, device='cuda:0')
episode: 547 training return: tensor(471.5338, device='cuda:0')
epoch: 137 test_true_pfm: 6127.864810249051 sim_pfm: 562.162287240848
episode: 548 training return: tensor(448.0863, device='cuda:0')
episode: 549 training return: tensor(423.7668, device='cuda:0')
episode: 550 training return: tensor(431.5909, device='cuda:0')
episode: 551 training return: tensor(471.8753, device='cuda:0')
epoch: 138 test_true_pfm: 6126.847538507963 sim_pfm: 597.6276528383218
episode: 552 training return: tensor(425.2980, device='cuda:0')
episode: 553 training return: tensor(444.9484, device='cuda:0')
episode: 554 training return: tensor(418.9531, device='cuda:0')
episode: 555 training return: tensor(433.1544, device='cuda:0')
epoch: 139 test_true_pfm: 6127.372673641982 sim_pfm: 604.0945194376787
episode: 556 training return: tensor(473.9627, device='cuda:0')
episode: 557 training return: tensor(496.9567, device='cuda:0')
episode: 558 training return: tensor(496.0700, device='cuda:0')
episode: 559 training return: tensor(447.8824, device='cuda:0')
epoch: 140 test_true_pfm: 6150.783847884937 sim_pfm: 615.5665937655527
episode: 560 training return: tensor(491.3086, device='cuda:0')
episode: 561 training return: tensor(463.5841, device='cuda:0')
episode: 562 training return: tensor(447.1582, device='cuda:0')
episode: 563 training return: tensor(517.4191, device='cuda:0')
epoch: 141 test_true_pfm: 6120.641520128905 sim_pfm: 537.7905094653057
episode: 564 training return: tensor(464.3804, device='cuda:0')
episode: 565 training return: tensor(519.6034, device='cuda:0')
episode: 566 training return: tensor(511.0040, device='cuda:0')
episode: 567 training return: tensor(459.3206, device='cuda:0')
epoch: 142 test_true_pfm: 6230.663216972968 sim_pfm: 594.0956852462454
episode: 568 training return: tensor(357.8837, device='cuda:0')
episode: 569 training return: tensor(484.1787, device='cuda:0')
episode: 570 training return: tensor(415.1285, device='cuda:0')
episode: 571 training return: tensor(436.6802, device='cuda:0')
epoch: 143 test_true_pfm: 6133.264165883316 sim_pfm: 579.8534875511832
episode: 572 training return: tensor(495.6430, device='cuda:0')
episode: 573 training return: tensor(449.7745, device='cuda:0')
episode: 574 training return: tensor(432.3476, device='cuda:0')
episode: 575 training return: tensor(489.4235, device='cuda:0')
epoch: 144 test_true_pfm: 6206.0108337476895 sim_pfm: 635.4537321517904
episode: 576 training return: tensor(449.3094, device='cuda:0')
episode: 577 training return: tensor(380.4046, device='cuda:0')
episode: 578 training return: tensor(505.2178, device='cuda:0')
episode: 579 training return: tensor(438.8961, device='cuda:0')
epoch: 145 test_true_pfm: 6140.867548626297 sim_pfm: 592.1053100369076
episode: 580 training return: tensor(427.9745, device='cuda:0')
episode: 581 training return: tensor(501.2982, device='cuda:0')
episode: 582 training return: tensor(365.9828, device='cuda:0')
episode: 583 training return: tensor(463.8299, device='cuda:0')
epoch: 146 test_true_pfm: 6173.286835542237 sim_pfm: 611.2843222833859
episode: 584 training return: tensor(477.6599, device='cuda:0')
episode: 585 training return: tensor(444.7943, device='cuda:0')
episode: 586 training return: tensor(486.6866, device='cuda:0')
episode: 587 training return: tensor(448.1784, device='cuda:0')
epoch: 147 test_true_pfm: 6120.930243047175 sim_pfm: 604.0902433041095
episode: 588 training return: tensor(499.4989, device='cuda:0')
episode: 589 training return: tensor(522.9855, device='cuda:0')
episode: 590 training return: tensor(538.1516, device='cuda:0')
episode: 591 training return: tensor(449.8696, device='cuda:0')
epoch: 148 test_true_pfm: 6113.787952300638 sim_pfm: 612.9324211231433
episode: 592 training return: tensor(501.6047, device='cuda:0')
episode: 593 training return: tensor(459.2294, device='cuda:0')
episode: 594 training return: tensor(453.6155, device='cuda:0')
episode: 595 training return: tensor(457.3076, device='cuda:0')
epoch: 149 test_true_pfm: 6129.389644861844 sim_pfm: 584.6467626304171
episode: 596 training return: tensor(534.6786, device='cuda:0')
episode: 597 training return: tensor(534.2880, device='cuda:0')
episode: 598 training return: tensor(455.4464, device='cuda:0')
episode: 599 training return: tensor(478.4023, device='cuda:0')
epoch: 150 test_true_pfm: 6183.40168769287 sim_pfm: 637.9243092109682
