['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '10000']
epoch: 0 training_loss 0.2632052056491375 test_loss: 0.21494638919830322
epoch: 1 training_loss 0.19901206769049168 test_loss: 0.2095780372619629
epoch: 2 training_loss 0.19079635225236416 test_loss: 0.19911478757858275
epoch: 3 training_loss 0.19095659837126733 test_loss: 0.20332744121551513
epoch: 4 training_loss 0.19195477180182935 test_loss: 0.1878410816192627
epoch: 5 training_loss 0.18587258599698545 test_loss: 0.20253925323486327
epoch: 6 training_loss 0.18322810970246792 test_loss: 0.19486323595046998
epoch: 7 training_loss 0.18651313684880733 test_loss: 0.18846310377120973
epoch: 8 training_loss 0.18455301955342293 test_loss: 0.18991657495498657
epoch: 9 training_loss 0.19284292101860045 test_loss: 0.18852519989013672
epoch: 10 training_loss 0.18384539268910885 test_loss: 0.19411555528640748
epoch: 11 training_loss 0.17446593329310417 test_loss: 0.20689723491668702
epoch: 12 training_loss 0.18942456625401974 test_loss: 0.203572416305542
epoch: 13 training_loss 0.18348113410174846 test_loss: 0.1871179461479187
epoch: 14 training_loss 0.1840456051379442 test_loss: 0.18650780916213988
epoch: 15 training_loss 0.18649774231016636 test_loss: 0.18869462013244628
epoch: 16 training_loss 0.17911529652774333 test_loss: 0.20583570003509521
epoch: 17 training_loss 0.18115741096436977 test_loss: 0.1748667597770691
epoch: 18 training_loss 0.1724006427824497 test_loss: 0.20322041511535643
epoch: 19 training_loss 0.18291774921119214 test_loss: 0.19710261821746827
epoch: 20 training_loss 0.17681972198188306 test_loss: 0.1737236738204956
epoch: 21 training_loss 0.17293164037168027 test_loss: 0.19656623601913453
epoch: 22 training_loss 0.1765137805044651 test_loss: 0.19616080522537233
epoch: 23 training_loss 0.1764857705682516 test_loss: 0.2023766040802002
epoch: 24 training_loss 0.18650217797607183 test_loss: 0.18969919681549072
epoch: 25 training_loss 0.18049310453236103 test_loss: 0.19474040269851683
epoch: 26 training_loss 0.18033590465784072 test_loss: 0.1917851209640503
epoch: 27 training_loss 0.18117020547389984 test_loss: 0.19638173580169677
epoch: 28 training_loss 0.1816530203074217 test_loss: 0.20817043781280517
epoch: 29 training_loss 0.17790460117161275 test_loss: 0.1916939616203308
epoch: 30 training_loss 0.17400605879724027 test_loss: 0.19969075918197632
epoch: 31 training_loss 0.169382548853755 test_loss: 0.1829086422920227
epoch: 32 training_loss 0.17253049403429033 test_loss: 0.18019942045211793
epoch: 33 training_loss 0.17914440289139746 test_loss: 0.18244773149490356
epoch: 34 training_loss 0.17411311984062194 test_loss: 0.18165957927703857
epoch: 35 training_loss 0.17276660330593585 test_loss: 0.19131416082382202
epoch: 36 training_loss 0.16950487054884433 test_loss: 0.1944520354270935
epoch: 37 training_loss 0.17155721731483936 test_loss: 0.18637207746505738
epoch: 38 training_loss 0.17360595539212226 test_loss: 0.18672096729278564
epoch: 39 training_loss 0.17251971170306205 test_loss: 0.18132312297821046
epoch: 40 training_loss 0.1749441262334585 test_loss: 0.19639999866485597
epoch: 41 training_loss 0.1701741551607847 test_loss: 0.18965256214141846
epoch: 42 training_loss 0.17482075989246368 test_loss: 0.17993277311325073
epoch: 43 training_loss 0.17797353096306323 test_loss: 0.18638757467269898
epoch: 44 training_loss 0.17476692996919155 test_loss: 0.18786518573760985
epoch: 45 training_loss 0.17733364813029767 test_loss: 0.18464800119400024
epoch: 46 training_loss 0.17642095044255257 test_loss: 0.19031814336776734
epoch: 47 training_loss 0.17033258654177189 test_loss: 0.19188300371170045
epoch: 48 training_loss 0.1741603247821331 test_loss: 0.18991221189498902
epoch: 49 training_loss 0.1801665809750557 test_loss: 0.191666316986084
epoch: 50 training_loss 0.1736331457644701 test_loss: 0.19928233623504638
epoch: 51 training_loss 0.17814136177301407 test_loss: 0.19566408395767212
epoch: 52 training_loss 0.1703728787600994 test_loss: 0.19532912969589233
epoch: 53 training_loss 0.17025302041321994 test_loss: 0.1821897029876709
epoch: 54 training_loss 0.17146788999438287 test_loss: 0.2004741907119751
epoch: 55 training_loss 0.17280540913343428 test_loss: 0.17997374534606933
epoch: 56 training_loss 0.17399083107709884 test_loss: 0.19600491523742675
epoch: 57 training_loss 0.17430774189531803 test_loss: 0.189698326587677
epoch: 58 training_loss 0.17650816947221756 test_loss: 0.17443355321884155
epoch: 59 training_loss 0.176446548178792 test_loss: 0.18801788091659546
epoch: 60 training_loss 0.1714386922121048 test_loss: 0.19176459312438965
epoch: 61 training_loss 0.17439508832991124 test_loss: 0.1880601763725281
epoch: 62 training_loss 0.17227976769208908 test_loss: 0.1862055778503418
epoch: 63 training_loss 0.1658407101035118 test_loss: 0.17832417488098146
epoch: 64 training_loss 0.17448871083557604 test_loss: 0.18585672378540039
epoch: 65 training_loss 0.17654563426971437 test_loss: 0.18983858823776245
epoch: 66 training_loss 0.1786895413696766 test_loss: 0.18424768447875978
epoch: 67 training_loss 0.180692735388875 test_loss: 0.1891780376434326
epoch: 68 training_loss 0.1736259885877371 test_loss: 0.19523727893829346
epoch: 69 training_loss 0.17325001068413257 test_loss: 0.19144814014434813
epoch: 70 training_loss 0.17789676979184152 test_loss: 0.18502585887908934
epoch: 71 training_loss 0.168839548304677 test_loss: 0.18925611972808837
epoch: 72 training_loss 0.16886198192834853 test_loss: 0.1781562328338623
epoch: 73 training_loss 0.17242023415863514 test_loss: 0.1834813952445984
epoch: 74 training_loss 0.1710651421174407 test_loss: 0.1786000609397888
epoch: 75 training_loss 0.1647659481316805 test_loss: 0.19147396087646484
epoch: 76 training_loss 0.16572958201169968 test_loss: 0.19903843402862548
epoch: 77 training_loss 0.16922692432999611 test_loss: 0.18456448316574098
epoch: 78 training_loss 0.17182351924479008 test_loss: 0.19165678024291993
epoch: 79 training_loss 0.17762958370149134 test_loss: 0.1895349144935608
epoch: 80 training_loss 0.170677854642272 test_loss: 0.18779327869415283
epoch: 81 training_loss 0.16905576825141908 test_loss: 0.1725286602973938
epoch: 82 training_loss 0.1678700664639473 test_loss: 0.1803287148475647
epoch: 83 training_loss 0.16995287001132964 test_loss: 0.19422494173049926
epoch: 84 training_loss 0.16808189913630486 test_loss: 0.19275221824645997
epoch: 85 training_loss 0.17141821667551993 test_loss: 0.19777131080627441
epoch: 86 training_loss 0.17157787561416626 test_loss: 0.18204678297042848
epoch: 87 training_loss 0.16738428071141243 test_loss: 0.18389312028884888
epoch: 88 training_loss 0.1718818285316229 test_loss: 0.18274333477020263
epoch: 89 training_loss 0.16905421793460845 test_loss: 0.18186254501342775
epoch: 90 training_loss 0.16832891024649144 test_loss: 0.2032595157623291
epoch: 91 training_loss 0.17392041347920895 test_loss: 0.19264895915985109
epoch: 92 training_loss 0.1676528762280941 test_loss: 0.19381260871887207
epoch: 93 training_loss 0.17469931349158288 test_loss: 0.18623250722885132
epoch: 94 training_loss 0.16297927282750607 test_loss: 0.1834725856781006
epoch: 95 training_loss 0.17265609137713908 test_loss: 0.18033580780029296
epoch: 96 training_loss 0.16909768916666507 test_loss: 0.18970757722854614
epoch: 97 training_loss 0.16955642521381378 test_loss: 0.18302505016326903
epoch: 98 training_loss 0.1733532102406025 test_loss: 0.19367848634719848
epoch: 99 training_loss 0.16249670706689356 test_loss: 0.18737648725509642
epoch: 100 training_loss 0.1698021137714386 test_loss: 0.18638616800308228
epoch: 101 training_loss 0.16712146878242493 test_loss: 0.18199561834335326
epoch: 102 training_loss 0.16741822861135006 test_loss: 0.18326158523559571
epoch: 103 training_loss 0.16842240549623966 test_loss: 0.1797144055366516
epoch: 104 training_loss 0.16881524369120599 test_loss: 0.18343355655670165
epoch: 105 training_loss 0.16702167689800262 test_loss: 0.17777178287506104
epoch: 106 training_loss 0.16235769018530846 test_loss: 0.1926816940307617
epoch: 107 training_loss 0.1672395420819521 test_loss: 0.18964468240737914
epoch: 108 training_loss 0.16056765891611577 test_loss: 0.19620921611785888
epoch: 109 training_loss 0.16837204232811928 test_loss: 0.1884131908416748
epoch: 110 training_loss 0.17150022871792317 test_loss: 0.18679027557373046
epoch: 111 training_loss 0.17252848200500012 test_loss: 0.17054518461227416
epoch: 112 training_loss 0.16197623930871485 test_loss: 0.17904623746871948
epoch: 113 training_loss 0.16199855588376522 test_loss: 0.19072916507720947
epoch: 114 training_loss 0.17017214231193065 test_loss: 0.1955207109451294
epoch: 115 training_loss 0.17105390191078185 test_loss: 0.18860223293304443
epoch: 116 training_loss 0.1668237615376711 test_loss: 0.1950618863105774
epoch: 117 training_loss 0.17303839921951295 test_loss: 0.20766222476959229
epoch: 118 training_loss 0.16583851851522924 test_loss: 0.18362975120544434
epoch: 119 training_loss 0.1638859359920025 test_loss: 0.18570642471313475
epoch: 120 training_loss 0.16705468390136957 test_loss: 0.18329373598098755
epoch: 121 training_loss 0.17167100206017494 test_loss: 0.1915619730949402
epoch: 122 training_loss 0.1675638161599636 test_loss: 0.1870458245277405
epoch: 123 training_loss 0.16425438694655894 test_loss: 0.18889665603637695
epoch: 124 training_loss 0.16236990690231323 test_loss: 0.1795525908470154
epoch: 125 training_loss 0.172703240737319 test_loss: 0.19098148345947266
epoch: 126 training_loss 0.16470598556101324 test_loss: 0.19039437770843506
epoch: 127 training_loss 0.16658152975142002 test_loss: 0.19451335668563843
epoch: 128 training_loss 0.16636087618768214 test_loss: 0.18908512592315674
epoch: 129 training_loss 0.17349011555314064 test_loss: 0.18759483098983765
epoch: 130 training_loss 0.1673623999208212 test_loss: 0.1781360149383545
epoch: 131 training_loss 0.16575331367552282 test_loss: 0.18610693216323854
epoch: 132 training_loss 0.16792509719729423 test_loss: 0.19556281566619874
epoch: 133 training_loss 0.16389133594930172 test_loss: 0.17816662788391113
epoch: 134 training_loss 0.1641915522515774 test_loss: 0.19009937047958375
epoch: 135 training_loss 0.17037114441394807 test_loss: 0.17867969274520873
epoch: 136 training_loss 0.16673225946724415 test_loss: 0.18307597637176515
epoch: 137 training_loss 0.16839993074536325 test_loss: 0.19722933769226075
epoch: 138 training_loss 0.17309402041137217 test_loss: 0.18367644548416137
epoch: 139 training_loss 0.16517932012677192 test_loss: 0.193406081199646
epoch: 140 training_loss 0.16738297361880541 test_loss: 0.18213196992874145
epoch: 141 training_loss 0.16915409583598376 test_loss: 0.18441531658172608
epoch: 142 training_loss 0.16949358746409415 test_loss: 0.20263028144836426
epoch: 143 training_loss 0.16523343876004218 test_loss: 0.1781318187713623
epoch: 144 training_loss 0.1653901367634535 test_loss: 0.19351345300674438
epoch: 145 training_loss 0.16353310089558362 test_loss: 0.192945659160614
epoch: 146 training_loss 0.16918049052357673 test_loss: 0.1857288122177124
epoch: 147 training_loss 0.1685228180140257 test_loss: 0.19019228219985962
epoch: 148 training_loss 0.1670683651417494 test_loss: 0.1825541377067566
epoch: 149 training_loss 0.16474306613206863 test_loss: 0.18533625602722167
epoch: 0 training_loss 8.77560321331024 test_loss: 5.357721328735352
epoch: 1 training_loss 4.018050594329834 test_loss: 3.1274858474731446
epoch: 2 training_loss 2.5625799119472505 test_loss: 2.231851005554199
epoch: 3 training_loss 2.02666597366333 test_loss: 1.8293281555175782
epoch: 4 training_loss 1.7158132529258727 test_loss: 1.6347795486450196
epoch: 5 training_loss 1.5348937892913819 test_loss: 1.4948615074157714
epoch: 6 training_loss 1.435947368144989 test_loss: 1.3994600296020507
epoch: 7 training_loss 1.3422752797603608 test_loss: 1.335338020324707
epoch: 8 training_loss 1.2762891268730163 test_loss: 1.2308466911315918
epoch: 9 training_loss 1.2179848265647888 test_loss: 1.2113447189331055
epoch: 10 training_loss 1.1606589752435683 test_loss: 1.1207704544067383
epoch: 11 training_loss 1.1130687576532363 test_loss: 1.0830533027648925
epoch: 12 training_loss 1.071761880517006 test_loss: 1.0844293594360352
epoch: 13 training_loss 1.0581201034784318 test_loss: 1.0321922302246094
epoch: 14 training_loss 1.0146153795719146 test_loss: 1.00158748626709
epoch: 15 training_loss 0.9759708285331726 test_loss: 0.9650709152221679
epoch: 16 training_loss 0.9546911746263504 test_loss: 0.946647834777832
epoch: 17 training_loss 0.9353340625762939 test_loss: 0.9156636238098145
epoch: 18 training_loss 0.9007149982452393 test_loss: 0.8846442222595214
epoch: 19 training_loss 0.887382509112358 test_loss: 0.878366756439209
epoch: 20 training_loss 0.8714729064702987 test_loss: 0.8572043418884278
epoch: 21 training_loss 0.8646894466876983 test_loss: 0.8379302024841309
epoch: 22 training_loss 0.8461284530162811 test_loss: 0.9003703117370605
epoch: 23 training_loss 0.8258589977025985 test_loss: 0.8170835494995117
epoch: 24 training_loss 0.8058795213699341 test_loss: 0.7827788829803467
epoch: 25 training_loss 0.7885990107059478 test_loss: 0.7927143573760986
epoch: 26 training_loss 0.770910250544548 test_loss: 0.8188995361328125
epoch: 27 training_loss 0.7788284486532211 test_loss: 0.7659825801849365
epoch: 28 training_loss 0.7474720221757889 test_loss: 0.772831392288208
epoch: 29 training_loss 0.7561916142702103 test_loss: 0.7730693817138672
epoch: 30 training_loss 0.7588947552442551 test_loss: 0.7484522819519043
epoch: 31 training_loss 0.7376061689853668 test_loss: 0.7230640888214112
epoch: 32 training_loss 0.7180685126781463 test_loss: 0.7317177772521972
epoch: 33 training_loss 0.7168796324729919 test_loss: 0.7584067344665527
epoch: 34 training_loss 0.7081283068656922 test_loss: 0.6951714992523194
epoch: 35 training_loss 0.699705468416214 test_loss: 0.7080926418304443
epoch: 36 training_loss 0.6927938419580459 test_loss: 0.7063923358917237
epoch: 37 training_loss 0.7067301869392395 test_loss: 0.6929686546325684
epoch: 38 training_loss 0.6841463100910187 test_loss: 0.6815576076507568
epoch: 39 training_loss 0.6817787301540374 test_loss: 0.6802376270294189
epoch: 40 training_loss 0.6805153292417526 test_loss: 0.68656325340271
epoch: 41 training_loss 0.6704886722564697 test_loss: 0.6729274749755859
epoch: 42 training_loss 0.6598968422412872 test_loss: 0.664320707321167
epoch: 43 training_loss 0.6502610808610916 test_loss: 0.6526394367218018
epoch: 44 training_loss 0.6441632300615311 test_loss: 0.6540470123291016
epoch: 45 training_loss 0.6481474065780639 test_loss: 0.6618781089782715
epoch: 46 training_loss 0.6442128163576126 test_loss: 0.6789445877075195
epoch: 47 training_loss 0.6415254080295563 test_loss: 0.6495532512664794
epoch: 48 training_loss 0.6452183997631074 test_loss: 0.6589666843414307
epoch: 49 training_loss 0.6340908336639405 test_loss: 0.6338289737701416
epoch: 50 training_loss 0.6449409568309784 test_loss: 0.633177661895752
epoch: 51 training_loss 0.6299965518712998 test_loss: 0.6293241500854492
epoch: 52 training_loss 0.6234595626592636 test_loss: 0.6311740398406982
epoch: 53 training_loss 0.6202825528383255 test_loss: 0.6331346511840821
epoch: 54 training_loss 0.6261269855499267 test_loss: 0.6224661350250245
epoch: 55 training_loss 0.6155068105459214 test_loss: 0.6186789035797119
epoch: 56 training_loss 0.6084409517049789 test_loss: 0.6094447135925293
epoch: 57 training_loss 0.61406081199646 test_loss: 0.6071581363677978
epoch: 58 training_loss 0.6008829957246781 test_loss: 0.600636625289917
epoch: 59 training_loss 0.600746283531189 test_loss: 0.6306480884552002
epoch: 60 training_loss 0.5988979887962341 test_loss: 0.6124038696289062
epoch: 61 training_loss 0.5944891065359116 test_loss: 0.5824907302856446
epoch: 62 training_loss 0.5933930534124374 test_loss: 0.5906902313232422
epoch: 63 training_loss 0.5845144048333168 test_loss: 0.6100724220275879
epoch: 64 training_loss 0.5874868720769882 test_loss: 0.5796041011810302
epoch: 65 training_loss 0.5867799711227417 test_loss: 0.6232054233551025
epoch: 66 training_loss 0.5808854937553406 test_loss: 0.6021453380584717
epoch: 67 training_loss 0.5765897354483605 test_loss: 0.5705393314361572
epoch: 68 training_loss 0.5863906154036522 test_loss: 0.5903407096862793
epoch: 69 training_loss 0.5731338185071945 test_loss: 0.5707494258880615
epoch: 70 training_loss 0.5772917062044144 test_loss: 0.5870685577392578
epoch: 71 training_loss 0.5654559105634689 test_loss: 0.5653068542480468
epoch: 72 training_loss 0.570230768918991 test_loss: 0.6008677005767822
epoch: 73 training_loss 0.5636624249815941 test_loss: 0.5724925041198731
epoch: 74 training_loss 0.5657011538743972 test_loss: 0.564254903793335
epoch: 75 training_loss 0.5834722918272018 test_loss: 0.5919261932373047
epoch: 76 training_loss 0.5579925718903541 test_loss: 0.5700820446014404
epoch: 77 training_loss 0.55471744120121 test_loss: 0.563871431350708
epoch: 78 training_loss 0.5563319292664528 test_loss: 0.5653660774230957
epoch: 79 training_loss 0.5526574105024338 test_loss: 0.5637152194976807
epoch: 80 training_loss 0.5551260045170784 test_loss: 0.5568164348602295
epoch: 81 training_loss 0.5602305296063423 test_loss: 0.5356461048126221
epoch: 82 training_loss 0.5442578291893005 test_loss: 0.5764474391937255
epoch: 83 training_loss 0.5491863435506821 test_loss: 0.5381661891937256
epoch: 84 training_loss 0.536861675977707 test_loss: 0.5489863395690918
epoch: 85 training_loss 0.5376827719807625 test_loss: 0.5438313484191895
epoch: 86 training_loss 0.5370054838061332 test_loss: 0.5408662796020508
epoch: 87 training_loss 0.5427737933397293 test_loss: 0.5468840599060059
epoch: 88 training_loss 0.5399510762095452 test_loss: 0.5501847267150879
epoch: 89 training_loss 0.5405702060461044 test_loss: 0.5605189323425293
epoch: 90 training_loss 0.5358871239423751 test_loss: 0.543232774734497
epoch: 91 training_loss 0.5363345545530319 test_loss: 0.5336747169494629
epoch: 92 training_loss 0.5261207684874535 test_loss: 0.5261567592620849
epoch: 93 training_loss 0.5417953556776047 test_loss: 0.5407177925109863
epoch: 94 training_loss 0.5384887373447418 test_loss: 0.5269426345825196
epoch: 95 training_loss 0.5340408930182456 test_loss: 0.5520674228668213
epoch: 96 training_loss 0.5317607617378235 test_loss: 0.5462918758392334
epoch: 97 training_loss 0.5299000215530395 test_loss: 0.5263876914978027
epoch: 98 training_loss 0.5297080406546593 test_loss: 0.5202598094940185
epoch: 99 training_loss 0.5222826939821243 test_loss: 0.5520508766174317
epoch: 100 training_loss 0.5167068496346474 test_loss: 0.5418488025665283
epoch: 101 training_loss 0.5201769289374352 test_loss: 0.5285914421081543
epoch: 102 training_loss 0.5249089768528938 test_loss: 0.529469108581543
epoch: 103 training_loss 0.5259508761763573 test_loss: 0.5475590705871582
epoch: 104 training_loss 0.519484452009201 test_loss: 0.5432634353637695
epoch: 105 training_loss 0.5175508806109428 test_loss: 0.5214149951934814
epoch: 106 training_loss 0.513002969622612 test_loss: 0.5392127990722656
epoch: 107 training_loss 0.5186961528658867 test_loss: 0.5130434513092041
epoch: 108 training_loss 0.5197438907623291 test_loss: 0.5384893417358398
epoch: 109 training_loss 0.5191893079876899 test_loss: 0.5164184093475341
epoch: 110 training_loss 0.5246452975273133 test_loss: 0.5218699932098388
epoch: 111 training_loss 0.5122059750556945 test_loss: 0.5160473823547364
epoch: 112 training_loss 0.5103069043159485 test_loss: 0.500376844406128
epoch: 113 training_loss 0.5101199468970299 test_loss: 0.5215934753417969
epoch: 114 training_loss 0.5097092705965042 test_loss: 0.5112502098083496
epoch: 115 training_loss 0.5123635971546173 test_loss: 0.5058223724365234
epoch: 116 training_loss 0.5069683900475502 test_loss: 0.5158034801483155
epoch: 117 training_loss 0.511160337626934 test_loss: 0.5013490200042725
epoch: 118 training_loss 0.5168508386611939 test_loss: 0.5563605308532715
epoch: 119 training_loss 0.5068539750576019 test_loss: 0.5172536849975586
epoch: 120 training_loss 0.5157988592982292 test_loss: 0.5228652954101562
epoch: 121 training_loss 0.5004630401730538 test_loss: 0.5250984191894531
epoch: 122 training_loss 0.5017723739147186 test_loss: 0.5050458908081055
epoch: 123 training_loss 0.5074243110418319 test_loss: 0.5039326667785644
epoch: 124 training_loss 0.4982301405072212 test_loss: 0.5061571598052979
epoch: 125 training_loss 0.49624169528484346 test_loss: 0.49120068550109863
epoch: 126 training_loss 0.5044465965032577 test_loss: 0.5161491870880127
epoch: 127 training_loss 0.5033045345544815 test_loss: 0.5148941516876221
epoch: 128 training_loss 0.49392337769269945 test_loss: 0.4860652446746826
epoch: 129 training_loss 0.5062601533532143 test_loss: 0.4891796588897705
epoch: 130 training_loss 0.49611065834760665 test_loss: 0.49018406867980957
epoch: 131 training_loss 0.49171765059232714 test_loss: 0.4996196746826172
epoch: 132 training_loss 0.4932020020484924 test_loss: 0.5049698829650879
epoch: 133 training_loss 0.49689730554819106 test_loss: 0.5149542808532714
epoch: 134 training_loss 0.49395365864038465 test_loss: 0.4974623680114746
epoch: 135 training_loss 0.49096485137939455 test_loss: 0.4882761001586914
epoch: 136 training_loss 0.4942043909430504 test_loss: 0.4929506301879883
epoch: 137 training_loss 0.49012977778911593 test_loss: 0.5037621021270752
epoch: 138 training_loss 0.4832230520248413 test_loss: 0.4970224857330322
epoch: 139 training_loss 0.4920935359597206 test_loss: 0.4783937931060791
epoch: 140 training_loss 0.4880943423509598 test_loss: 0.5430593967437745
epoch: 141 training_loss 0.4939259934425354 test_loss: 0.5097139358520508
epoch: 142 training_loss 0.4926100736856461 test_loss: 0.49852190017700193
epoch: 143 training_loss 0.499684134721756 test_loss: 0.49079217910766604
epoch: 144 training_loss 0.4883274528384209 test_loss: 0.4971770286560059
epoch: 145 training_loss 0.48038678914308547 test_loss: 0.4845928192138672
epoch: 146 training_loss 0.4880855083465576 test_loss: 0.4909506320953369
epoch: 147 training_loss 0.49318321913480756 test_loss: 0.5000243186950684
epoch: 148 training_loss 0.48312800377607346 test_loss: 0.47775955200195314
epoch: 149 training_loss 0.477265904545784 test_loss: 0.4799307346343994
2030.2802052227441
episode: 0 training return: tensor(-406.5630, device='cuda:0')
episode: 1 training return: tensor(-24.0526, device='cuda:0')
episode: 2 training return: tensor(-211.7203, device='cuda:0')
episode: 3 training return: tensor(-396.2487, device='cuda:0')
epoch: 1 test_true_pfm: 1373.858796464903 sim_pfm: -403.4567620079033
episode: 4 training return: tensor(-116.9732, device='cuda:0')
episode: 5 training return: tensor(-185.4022, device='cuda:0')
episode: 6 training return: tensor(-395.3646, device='cuda:0')
episode: 7 training return: tensor(-454.5380, device='cuda:0')
epoch: 2 test_true_pfm: 1843.343495348881 sim_pfm: -264.7205529033187
episode: 8 training return: tensor(64.8147, device='cuda:0')
episode: 9 training return: tensor(-27.6678, device='cuda:0')
episode: 10 training return: tensor(-278.5771, device='cuda:0')
episode: 11 training return: tensor(-274.9633, device='cuda:0')
epoch: 3 test_true_pfm: 2180.16104569953 sim_pfm: 13.184040398259336
episode: 12 training return: tensor(38.8596, device='cuda:0')
episode: 13 training return: tensor(-321.5556, device='cuda:0')
episode: 14 training return: tensor(-211.7776, device='cuda:0')
episode: 15 training return: tensor(-332.6602, device='cuda:0')
epoch: 4 test_true_pfm: 1880.8598895834255 sim_pfm: -379.1285401477362
episode: 16 training return: tensor(-111.9212, device='cuda:0')
episode: 17 training return: tensor(-177.5249, device='cuda:0')
episode: 18 training return: tensor(-225.0842, device='cuda:0')
episode: 19 training return: tensor(58.1992, device='cuda:0')
epoch: 5 test_true_pfm: 1378.8647085056111 sim_pfm: -438.9743198935951
episode: 20 training return: tensor(-388.4859, device='cuda:0')
episode: 21 training return: tensor(-226.5161, device='cuda:0')
episode: 22 training return: tensor(-151.3839, device='cuda:0')
episode: 23 training return: tensor(-207.0402, device='cuda:0')
epoch: 6 test_true_pfm: 1300.9986298530964 sim_pfm: -422.18790051700006
episode: 24 training return: tensor(-367.4219, device='cuda:0')
episode: 25 training return: tensor(156.4182, device='cuda:0')
episode: 26 training return: tensor(-230.3434, device='cuda:0')
episode: 27 training return: tensor(-437.7578, device='cuda:0')
epoch: 7 test_true_pfm: 2007.7604509651112 sim_pfm: -324.50214945627766
episode: 28 training return: tensor(-402.1385, device='cuda:0')
episode: 29 training return: tensor(-311.1845, device='cuda:0')
episode: 30 training return: tensor(-272.5559, device='cuda:0')
episode: 31 training return: tensor(-401.7665, device='cuda:0')
epoch: 8 test_true_pfm: 1406.5178272278354 sim_pfm: -435.0829288410799
episode: 32 training return: tensor(-200.2836, device='cuda:0')
episode: 33 training return: tensor(-451.7484, device='cuda:0')
episode: 34 training return: tensor(-435.7794, device='cuda:0')
episode: 35 training return: tensor(26.3745, device='cuda:0')
epoch: 9 test_true_pfm: 1360.0273508382327 sim_pfm: -428.2214326793037
episode: 36 training return: tensor(19.5245, device='cuda:0')
episode: 37 training return: tensor(-182.3128, device='cuda:0')
episode: 38 training return: tensor(-390.0354, device='cuda:0')
episode: 39 training return: tensor(-48.0092, device='cuda:0')
epoch: 10 test_true_pfm: 2016.7863154899796 sim_pfm: -368.8623191496202
episode: 40 training return: tensor(-323.4682, device='cuda:0')
episode: 41 training return: tensor(-402.1446, device='cuda:0')
episode: 42 training return: tensor(-457.4783, device='cuda:0')
episode: 43 training return: tensor(-447.1820, device='cuda:0')
epoch: 11 test_true_pfm: 1356.7825504825637 sim_pfm: -423.8140928755747
episode: 44 training return: tensor(-399.2635, device='cuda:0')
episode: 45 training return: tensor(-368.2468, device='cuda:0')
episode: 46 training return: tensor(-436.3033, device='cuda:0')
episode: 47 training return: tensor(-411.6730, device='cuda:0')
epoch: 12 test_true_pfm: 1605.0275912610848 sim_pfm: -373.7934647154664
episode: 48 training return: tensor(-365.1320, device='cuda:0')
episode: 49 training return: tensor(-432.9498, device='cuda:0')
episode: 50 training return: tensor(-103.9194, device='cuda:0')
episode: 51 training return: tensor(-213.3373, device='cuda:0')
epoch: 13 test_true_pfm: 2306.203783188706 sim_pfm: -127.30065451647776
episode: 52 training return: tensor(232.9583, device='cuda:0')
episode: 53 training return: tensor(-231.4014, device='cuda:0')
episode: 54 training return: tensor(-342.7415, device='cuda:0')
episode: 55 training return: tensor(-307.9321, device='cuda:0')
epoch: 14 test_true_pfm: 2491.6409221780364 sim_pfm: -137.94404825748643
episode: 56 training return: tensor(-409.2117, device='cuda:0')
episode: 57 training return: tensor(-406.4864, device='cuda:0')
episode: 58 training return: tensor(-394.9412, device='cuda:0')
episode: 59 training return: tensor(-220.0736, device='cuda:0')
epoch: 15 test_true_pfm: 1857.2075704198953 sim_pfm: -184.58525389130227
episode: 60 training return: tensor(-211.1762, device='cuda:0')
episode: 61 training return: tensor(-451.8398, device='cuda:0')
episode: 62 training return: tensor(-322.0258, device='cuda:0')
episode: 63 training return: tensor(-365.1657, device='cuda:0')
epoch: 16 test_true_pfm: 2075.274427331159 sim_pfm: -195.8271916361215
episode: 64 training return: tensor(-38.6967, device='cuda:0')
episode: 65 training return: tensor(-443.0857, device='cuda:0')
episode: 66 training return: tensor(-273.3758, device='cuda:0')
episode: 67 training return: tensor(-400.0432, device='cuda:0')
epoch: 17 test_true_pfm: 2169.378059368541 sim_pfm: -72.11833506991388
episode: 68 training return: tensor(-269.3052, device='cuda:0')
episode: 69 training return: tensor(-207.5396, device='cuda:0')
episode: 70 training return: tensor(-364.6624, device='cuda:0')
episode: 71 training return: tensor(-369.1550, device='cuda:0')
epoch: 18 test_true_pfm: 2186.35360692737 sim_pfm: 3.2771168329539555
episode: 72 training return: tensor(-389.5776, device='cuda:0')
episode: 73 training return: tensor(-398.6943, device='cuda:0')
episode: 74 training return: tensor(-116.7117, device='cuda:0')
episode: 75 training return: tensor(92.8170, device='cuda:0')
epoch: 19 test_true_pfm: 2384.2994910884713 sim_pfm: -185.47343693724056
episode: 76 training return: tensor(-396.1378, device='cuda:0')
episode: 77 training return: tensor(-364.9753, device='cuda:0')
episode: 78 training return: tensor(186.4981, device='cuda:0')
episode: 79 training return: tensor(-399.1389, device='cuda:0')
epoch: 20 test_true_pfm: 2589.4655337546096 sim_pfm: -141.44482110068202
episode: 80 training return: tensor(-357.4907, device='cuda:0')
episode: 81 training return: tensor(-361.4098, device='cuda:0')
episode: 82 training return: tensor(-19.3994, device='cuda:0')
episode: 83 training return: tensor(-189.4093, device='cuda:0')
epoch: 21 test_true_pfm: 2294.9455584396583 sim_pfm: -34.201139503507875
episode: 84 training return: tensor(-142.7408, device='cuda:0')
episode: 85 training return: tensor(-412.3353, device='cuda:0')
episode: 86 training return: tensor(-161.2447, device='cuda:0')
episode: 87 training return: tensor(-366.7015, device='cuda:0')
epoch: 22 test_true_pfm: 1695.6051468993762 sim_pfm: -293.48133471793454
episode: 88 training return: tensor(-355.9477, device='cuda:0')
episode: 89 training return: tensor(-28.9970, device='cuda:0')
episode: 90 training return: tensor(-414.2444, device='cuda:0')
episode: 91 training return: tensor(-412.1921, device='cuda:0')
epoch: 23 test_true_pfm: 2188.2758413449246 sim_pfm: -113.3871717361714
episode: 92 training return: tensor(76.2860, device='cuda:0')
episode: 93 training return: tensor(-192.2039, device='cuda:0')
episode: 94 training return: tensor(-115.7837, device='cuda:0')
episode: 95 training return: tensor(-228.9136, device='cuda:0')
epoch: 24 test_true_pfm: 2596.9769371470875 sim_pfm: 19.950766831617027
episode: 96 training return: tensor(-221.3651, device='cuda:0')
episode: 97 training return: tensor(95.9658, device='cuda:0')
episode: 98 training return: tensor(-429.5963, device='cuda:0')
episode: 99 training return: tensor(-359.3180, device='cuda:0')
epoch: 25 test_true_pfm: 2185.538985565306 sim_pfm: -92.8491607028991
episode: 100 training return: tensor(-272.2068, device='cuda:0')
episode: 101 training return: tensor(-402.7268, device='cuda:0')
episode: 102 training return: tensor(-407.0149, device='cuda:0')
episode: 103 training return: tensor(192.2042, device='cuda:0')
epoch: 26 test_true_pfm: 2361.5878198586947 sim_pfm: 175.59386437112698
episode: 104 training return: tensor(-448.4132, device='cuda:0')
episode: 105 training return: tensor(-221.3910, device='cuda:0')
episode: 106 training return: tensor(-404.4705, device='cuda:0')
episode: 107 training return: tensor(-285.3244, device='cuda:0')
epoch: 27 test_true_pfm: 2276.885345354507 sim_pfm: 67.92522588932964
episode: 108 training return: tensor(-432.1147, device='cuda:0')
episode: 109 training return: tensor(-407.5258, device='cuda:0')
episode: 110 training return: tensor(-362.1049, device='cuda:0')
episode: 111 training return: tensor(-291.2114, device='cuda:0')
epoch: 28 test_true_pfm: 2392.4221915124504 sim_pfm: -59.789688563963864
episode: 112 training return: tensor(-126.1494, device='cuda:0')
episode: 113 training return: tensor(-199.7516, device='cuda:0')
episode: 114 training return: tensor(61.6208, device='cuda:0')
episode: 115 training return: tensor(-408.5943, device='cuda:0')
epoch: 29 test_true_pfm: 2154.8786641662987 sim_pfm: -172.8255531390702
episode: 116 training return: tensor(-403.8030, device='cuda:0')
episode: 117 training return: tensor(-401.5260, device='cuda:0')
episode: 118 training return: tensor(-47.0887, device='cuda:0')
episode: 119 training return: tensor(-358.8634, device='cuda:0')
epoch: 30 test_true_pfm: 2600.6950672922576 sim_pfm: -189.8381159073227
episode: 120 training return: tensor(-67.5891, device='cuda:0')
episode: 121 training return: tensor(-279.0621, device='cuda:0')
episode: 122 training return: tensor(-446.6649, device='cuda:0')
episode: 123 training return: tensor(-393.5182, device='cuda:0')
epoch: 31 test_true_pfm: 2313.160909676409 sim_pfm: 55.37463002024257
episode: 124 training return: tensor(-394.9886, device='cuda:0')
episode: 125 training return: tensor(-395.6477, device='cuda:0')
episode: 126 training return: tensor(-310.6252, device='cuda:0')
episode: 127 training return: tensor(-317.0067, device='cuda:0')
epoch: 32 test_true_pfm: 2682.8607497480234 sim_pfm: -121.57095038441669
episode: 128 training return: tensor(-406.2547, device='cuda:0')
episode: 129 training return: tensor(-210.8452, device='cuda:0')
episode: 130 training return: tensor(-379.4120, device='cuda:0')
episode: 131 training return: tensor(-120.6750, device='cuda:0')
epoch: 33 test_true_pfm: 2060.0910795370614 sim_pfm: -27.239301605363533
episode: 132 training return: tensor(-391.6994, device='cuda:0')
episode: 133 training return: tensor(-402.6404, device='cuda:0')
episode: 134 training return: tensor(-198.6925, device='cuda:0')
episode: 135 training return: tensor(-13.5113, device='cuda:0')
epoch: 34 test_true_pfm: 1959.51267333989 sim_pfm: 3.6918598538807905
episode: 136 training return: tensor(-396.2125, device='cuda:0')
episode: 137 training return: tensor(-402.3141, device='cuda:0')
episode: 138 training return: tensor(-196.6224, device='cuda:0')
episode: 139 training return: tensor(-394.6157, device='cuda:0')
epoch: 35 test_true_pfm: 2502.565736246572 sim_pfm: -122.65478712892703
episode: 140 training return: tensor(-394.2518, device='cuda:0')
episode: 141 training return: tensor(-388.6676, device='cuda:0')
episode: 142 training return: tensor(-381.7980, device='cuda:0')
episode: 143 training return: tensor(-246.7802, device='cuda:0')
epoch: 36 test_true_pfm: 2599.0363064477556 sim_pfm: -21.610417865895823
episode: 144 training return: tensor(-408.7893, device='cuda:0')
episode: 145 training return: tensor(-137.8853, device='cuda:0')
episode: 146 training return: tensor(-310.3935, device='cuda:0')
episode: 147 training return: tensor(232.0129, device='cuda:0')
epoch: 37 test_true_pfm: 2690.295227610663 sim_pfm: 39.98026858617474
episode: 148 training return: tensor(-210.6126, device='cuda:0')
episode: 149 training return: tensor(-109.9530, device='cuda:0')
episode: 150 training return: tensor(-220.8416, device='cuda:0')
episode: 151 training return: tensor(-26.1917, device='cuda:0')
epoch: 38 test_true_pfm: 2622.8043190464045 sim_pfm: -11.053455098764971
episode: 152 training return: tensor(-372.4693, device='cuda:0')
episode: 153 training return: tensor(-396.9983, device='cuda:0')
episode: 154 training return: tensor(-370.2110, device='cuda:0')
episode: 155 training return: tensor(-386.6556, device='cuda:0')
epoch: 39 test_true_pfm: 2289.9821133376627 sim_pfm: -157.36318525431366
episode: 156 training return: tensor(-322.2117, device='cuda:0')
episode: 157 training return: tensor(-409.6924, device='cuda:0')
episode: 158 training return: tensor(-404.8398, device='cuda:0')
episode: 159 training return: tensor(-163.2664, device='cuda:0')
epoch: 40 test_true_pfm: 2332.3785195654314 sim_pfm: -160.22790817931914
episode: 160 training return: tensor(-409.2045, device='cuda:0')
episode: 161 training return: tensor(-380.3541, device='cuda:0')
episode: 162 training return: tensor(-132.1858, device='cuda:0')
episode: 163 training return: tensor(-112.3770, device='cuda:0')
epoch: 41 test_true_pfm: 2152.1977253080986 sim_pfm: 62.81457510625478
episode: 164 training return: tensor(-393.3547, device='cuda:0')
episode: 165 training return: tensor(-367.6240, device='cuda:0')
episode: 166 training return: tensor(-295.9027, device='cuda:0')
episode: 167 training return: tensor(-104.9107, device='cuda:0')
epoch: 42 test_true_pfm: 2668.039123393116 sim_pfm: -150.6140478311378
episode: 168 training return: tensor(-228.1695, device='cuda:0')
episode: 169 training return: tensor(-336.4052, device='cuda:0')
episode: 170 training return: tensor(-315.0853, device='cuda:0')
episode: 171 training return: tensor(-389.2667, device='cuda:0')
epoch: 43 test_true_pfm: 2584.331485694886 sim_pfm: 75.57526454682618
episode: 172 training return: tensor(-104.3932, device='cuda:0')
episode: 173 training return: tensor(-210.8759, device='cuda:0')
episode: 174 training return: tensor(147.3283, device='cuda:0')
episode: 175 training return: tensor(-390.4361, device='cuda:0')
epoch: 44 test_true_pfm: 3311.47324193442 sim_pfm: 19.614419677255984
episode: 176 training return: tensor(-279.2342, device='cuda:0')
episode: 177 training return: tensor(-22.0031, device='cuda:0')
episode: 178 training return: tensor(-376.6384, device='cuda:0')
episode: 179 training return: tensor(-236.8683, device='cuda:0')
epoch: 45 test_true_pfm: 3045.9156125516165 sim_pfm: -208.05411786667537
episode: 180 training return: tensor(-137.6383, device='cuda:0')
episode: 181 training return: tensor(-422.6689, device='cuda:0')
episode: 182 training return: tensor(-142.0556, device='cuda:0')
episode: 183 training return: tensor(-55.8441, device='cuda:0')
epoch: 46 test_true_pfm: 2311.8460215822106 sim_pfm: 8.135091346300518
episode: 184 training return: tensor(-332.4737, device='cuda:0')
episode: 185 training return: tensor(-367.0329, device='cuda:0')
episode: 186 training return: tensor(-357.6449, device='cuda:0')
episode: 187 training return: tensor(-386.1115, device='cuda:0')
epoch: 47 test_true_pfm: 2261.6185404857442 sim_pfm: -17.23191099766215
episode: 188 training return: tensor(-375.9705, device='cuda:0')
episode: 189 training return: tensor(-400.8587, device='cuda:0')
episode: 190 training return: tensor(-453.0434, device='cuda:0')
episode: 191 training return: tensor(-377.1226, device='cuda:0')
epoch: 48 test_true_pfm: 2335.851864865062 sim_pfm: -14.94693782896502
episode: 192 training return: tensor(-65.0610, device='cuda:0')
episode: 193 training return: tensor(-453.1898, device='cuda:0')
episode: 194 training return: tensor(-445.9167, device='cuda:0')
episode: 195 training return: tensor(-397.8967, device='cuda:0')
epoch: 49 test_true_pfm: 2449.5988296871224 sim_pfm: -72.47409310160826
episode: 196 training return: tensor(-269.6003, device='cuda:0')
episode: 197 training return: tensor(-278.7217, device='cuda:0')
episode: 198 training return: tensor(-271.3527, device='cuda:0')
episode: 199 training return: tensor(-331.5683, device='cuda:0')
epoch: 50 test_true_pfm: 2300.8114220870225 sim_pfm: 45.57863128182362
episode: 200 training return: tensor(-164.1209, device='cuda:0')
episode: 201 training return: tensor(-217.8917, device='cuda:0')
episode: 202 training return: tensor(-397.9233, device='cuda:0')
episode: 203 training return: tensor(-26.7659, device='cuda:0')
epoch: 51 test_true_pfm: 2792.4022773480506 sim_pfm: -140.82066172908526
episode: 204 training return: tensor(-120.5069, device='cuda:0')
episode: 205 training return: tensor(-393.9377, device='cuda:0')
episode: 206 training return: tensor(-12.3838, device='cuda:0')
episode: 207 training return: tensor(-387.5318, device='cuda:0')
epoch: 52 test_true_pfm: 2920.313187329041 sim_pfm: -49.411066524412796
episode: 208 training return: tensor(-400.8216, device='cuda:0')
episode: 209 training return: tensor(-206.3706, device='cuda:0')
episode: 210 training return: tensor(-386.4445, device='cuda:0')
episode: 211 training return: tensor(-15.6360, device='cuda:0')
epoch: 53 test_true_pfm: 2270.4326131910548 sim_pfm: -97.66003679725691
episode: 212 training return: tensor(-392.3828, device='cuda:0')
episode: 213 training return: tensor(-133.0042, device='cuda:0')
episode: 214 training return: tensor(-384.6429, device='cuda:0')
episode: 215 training return: tensor(-397.1594, device='cuda:0')
epoch: 54 test_true_pfm: 2646.1574839928667 sim_pfm: -40.67333377499987
episode: 216 training return: tensor(-206.8046, device='cuda:0')
episode: 217 training return: tensor(181.2137, device='cuda:0')
episode: 218 training return: tensor(-405.1038, device='cuda:0')
episode: 219 training return: tensor(-398.8752, device='cuda:0')
epoch: 55 test_true_pfm: 2472.25677869368 sim_pfm: -143.9160072656426
episode: 220 training return: tensor(-102.6887, device='cuda:0')
episode: 221 training return: tensor(-290.7696, device='cuda:0')
episode: 222 training return: tensor(-370.3679, device='cuda:0')
episode: 223 training return: tensor(-214.4040, device='cuda:0')
epoch: 56 test_true_pfm: 2611.3759864762023 sim_pfm: -113.82881424452837
episode: 224 training return: tensor(-278.5921, device='cuda:0')
episode: 225 training return: tensor(-312.8418, device='cuda:0')
episode: 226 training return: tensor(-409.0894, device='cuda:0')
episode: 227 training return: tensor(-326.1638, device='cuda:0')
epoch: 57 test_true_pfm: 2956.8940823393077 sim_pfm: 13.520360815785049
episode: 228 training return: tensor(-361.0083, device='cuda:0')
episode: 229 training return: tensor(-187.8444, device='cuda:0')
episode: 230 training return: tensor(-397.4219, device='cuda:0')
episode: 231 training return: tensor(72.4806, device='cuda:0')
epoch: 58 test_true_pfm: 2622.4360636662136 sim_pfm: -46.922424419453215
episode: 232 training return: tensor(-102.5053, device='cuda:0')
episode: 233 training return: tensor(-126.5353, device='cuda:0')
episode: 234 training return: tensor(-292.2153, device='cuda:0')
episode: 235 training return: tensor(55.8362, device='cuda:0')
epoch: 59 test_true_pfm: 2406.6834533987326 sim_pfm: -142.27447958282815
episode: 236 training return: tensor(-452.5026, device='cuda:0')
episode: 237 training return: tensor(-390.7453, device='cuda:0')
episode: 238 training return: tensor(-391.3364, device='cuda:0')
episode: 239 training return: tensor(-353.3880, device='cuda:0')
epoch: 60 test_true_pfm: 2362.967934503277 sim_pfm: 66.52370640388108
episode: 240 training return: tensor(-447.5229, device='cuda:0')
episode: 241 training return: tensor(-404.3391, device='cuda:0')
episode: 242 training return: tensor(-227.8080, device='cuda:0')
episode: 243 training return: tensor(-142.2135, device='cuda:0')
epoch: 61 test_true_pfm: 2275.6566074903717 sim_pfm: -147.68146240910087
episode: 244 training return: tensor(-450.1928, device='cuda:0')
episode: 245 training return: tensor(-412.4742, device='cuda:0')
episode: 246 training return: tensor(-338.3097, device='cuda:0')
episode: 247 training return: tensor(59.6051, device='cuda:0')
epoch: 62 test_true_pfm: 2690.632171848209 sim_pfm: 91.51062620920129
episode: 248 training return: tensor(165.9599, device='cuda:0')
episode: 249 training return: tensor(-359.9969, device='cuda:0')
episode: 250 training return: tensor(-348.1756, device='cuda:0')
episode: 251 training return: tensor(-292.9114, device='cuda:0')
epoch: 63 test_true_pfm: 2266.738037056664 sim_pfm: -168.86310126895356
episode: 252 training return: tensor(-410.1114, device='cuda:0')
episode: 253 training return: tensor(-372.7815, device='cuda:0')
episode: 254 training return: tensor(-393.5701, device='cuda:0')
episode: 255 training return: tensor(-417.8109, device='cuda:0')
epoch: 64 test_true_pfm: 2819.627850686176 sim_pfm: 44.21515728251931
episode: 256 training return: tensor(-340.4985, device='cuda:0')
episode: 257 training return: tensor(185.7610, device='cuda:0')
episode: 258 training return: tensor(-21.8249, device='cuda:0')
episode: 259 training return: tensor(213.9636, device='cuda:0')
epoch: 65 test_true_pfm: 2726.3329818014636 sim_pfm: -8.28563638184763
episode: 260 training return: tensor(-171.7234, device='cuda:0')
episode: 261 training return: tensor(-279.9788, device='cuda:0')
episode: 262 training return: tensor(-403.4879, device='cuda:0')
episode: 263 training return: tensor(-411.8256, device='cuda:0')
epoch: 66 test_true_pfm: 2637.228979430851 sim_pfm: 13.591012666583993
episode: 264 training return: tensor(-372.9870, device='cuda:0')
episode: 265 training return: tensor(-396.8492, device='cuda:0')
episode: 266 training return: tensor(-404.8510, device='cuda:0')
episode: 267 training return: tensor(-279.2731, device='cuda:0')
epoch: 67 test_true_pfm: 2745.280667754852 sim_pfm: -119.72539328954493
episode: 268 training return: tensor(190.9464, device='cuda:0')
episode: 269 training return: tensor(-369.5930, device='cuda:0')
episode: 270 training return: tensor(-187.0625, device='cuda:0')
episode: 271 training return: tensor(-397.2208, device='cuda:0')
epoch: 68 test_true_pfm: 2959.615931088067 sim_pfm: 74.00402481652175
episode: 272 training return: tensor(-397.6127, device='cuda:0')
episode: 273 training return: tensor(-409.4518, device='cuda:0')
episode: 274 training return: tensor(-14.6222, device='cuda:0')
episode: 275 training return: tensor(-386.4980, device='cuda:0')
epoch: 69 test_true_pfm: 2199.8620238137155 sim_pfm: -146.71249036499648
episode: 276 training return: tensor(-274.9658, device='cuda:0')
episode: 277 training return: tensor(-394.5620, device='cuda:0')
episode: 278 training return: tensor(-34.8583, device='cuda:0')
episode: 279 training return: tensor(-412.0317, device='cuda:0')
epoch: 70 test_true_pfm: 2646.4440036020997 sim_pfm: -72.02869097473256
episode: 280 training return: tensor(-401.7763, device='cuda:0')
episode: 281 training return: tensor(-12.4155, device='cuda:0')
episode: 282 training return: tensor(-385.2476, device='cuda:0')
episode: 283 training return: tensor(-3.5830, device='cuda:0')
epoch: 71 test_true_pfm: 2729.701957494524 sim_pfm: -173.41710332717048
episode: 284 training return: tensor(-383.3583, device='cuda:0')
episode: 285 training return: tensor(137.7915, device='cuda:0')
episode: 286 training return: tensor(-341.6497, device='cuda:0')
episode: 287 training return: tensor(-418.9495, device='cuda:0')
epoch: 72 test_true_pfm: 3223.463280400665 sim_pfm: -9.269411325503219
episode: 288 training return: tensor(-454.2149, device='cuda:0')
episode: 289 training return: tensor(-220.0114, device='cuda:0')
episode: 290 training return: tensor(-240.0267, device='cuda:0')
episode: 291 training return: tensor(137.2337, device='cuda:0')
epoch: 73 test_true_pfm: 2746.35423319991 sim_pfm: 130.89739770512097
episode: 292 training return: tensor(-356.9547, device='cuda:0')
episode: 293 training return: tensor(-370.3921, device='cuda:0')
episode: 294 training return: tensor(-202.0870, device='cuda:0')
episode: 295 training return: tensor(-164.1782, device='cuda:0')
epoch: 74 test_true_pfm: 2197.2770602080823 sim_pfm: -42.79954748963549
episode: 296 training return: tensor(-379.1208, device='cuda:0')
episode: 297 training return: tensor(-395.8495, device='cuda:0')
episode: 298 training return: tensor(-407.7179, device='cuda:0')
episode: 299 training return: tensor(-320.4821, device='cuda:0')
epoch: 75 test_true_pfm: 2426.6340536291386 sim_pfm: -171.68047597756959
episode: 300 training return: tensor(-300.6419, device='cuda:0')
episode: 301 training return: tensor(-401.6029, device='cuda:0')
episode: 302 training return: tensor(180.0233, device='cuda:0')
episode: 303 training return: tensor(-343.7237, device='cuda:0')
epoch: 76 test_true_pfm: 3084.5296065462376 sim_pfm: 14.77387764370845
episode: 304 training return: tensor(-29.6046, device='cuda:0')
episode: 305 training return: tensor(-126.0266, device='cuda:0')
episode: 306 training return: tensor(-290.2361, device='cuda:0')
episode: 307 training return: tensor(-319.5569, device='cuda:0')
epoch: 77 test_true_pfm: 2332.2316356081155 sim_pfm: 35.476021488876235
episode: 308 training return: tensor(-417.1669, device='cuda:0')
episode: 309 training return: tensor(-392.3572, device='cuda:0')
episode: 310 training return: tensor(-400.1232, device='cuda:0')
episode: 311 training return: tensor(-113.1892, device='cuda:0')
epoch: 78 test_true_pfm: 2523.1081464336326 sim_pfm: 49.586321125670416
episode: 312 training return: tensor(-316.2334, device='cuda:0')
episode: 313 training return: tensor(-359.8271, device='cuda:0')
episode: 314 training return: tensor(159.4324, device='cuda:0')
episode: 315 training return: tensor(-363.7139, device='cuda:0')
epoch: 79 test_true_pfm: 2412.113349788879 sim_pfm: -16.277641674736515
episode: 316 training return: tensor(-284.7835, device='cuda:0')
episode: 317 training return: tensor(-165.3294, device='cuda:0')
episode: 318 training return: tensor(-168.9199, device='cuda:0')
episode: 319 training return: tensor(-139.9371, device='cuda:0')
epoch: 80 test_true_pfm: 2587.9474017093476 sim_pfm: 8.612821527058259
episode: 320 training return: tensor(-316.1734, device='cuda:0')
episode: 321 training return: tensor(-432.3268, device='cuda:0')
episode: 322 training return: tensor(-39.6151, device='cuda:0')
episode: 323 training return: tensor(-355.3990, device='cuda:0')
epoch: 81 test_true_pfm: 3237.906272569306 sim_pfm: 72.67959477105371
episode: 324 training return: tensor(-443.2159, device='cuda:0')
episode: 325 training return: tensor(-220.6706, device='cuda:0')
episode: 326 training return: tensor(-136.9863, device='cuda:0')
episode: 327 training return: tensor(-387.3280, device='cuda:0')
epoch: 82 test_true_pfm: 2376.485142341519 sim_pfm: -51.693189747747965
episode: 328 training return: tensor(-318.4571, device='cuda:0')
episode: 329 training return: tensor(-414.5084, device='cuda:0')
episode: 330 training return: tensor(138.0171, device='cuda:0')
episode: 331 training return: tensor(-355.2139, device='cuda:0')
epoch: 83 test_true_pfm: 2780.274239029579 sim_pfm: 69.84813622072882
episode: 332 training return: tensor(-135.0253, device='cuda:0')
episode: 333 training return: tensor(240.0581, device='cuda:0')
episode: 334 training return: tensor(-355.0193, device='cuda:0')
episode: 335 training return: tensor(-430.6102, device='cuda:0')
epoch: 84 test_true_pfm: 2543.2573246123975 sim_pfm: -95.94849200091751
episode: 336 training return: tensor(-387.9477, device='cuda:0')
episode: 337 training return: tensor(-394.0903, device='cuda:0')
episode: 338 training return: tensor(-438.1491, device='cuda:0')
episode: 339 training return: tensor(-320.5537, device='cuda:0')
epoch: 85 test_true_pfm: 2418.1960482573995 sim_pfm: -93.03055283400074
episode: 340 training return: tensor(-210.7847, device='cuda:0')
episode: 341 training return: tensor(-407.6110, device='cuda:0')
episode: 342 training return: tensor(-431.4224, device='cuda:0')
episode: 343 training return: tensor(57.5420, device='cuda:0')
epoch: 86 test_true_pfm: 2348.112318177623 sim_pfm: -141.17675228862208
episode: 344 training return: tensor(-123.2490, device='cuda:0')
episode: 345 training return: tensor(194.9994, device='cuda:0')
episode: 346 training return: tensor(-214.7485, device='cuda:0')
episode: 347 training return: tensor(-364.5103, device='cuda:0')
epoch: 87 test_true_pfm: 3004.1550742807744 sim_pfm: 155.28638885047985
episode: 348 training return: tensor(-410.3776, device='cuda:0')
episode: 349 training return: tensor(-362.0819, device='cuda:0')
episode: 350 training return: tensor(239.1015, device='cuda:0')
episode: 351 training return: tensor(-392.6132, device='cuda:0')
epoch: 88 test_true_pfm: 2650.116235857053 sim_pfm: -93.87639931819285
episode: 352 training return: tensor(-167.2671, device='cuda:0')
episode: 353 training return: tensor(-223.1677, device='cuda:0')
episode: 354 training return: tensor(-420.4749, device='cuda:0')
episode: 355 training return: tensor(-424.3716, device='cuda:0')
epoch: 89 test_true_pfm: 2186.6542802972376 sim_pfm: -94.86566688500655
episode: 356 training return: tensor(-306.4340, device='cuda:0')
episode: 357 training return: tensor(-54.0385, device='cuda:0')
episode: 358 training return: tensor(-384.8675, device='cuda:0')
episode: 359 training return: tensor(-399.0880, device='cuda:0')
epoch: 90 test_true_pfm: 2593.5049796378185 sim_pfm: -13.193431230009688
episode: 360 training return: tensor(-370.3838, device='cuda:0')
episode: 361 training return: tensor(-328.1170, device='cuda:0')
episode: 362 training return: tensor(-258.7614, device='cuda:0')
episode: 363 training return: tensor(-213.0313, device='cuda:0')
epoch: 91 test_true_pfm: 2199.0314984586607 sim_pfm: -99.72951271754573
episode: 364 training return: tensor(-402.6713, device='cuda:0')
episode: 365 training return: tensor(-360.3945, device='cuda:0')
episode: 366 training return: tensor(232.3434, device='cuda:0')
episode: 367 training return: tensor(-390.0064, device='cuda:0')
epoch: 92 test_true_pfm: 2259.100375647593 sim_pfm: 0.05181595050574591
episode: 368 training return: tensor(-401.8448, device='cuda:0')
episode: 369 training return: tensor(-82.5074, device='cuda:0')
episode: 370 training return: tensor(-449.7861, device='cuda:0')
episode: 371 training return: tensor(41.4887, device='cuda:0')
epoch: 93 test_true_pfm: 2369.4624432332125 sim_pfm: -138.16873130695117
episode: 372 training return: tensor(-429.6361, device='cuda:0')
episode: 373 training return: tensor(-399.4802, device='cuda:0')
episode: 374 training return: tensor(132.6549, device='cuda:0')
episode: 375 training return: tensor(-393.7728, device='cuda:0')
epoch: 94 test_true_pfm: 2359.3468060323266 sim_pfm: 119.92231928212762
episode: 376 training return: tensor(-187.9835, device='cuda:0')
episode: 377 training return: tensor(-214.7002, device='cuda:0')
episode: 378 training return: tensor(-229.6680, device='cuda:0')
episode: 379 training return: tensor(-298.3444, device='cuda:0')
epoch: 95 test_true_pfm: 2372.025737963932 sim_pfm: 20.45831194935211
episode: 380 training return: tensor(-384.7321, device='cuda:0')
episode: 381 training return: tensor(-394.9273, device='cuda:0')
episode: 382 training return: tensor(-406.5133, device='cuda:0')
episode: 383 training return: tensor(-191.5202, device='cuda:0')
epoch: 96 test_true_pfm: 2308.0945214913554 sim_pfm: 34.102313485675644
episode: 384 training return: tensor(-277.1104, device='cuda:0')
episode: 385 training return: tensor(-386.0686, device='cuda:0')
episode: 386 training return: tensor(-388.9646, device='cuda:0')
episode: 387 training return: tensor(-258.2820, device='cuda:0')
epoch: 97 test_true_pfm: 2595.602634300043 sim_pfm: -128.79053574444455
episode: 388 training return: tensor(-218.9941, device='cuda:0')
episode: 389 training return: tensor(-389.6878, device='cuda:0')
episode: 390 training return: tensor(-396.0535, device='cuda:0')
episode: 391 training return: tensor(-426.9872, device='cuda:0')
epoch: 98 test_true_pfm: 2631.382611173667 sim_pfm: -59.73755203530891
episode: 392 training return: tensor(-367.4280, device='cuda:0')
episode: 393 training return: tensor(-236.1604, device='cuda:0')
episode: 394 training return: tensor(-387.8687, device='cuda:0')
episode: 395 training return: tensor(-104.9922, device='cuda:0')
epoch: 99 test_true_pfm: 2926.438583800095 sim_pfm: -137.08255547242393
episode: 396 training return: tensor(-399.4016, device='cuda:0')
episode: 397 training return: tensor(-398.2073, device='cuda:0')
episode: 398 training return: tensor(-23.7072, device='cuda:0')
episode: 399 training return: tensor(-388.3058, device='cuda:0')
epoch: 100 test_true_pfm: 2737.334322465363 sim_pfm: 90.78712502015212
episode: 400 training return: tensor(-409.6131, device='cuda:0')
episode: 401 training return: tensor(220.4243, device='cuda:0')
episode: 402 training return: tensor(-348.6530, device='cuda:0')
episode: 403 training return: tensor(-426.7873, device='cuda:0')
epoch: 101 test_true_pfm: 2404.0905715790345 sim_pfm: -99.80095290439203
episode: 404 training return: tensor(-388.0303, device='cuda:0')
episode: 405 training return: tensor(-371.1193, device='cuda:0')
episode: 406 training return: tensor(-403.8009, device='cuda:0')
episode: 407 training return: tensor(-191.5960, device='cuda:0')
epoch: 102 test_true_pfm: 2340.9214981287523 sim_pfm: -105.57062423592045
episode: 408 training return: tensor(-443.4760, device='cuda:0')
episode: 409 training return: tensor(-99.3091, device='cuda:0')
episode: 410 training return: tensor(-225.7769, device='cuda:0')
episode: 411 training return: tensor(-193.9771, device='cuda:0')
epoch: 103 test_true_pfm: 2687.48577633164 sim_pfm: 66.4367551364121
episode: 412 training return: tensor(-37.9320, device='cuda:0')
episode: 413 training return: tensor(-450.3111, device='cuda:0')
episode: 414 training return: tensor(-82.2272, device='cuda:0')
episode: 415 training return: tensor(-112.6238, device='cuda:0')
epoch: 104 test_true_pfm: 2730.325571503488 sim_pfm: 54.07794770044469
episode: 416 training return: tensor(-141.6330, device='cuda:0')
episode: 417 training return: tensor(-211.7948, device='cuda:0')
episode: 418 training return: tensor(-220.4194, device='cuda:0')
episode: 419 training return: tensor(-431.0420, device='cuda:0')
epoch: 105 test_true_pfm: 2779.0433060188157 sim_pfm: -151.69003647593004
episode: 420 training return: tensor(-9.4515, device='cuda:0')
episode: 421 training return: tensor(-417.8327, device='cuda:0')
episode: 422 training return: tensor(-412.5497, device='cuda:0')
episode: 423 training return: tensor(-194.2001, device='cuda:0')
epoch: 106 test_true_pfm: 2289.8724356775424 sim_pfm: -93.70019160989129
episode: 424 training return: tensor(-213.3454, device='cuda:0')
episode: 425 training return: tensor(-363.3268, device='cuda:0')
episode: 426 training return: tensor(-269.8461, device='cuda:0')
episode: 427 training return: tensor(-408.0865, device='cuda:0')
epoch: 107 test_true_pfm: 2359.9277630604743 sim_pfm: -99.60767156731647
episode: 428 training return: tensor(-411.9720, device='cuda:0')
episode: 429 training return: tensor(-120.2715, device='cuda:0')
episode: 430 training return: tensor(-414.4216, device='cuda:0')
episode: 431 training return: tensor(-296.6140, device='cuda:0')
epoch: 108 test_true_pfm: 2452.4067909546866 sim_pfm: 4.442370542121353
episode: 432 training return: tensor(-412.4806, device='cuda:0')
episode: 433 training return: tensor(-318.5833, device='cuda:0')
episode: 434 training return: tensor(-198.2928, device='cuda:0')
episode: 435 training return: tensor(-408.7602, device='cuda:0')
epoch: 109 test_true_pfm: 2716.580526958265 sim_pfm: 2.000643333012704
episode: 436 training return: tensor(-317.1555, device='cuda:0')
episode: 437 training return: tensor(-85.1412, device='cuda:0')
episode: 438 training return: tensor(159.4207, device='cuda:0')
episode: 439 training return: tensor(-123.8374, device='cuda:0')
epoch: 110 test_true_pfm: 2245.863775751583 sim_pfm: -23.644274692439165
episode: 440 training return: tensor(-436.6793, device='cuda:0')
episode: 441 training return: tensor(-402.1920, device='cuda:0')
episode: 442 training return: tensor(19.4003, device='cuda:0')
episode: 443 training return: tensor(-388.4822, device='cuda:0')
epoch: 111 test_true_pfm: 2404.40021088283 sim_pfm: -209.4698092925537
episode: 444 training return: tensor(-183.1632, device='cuda:0')
episode: 445 training return: tensor(-394.2888, device='cuda:0')
episode: 446 training return: tensor(-7.0530, device='cuda:0')
episode: 447 training return: tensor(-316.8965, device='cuda:0')
epoch: 112 test_true_pfm: 2180.435028639888 sim_pfm: -49.56355790640615
episode: 448 training return: tensor(-391.7862, device='cuda:0')
episode: 449 training return: tensor(-401.3706, device='cuda:0')
episode: 450 training return: tensor(190.4368, device='cuda:0')
episode: 451 training return: tensor(-431.8441, device='cuda:0')
epoch: 113 test_true_pfm: 2472.1506025262934 sim_pfm: -103.75583033449948
episode: 452 training return: tensor(-129.7683, device='cuda:0')
episode: 453 training return: tensor(-392.8297, device='cuda:0')
episode: 454 training return: tensor(-16.4361, device='cuda:0')
episode: 455 training return: tensor(-336.4104, device='cuda:0')
epoch: 114 test_true_pfm: 2743.541155564973 sim_pfm: -79.88765745458659
episode: 456 training return: tensor(-289.1969, device='cuda:0')
episode: 457 training return: tensor(-106.3696, device='cuda:0')
episode: 458 training return: tensor(-33.6572, device='cuda:0')
episode: 459 training return: tensor(-221.1579, device='cuda:0')
epoch: 115 test_true_pfm: 2453.9296173863986 sim_pfm: -50.340188917374086
episode: 460 training return: tensor(-62.0371, device='cuda:0')
episode: 461 training return: tensor(-398.6201, device='cuda:0')
episode: 462 training return: tensor(-409.6812, device='cuda:0')
episode: 463 training return: tensor(-381.1805, device='cuda:0')
epoch: 116 test_true_pfm: 2390.7371428218776 sim_pfm: -51.68877024156973
episode: 464 training return: tensor(-439.9163, device='cuda:0')
episode: 465 training return: tensor(-132.3562, device='cuda:0')
episode: 466 training return: tensor(-213.9318, device='cuda:0')
episode: 467 training return: tensor(-392.6872, device='cuda:0')
epoch: 117 test_true_pfm: 2097.6055818120362 sim_pfm: -7.173405239145116
episode: 468 training return: tensor(-420.2274, device='cuda:0')
episode: 469 training return: tensor(-393.7052, device='cuda:0')
episode: 470 training return: tensor(-323.3050, device='cuda:0')
episode: 471 training return: tensor(-350.7353, device='cuda:0')
epoch: 118 test_true_pfm: 2506.8666094346595 sim_pfm: -32.50640651894113
episode: 472 training return: tensor(-418.3357, device='cuda:0')
episode: 473 training return: tensor(-417.5491, device='cuda:0')
episode: 474 training return: tensor(-220.3052, device='cuda:0')
episode: 475 training return: tensor(-353.2944, device='cuda:0')
epoch: 119 test_true_pfm: 2701.4710856275483 sim_pfm: 28.670222779590404
episode: 476 training return: tensor(-400.6203, device='cuda:0')
episode: 477 training return: tensor(-134.7593, device='cuda:0')
episode: 478 training return: tensor(-361.0850, device='cuda:0')
episode: 479 training return: tensor(-361.8502, device='cuda:0')
epoch: 120 test_true_pfm: 2525.4424899972723 sim_pfm: -159.5795099165795
episode: 480 training return: tensor(-195.2434, device='cuda:0')
episode: 481 training return: tensor(-388.3174, device='cuda:0')
episode: 482 training return: tensor(-353.6447, device='cuda:0')
episode: 483 training return: tensor(186.4534, device='cuda:0')
epoch: 121 test_true_pfm: 2339.076425531979 sim_pfm: -71.32818357173043
episode: 484 training return: tensor(-274.9759, device='cuda:0')
episode: 485 training return: tensor(-384.5694, device='cuda:0')
episode: 486 training return: tensor(-192.1010, device='cuda:0')
episode: 487 training return: tensor(-430.1020, device='cuda:0')
epoch: 122 test_true_pfm: 2250.6143870126903 sim_pfm: -212.24892965778903
episode: 488 training return: tensor(-414.2205, device='cuda:0')
episode: 489 training return: tensor(-229.8656, device='cuda:0')
episode: 490 training return: tensor(-389.4317, device='cuda:0')
episode: 491 training return: tensor(-390.1750, device='cuda:0')
epoch: 123 test_true_pfm: 2672.576458466275 sim_pfm: -67.76108580816071
episode: 492 training return: tensor(222.5221, device='cuda:0')
episode: 493 training return: tensor(-348.9681, device='cuda:0')
episode: 494 training return: tensor(-427.2724, device='cuda:0')
episode: 495 training return: tensor(-181.2516, device='cuda:0')
epoch: 124 test_true_pfm: 2398.4769425990776 sim_pfm: 84.34302197320115
episode: 496 training return: tensor(-322.8838, device='cuda:0')
episode: 497 training return: tensor(-401.4913, device='cuda:0')
episode: 498 training return: tensor(-388.6776, device='cuda:0')
episode: 499 training return: tensor(-410.0077, device='cuda:0')
epoch: 125 test_true_pfm: 2917.7651785895937 sim_pfm: -93.07271890348056
episode: 500 training return: tensor(-398.5026, device='cuda:0')
episode: 501 training return: tensor(-328.1195, device='cuda:0')
episode: 502 training return: tensor(-348.0944, device='cuda:0')
episode: 503 training return: tensor(207.0357, device='cuda:0')
epoch: 126 test_true_pfm: 2347.1089087167998 sim_pfm: 25.71861728131383
episode: 504 training return: tensor(-126.6098, device='cuda:0')
episode: 505 training return: tensor(-217.5553, device='cuda:0')
episode: 506 training return: tensor(-402.8467, device='cuda:0')
episode: 507 training return: tensor(160.7045, device='cuda:0')
epoch: 127 test_true_pfm: 2208.3443225379415 sim_pfm: -20.30350949856802
episode: 508 training return: tensor(-214.4088, device='cuda:0')
episode: 509 training return: tensor(-440.7944, device='cuda:0')
episode: 510 training return: tensor(-400.3370, device='cuda:0')
episode: 511 training return: tensor(-269.9386, device='cuda:0')
epoch: 128 test_true_pfm: 2683.5471695308947 sim_pfm: -108.00577124604995
episode: 512 training return: tensor(-386.5891, device='cuda:0')
episode: 513 training return: tensor(-435.9279, device='cuda:0')
episode: 514 training return: tensor(-399.4889, device='cuda:0')
episode: 515 training return: tensor(-414.5774, device='cuda:0')
epoch: 129 test_true_pfm: 1843.9834963146125 sim_pfm: -134.8794786753133
episode: 516 training return: tensor(-322.9272, device='cuda:0')
episode: 517 training return: tensor(-192.1254, device='cuda:0')
episode: 518 training return: tensor(225.4713, device='cuda:0')
episode: 519 training return: tensor(-396.2944, device='cuda:0')
epoch: 130 test_true_pfm: 2093.984260717526 sim_pfm: 32.05229904587031
episode: 520 training return: tensor(-286.6764, device='cuda:0')
episode: 521 training return: tensor(140.1738, device='cuda:0')
episode: 522 training return: tensor(-128.4222, device='cuda:0')
episode: 523 training return: tensor(-300.0923, device='cuda:0')
epoch: 131 test_true_pfm: 2620.7777496341932 sim_pfm: 68.65442509262357
episode: 524 training return: tensor(-223.8532, device='cuda:0')
episode: 525 training return: tensor(-377.4189, device='cuda:0')
episode: 526 training return: tensor(-407.7514, device='cuda:0')
episode: 527 training return: tensor(-288.2303, device='cuda:0')
epoch: 132 test_true_pfm: 2932.8377219871104 sim_pfm: -81.84643740529039
episode: 528 training return: tensor(-154.2782, device='cuda:0')
episode: 529 training return: tensor(-393.7675, device='cuda:0')
episode: 530 training return: tensor(-399.6652, device='cuda:0')
episode: 531 training return: tensor(-373.0393, device='cuda:0')
epoch: 133 test_true_pfm: 2424.9150824378603 sim_pfm: -41.48614601478524
episode: 532 training return: tensor(-106.4587, device='cuda:0')
episode: 533 training return: tensor(-397.4796, device='cuda:0')
episode: 534 training return: tensor(-418.1611, device='cuda:0')
episode: 535 training return: tensor(-406.6989, device='cuda:0')
epoch: 134 test_true_pfm: 2752.572648621952 sim_pfm: -96.14801221591188
episode: 536 training return: tensor(-280.6090, device='cuda:0')
episode: 537 training return: tensor(-372.9905, device='cuda:0')
episode: 538 training return: tensor(-396.6673, device='cuda:0')
episode: 539 training return: tensor(-225.1207, device='cuda:0')
epoch: 135 test_true_pfm: 2821.2310296266223 sim_pfm: -117.12209370414105
episode: 540 training return: tensor(-376.8833, device='cuda:0')
episode: 541 training return: tensor(-397.5025, device='cuda:0')
episode: 542 training return: tensor(-210.3020, device='cuda:0')
episode: 543 training return: tensor(141.6738, device='cuda:0')
epoch: 136 test_true_pfm: 2206.2468095997606 sim_pfm: -184.73409354837108
episode: 544 training return: tensor(-343.8675, device='cuda:0')
episode: 545 training return: tensor(-197.4804, device='cuda:0')
episode: 546 training return: tensor(-60.7007, device='cuda:0')
episode: 547 training return: tensor(-113.7255, device='cuda:0')
epoch: 137 test_true_pfm: 3044.359404876441 sim_pfm: -148.9708904545502
episode: 548 training return: tensor(-403.9057, device='cuda:0')
episode: 549 training return: tensor(-305.7748, device='cuda:0')
episode: 550 training return: tensor(-399.9854, device='cuda:0')
episode: 551 training return: tensor(-100.7620, device='cuda:0')
epoch: 138 test_true_pfm: 2707.26546106232 sim_pfm: 9.152630778485522
episode: 552 training return: tensor(-391.0982, device='cuda:0')
episode: 553 training return: tensor(-219.4720, device='cuda:0')
episode: 554 training return: tensor(-79.7649, device='cuda:0')
episode: 555 training return: tensor(-342.1808, device='cuda:0')
epoch: 139 test_true_pfm: 2216.4599498162383 sim_pfm: 76.73400083302597
episode: 556 training return: tensor(-404.6547, device='cuda:0')
episode: 557 training return: tensor(196.5257, device='cuda:0')
episode: 558 training return: tensor(-359.6717, device='cuda:0')
episode: 559 training return: tensor(-381.3556, device='cuda:0')
epoch: 140 test_true_pfm: 2558.5243322378064 sim_pfm: -78.90143560245633
episode: 560 training return: tensor(-194.6213, device='cuda:0')
episode: 561 training return: tensor(-219.4939, device='cuda:0')
episode: 562 training return: tensor(-364.7323, device='cuda:0')
episode: 563 training return: tensor(-347.4210, device='cuda:0')
epoch: 141 test_true_pfm: 2648.961420528896 sim_pfm: -106.26717100130433
episode: 564 training return: tensor(-50.0100, device='cuda:0')
episode: 565 training return: tensor(-220.6978, device='cuda:0')
episode: 566 training return: tensor(-356.6177, device='cuda:0')
episode: 567 training return: tensor(-138.0821, device='cuda:0')
epoch: 142 test_true_pfm: 2247.8288630341963 sim_pfm: -19.161084545892663
episode: 568 training return: tensor(67.0853, device='cuda:0')
episode: 569 training return: tensor(168.5259, device='cuda:0')
episode: 570 training return: tensor(257.0834, device='cuda:0')
episode: 571 training return: tensor(-108.3239, device='cuda:0')
epoch: 143 test_true_pfm: 2441.621311823134 sim_pfm: -89.48524835546657
episode: 572 training return: tensor(-226.1554, device='cuda:0')
episode: 573 training return: tensor(-368.3658, device='cuda:0')
episode: 574 training return: tensor(-195.3164, device='cuda:0')
episode: 575 training return: tensor(-43.3561, device='cuda:0')
epoch: 144 test_true_pfm: 2371.3943032987127 sim_pfm: 168.98313459934434
episode: 576 training return: tensor(-356.3063, device='cuda:0')
episode: 577 training return: tensor(-405.4121, device='cuda:0')
episode: 578 training return: tensor(-356.8715, device='cuda:0')
episode: 579 training return: tensor(126.0817, device='cuda:0')
epoch: 145 test_true_pfm: 2061.228117878882 sim_pfm: -98.01823666664616
episode: 580 training return: tensor(-395.7719, device='cuda:0')
episode: 581 training return: tensor(-320.0093, device='cuda:0')
episode: 582 training return: tensor(-394.1516, device='cuda:0')
episode: 583 training return: tensor(-395.1760, device='cuda:0')
epoch: 146 test_true_pfm: 2258.6735468944094 sim_pfm: -114.36637679631046
episode: 584 training return: tensor(149.5754, device='cuda:0')
episode: 585 training return: tensor(-333.6690, device='cuda:0')
episode: 586 training return: tensor(-53.3439, device='cuda:0')
episode: 587 training return: tensor(-445.8340, device='cuda:0')
epoch: 147 test_true_pfm: 2886.4321520744884 sim_pfm: 36.61490035918541
episode: 588 training return: tensor(-325.2623, device='cuda:0')
episode: 589 training return: tensor(-407.2965, device='cuda:0')
episode: 590 training return: tensor(-401.5880, device='cuda:0')
episode: 591 training return: tensor(-388.5009, device='cuda:0')
epoch: 148 test_true_pfm: 2514.36831144017 sim_pfm: 26.08621909658541
episode: 592 training return: tensor(7.4503, device='cuda:0')
episode: 593 training return: tensor(-400.6802, device='cuda:0')
episode: 594 training return: tensor(-381.1985, device='cuda:0')
episode: 595 training return: tensor(74.0981, device='cuda:0')
epoch: 149 test_true_pfm: 2714.800157777821 sim_pfm: -42.85346507560462
episode: 596 training return: tensor(-118.8602, device='cuda:0')
episode: 597 training return: tensor(-138.1883, device='cuda:0')
episode: 598 training return: tensor(-143.2354, device='cuda:0')
episode: 599 training return: tensor(-300.1643, device='cuda:0')
epoch: 150 test_true_pfm: 3019.415942797497 sim_pfm: -65.8308559943107
