['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '3']
epoch: 0 training_loss 0.27712748408317567 test_loss: 0.19589519500732422
epoch: 1 training_loss 0.18398812018334865 test_loss: 0.22897098064422608
epoch: 2 training_loss 0.1835481110960245 test_loss: 0.17100856304168702
epoch: 3 training_loss 0.1662611060589552 test_loss: 0.17694566249847413
epoch: 4 training_loss 0.1534495886042714 test_loss: 0.1865397572517395
epoch: 5 training_loss 0.16019081309437752 test_loss: 0.14537546634674073
epoch: 6 training_loss 0.15877201072871686 test_loss: 0.15681320428848267
epoch: 7 training_loss 0.1625625938922167 test_loss: 0.15387245416641235
epoch: 8 training_loss 0.149565785638988 test_loss: 0.149133563041687
epoch: 9 training_loss 0.14774300180375577 test_loss: 0.17459319829940795
epoch: 10 training_loss 0.14929663747549057 test_loss: 0.1552807092666626
epoch: 11 training_loss 0.15278288949280977 test_loss: 0.16743555068969726
epoch: 12 training_loss 0.16062189396470786 test_loss: 0.16016798019409179
epoch: 13 training_loss 0.1519179033115506 test_loss: 0.1409605026245117
epoch: 14 training_loss 0.14353257488459348 test_loss: 0.14453775882720948
epoch: 15 training_loss 0.14743672024458646 test_loss: 0.13911728858947753
epoch: 16 training_loss 0.1514399029687047 test_loss: 0.1609927535057068
epoch: 17 training_loss 0.13780640579760076 test_loss: 0.13822020292282106
epoch: 18 training_loss 0.15276505902409554 test_loss: 0.18681199550628663
epoch: 19 training_loss 0.1497883727774024 test_loss: 0.15474486351013184
epoch: 20 training_loss 0.1540450882166624 test_loss: 0.12440426349639892
epoch: 21 training_loss 0.14999752379953862 test_loss: 0.13822510242462158
epoch: 22 training_loss 0.14570178158581257 test_loss: 0.14323861598968507
epoch: 23 training_loss 0.13158395923674107 test_loss: 0.16597577333450317
epoch: 24 training_loss 0.14468660205602646 test_loss: 0.14724185466766357
epoch: 25 training_loss 0.14524780604988335 test_loss: 0.1389091730117798
epoch: 26 training_loss 0.1383858559653163 test_loss: 0.13043183088302612
epoch: 27 training_loss 0.14865989372134208 test_loss: 0.16296483278274537
epoch: 28 training_loss 0.1404636286571622 test_loss: 0.1341681718826294
epoch: 29 training_loss 0.13691427644342183 test_loss: 0.13010140657424926
epoch: 30 training_loss 0.1418564036488533 test_loss: 0.15656031370162965
epoch: 31 training_loss 0.14650641478598117 test_loss: 0.14687273502349854
epoch: 32 training_loss 0.14293111104518175 test_loss: 0.1514160990715027
epoch: 33 training_loss 0.12842469915747642 test_loss: 0.14265841245651245
epoch: 34 training_loss 0.14979046441614627 test_loss: 0.13704925775527954
epoch: 35 training_loss 0.14581935357302428 test_loss: 0.13965556621551514
epoch: 36 training_loss 0.13476384934037924 test_loss: 0.13778059482574462
epoch: 37 training_loss 0.13699399318546057 test_loss: 0.13157329559326172
epoch: 38 training_loss 0.14448171779513358 test_loss: 0.1437551736831665
epoch: 39 training_loss 0.13491123981773853 test_loss: 0.1431584119796753
epoch: 40 training_loss 0.13455616679042579 test_loss: 0.15082175731658937
epoch: 41 training_loss 0.14831130590289832 test_loss: 0.1675658106803894
epoch: 42 training_loss 0.1296007376164198 test_loss: 0.12307868003845215
epoch: 43 training_loss 0.14373884242027998 test_loss: 0.14401710033416748
epoch: 44 training_loss 0.13376483749598264 test_loss: 0.1283714771270752
epoch: 45 training_loss 0.14014006294310094 test_loss: 0.1387126326560974
epoch: 46 training_loss 0.1369210570678115 test_loss: 0.1258987545967102
epoch: 47 training_loss 0.13517336826771498 test_loss: 0.13308508396148683
epoch: 48 training_loss 0.14700151290744543 test_loss: 0.14360696077346802
epoch: 49 training_loss 0.1366813514009118 test_loss: 0.140838623046875
epoch: 50 training_loss 0.1431061404943466 test_loss: 0.13703405857086182
epoch: 51 training_loss 0.1376497401483357 test_loss: 0.14169542789459227
epoch: 52 training_loss 0.13956220157444477 test_loss: 0.14267905950546264
epoch: 53 training_loss 0.13343748170882463 test_loss: 0.1423576831817627
epoch: 54 training_loss 0.13920902069658042 test_loss: 0.13389949798583983
epoch: 55 training_loss 0.1380163076147437 test_loss: 0.1505791187286377
epoch: 56 training_loss 0.1345807783678174 test_loss: 0.1458810567855835
epoch: 57 training_loss 0.13800629813224077 test_loss: 0.13123102188110353
epoch: 58 training_loss 0.13001017980277538 test_loss: 0.15998971462249756
epoch: 59 training_loss 0.14382043804973363 test_loss: 0.12461942434310913
epoch: 60 training_loss 0.1300608867034316 test_loss: 0.14220746755599975
epoch: 61 training_loss 0.15026183284819125 test_loss: 0.12002997398376465
epoch: 62 training_loss 0.13761758740991353 test_loss: 0.1534676432609558
epoch: 63 training_loss 0.1297024828568101 test_loss: 0.13019542694091796
epoch: 64 training_loss 0.1357363187521696 test_loss: 0.12786704301834106
epoch: 65 training_loss 0.14143031150102614 test_loss: 0.13402625322341918
epoch: 66 training_loss 0.14079065404832364 test_loss: 0.12092111110687256
epoch: 67 training_loss 0.13452581617981196 test_loss: 0.13324968814849852
epoch: 68 training_loss 0.13418874952942134 test_loss: 0.13357608318328856
epoch: 69 training_loss 0.13387079931795598 test_loss: 0.14807795286178588
epoch: 70 training_loss 0.13809048876166344 test_loss: 0.14263490438461304
epoch: 71 training_loss 0.1350251991674304 test_loss: 0.14056891202926636
epoch: 72 training_loss 0.1344240177050233 test_loss: 0.14101169109344483
epoch: 73 training_loss 0.13463560171425343 test_loss: 0.13213452100753784
epoch: 74 training_loss 0.13293608900159598 test_loss: 0.13725219964981078
epoch: 75 training_loss 0.13689466077834367 test_loss: 0.13429514169692994
epoch: 76 training_loss 0.13588896438479423 test_loss: 0.14990254640579223
epoch: 77 training_loss 0.1402800977230072 test_loss: 0.15114824771881102
epoch: 78 training_loss 0.13820991564542054 test_loss: 0.14019173383712769
epoch: 79 training_loss 0.13728259257972242 test_loss: 0.1467987298965454
epoch: 80 training_loss 0.13592848014086484 test_loss: 0.13834707736968993
epoch: 81 training_loss 0.13864976271986962 test_loss: 0.14389317035675048
epoch: 82 training_loss 0.13302767749875785 test_loss: 0.13742364645004274
epoch: 83 training_loss 0.13279078967869282 test_loss: 0.14711073637008668
epoch: 84 training_loss 0.13817605935037136 test_loss: 0.13252238035202027
epoch: 85 training_loss 0.1431739829480648 test_loss: 0.1296568751335144
epoch: 86 training_loss 0.1376678464189172 test_loss: 0.1659369111061096
epoch: 87 training_loss 0.1417876148596406 test_loss: 0.1596888303756714
epoch: 88 training_loss 0.12783173218369484 test_loss: 0.1305241823196411
epoch: 89 training_loss 0.13182507697492837 test_loss: 0.1512672185897827
epoch: 90 training_loss 0.13513758212327956 test_loss: 0.12574667930603028
epoch: 91 training_loss 0.13297290083020927 test_loss: 0.12548996210098268
epoch: 92 training_loss 0.13950220327824353 test_loss: 0.13677393198013305
epoch: 93 training_loss 0.13131085842847823 test_loss: 0.13289755582809448
epoch: 94 training_loss 0.1358474261313677 test_loss: 0.13045462369918823
epoch: 95 training_loss 0.1273459637723863 test_loss: 0.14740756750106812
epoch: 96 training_loss 0.13389954395592213 test_loss: 0.15392266511917113
epoch: 97 training_loss 0.13699514623731374 test_loss: 0.12525805234909057
epoch: 98 training_loss 0.1279884883761406 test_loss: 0.12865344285964966
epoch: 99 training_loss 0.13572476025670765 test_loss: 0.14454865455627441
epoch: 100 training_loss 0.1307385752350092 test_loss: 0.15938347578048706
epoch: 101 training_loss 0.1344866891950369 test_loss: 0.1550150156021118
epoch: 102 training_loss 0.13365961376577615 test_loss: 0.1306745171546936
epoch: 103 training_loss 0.12681904017925263 test_loss: 0.14361188411712647
epoch: 104 training_loss 0.13601986430585383 test_loss: 0.1401721715927124
epoch: 105 training_loss 0.12908825628459453 test_loss: 0.1528171181678772
epoch: 106 training_loss 0.13287355966866016 test_loss: 0.15142269134521485
epoch: 107 training_loss 0.13251561857759953 test_loss: 0.13458470106124878
epoch: 108 training_loss 0.12978139463812113 test_loss: 0.14532840251922607
epoch: 109 training_loss 0.13726413127034903 test_loss: 0.1285577178001404
epoch: 110 training_loss 0.13068962294608355 test_loss: 0.10951982736587525
epoch: 111 training_loss 0.1436494079232216 test_loss: 0.13869708776474
epoch: 112 training_loss 0.13007091753184796 test_loss: 0.13950287103652953
epoch: 113 training_loss 0.1306569616869092 test_loss: 0.12950810194015502
epoch: 114 training_loss 0.13256024841219186 test_loss: 0.14028207063674927
epoch: 115 training_loss 0.13226685337722302 test_loss: 0.14179285764694213
epoch: 116 training_loss 0.12921535421162844 test_loss: 0.13449366092681886
epoch: 117 training_loss 0.1422717297822237 test_loss: 0.13822038173675538
epoch: 118 training_loss 0.13217036079615355 test_loss: 0.13171608448028566
epoch: 119 training_loss 0.13421151630580425 test_loss: 0.14633530378341675
epoch: 120 training_loss 0.1378237659111619 test_loss: 0.1309656620025635
epoch: 121 training_loss 0.13761248737573623 test_loss: 0.1571307897567749
epoch: 122 training_loss 0.14063847348093986 test_loss: 0.14905297756195068
epoch: 123 training_loss 0.13058357387781144 test_loss: 0.12781808376312256
epoch: 124 training_loss 0.13418135803192854 test_loss: 0.12856597900390626
epoch: 125 training_loss 0.1291027631983161 test_loss: 0.14661283493041993
epoch: 126 training_loss 0.1371829203888774 test_loss: 0.1302838444709778
epoch: 127 training_loss 0.1340146693214774 test_loss: 0.13602300882339477
epoch: 128 training_loss 0.1380628440901637 test_loss: 0.12380150556564332
epoch: 129 training_loss 0.13762740064412354 test_loss: 0.14206249713897706
epoch: 130 training_loss 0.13445979170501232 test_loss: 0.14606399536132814
epoch: 131 training_loss 0.13229596342891456 test_loss: 0.13638361692428588
epoch: 132 training_loss 0.1351881382614374 test_loss: 0.13589489459991455
epoch: 133 training_loss 0.1393919069878757 test_loss: 0.14096431732177733
epoch: 134 training_loss 0.14392357654869556 test_loss: 0.13251532316207887
epoch: 135 training_loss 0.13246477734297513 test_loss: 0.11719951629638672
epoch: 136 training_loss 0.12903682958334683 test_loss: 0.12935456037521362
epoch: 137 training_loss 0.1356668246909976 test_loss: 0.12466824054718018
epoch: 138 training_loss 0.13151159387081862 test_loss: 0.1296441912651062
epoch: 139 training_loss 0.12622611947357654 test_loss: 0.14813125133514404
epoch: 140 training_loss 0.13749658904969692 test_loss: 0.13550832271575927
epoch: 141 training_loss 0.13863786026835442 test_loss: 0.12723243236541748
epoch: 142 training_loss 0.13794882155954838 test_loss: 0.1418905258178711
epoch: 143 training_loss 0.13837830543518068 test_loss: 0.14510459899902345
epoch: 144 training_loss 0.13693848494440317 test_loss: 0.12298388481140136
epoch: 145 training_loss 0.1437758718058467 test_loss: 0.12469593286514283
epoch: 146 training_loss 0.13629202093929052 test_loss: 0.1452004313468933
epoch: 147 training_loss 0.13346578605473042 test_loss: 0.14557701349258423
epoch: 148 training_loss 0.13064959447830915 test_loss: 0.1345090389251709
epoch: 149 training_loss 0.1279119237139821 test_loss: 0.13031429052352905
epoch: 0 training_loss 8.041177082061768 test_loss: 4.888854217529297
epoch: 1 training_loss 3.780293278694153 test_loss: 2.9750457763671876
epoch: 2 training_loss 2.438729305267334 test_loss: 2.091069984436035
epoch: 3 training_loss 1.92441645860672 test_loss: 1.7100229263305664
epoch: 4 training_loss 1.5750387132167816 test_loss: 1.5336084365844727
epoch: 5 training_loss 1.3946276116371155 test_loss: 1.2964423179626465
epoch: 6 training_loss 1.243059697151184 test_loss: 1.219611644744873
epoch: 7 training_loss 1.1581265795230866 test_loss: 1.1715936660766602
epoch: 8 training_loss 1.1195548337697983 test_loss: 1.0933238029479981
epoch: 9 training_loss 1.0441352570056914 test_loss: 1.071908950805664
epoch: 10 training_loss 0.9895711344480514 test_loss: 1.036262607574463
epoch: 11 training_loss 0.9634649056196213 test_loss: 0.900809383392334
epoch: 12 training_loss 0.9952048605680466 test_loss: 0.9286162376403808
epoch: 13 training_loss 0.9029733914136887 test_loss: 0.8958950996398926
epoch: 14 training_loss 0.8952204865217209 test_loss: 0.8533252716064453
epoch: 15 training_loss 0.8382969284057618 test_loss: 0.8375711441040039
epoch: 16 training_loss 0.8615955924987793 test_loss: 0.8671533584594726
epoch: 17 training_loss 0.81128402531147 test_loss: 0.788880443572998
epoch: 18 training_loss 0.8117923218011857 test_loss: 0.8022977828979492
epoch: 19 training_loss 0.8081761956214905 test_loss: 0.8409268379211425
epoch: 20 training_loss 0.7737739711999894 test_loss: 0.8251051902770996
epoch: 21 training_loss 0.7729707390069962 test_loss: 0.7736396789550781
epoch: 22 training_loss 0.7589519268274307 test_loss: 0.7298484325408936
epoch: 23 training_loss 0.7335528737306595 test_loss: 0.7585888385772706
epoch: 24 training_loss 0.7222700107097626 test_loss: 0.7346786499023438
epoch: 25 training_loss 0.7178330379724502 test_loss: 0.7113465785980224
epoch: 26 training_loss 0.7278824782371521 test_loss: 0.727366304397583
epoch: 27 training_loss 0.7257127386331558 test_loss: 0.7312479972839355
epoch: 28 training_loss 0.7153852748870849 test_loss: 0.6901678085327149
epoch: 29 training_loss 0.6955223244428634 test_loss: 0.7013140678405761
epoch: 30 training_loss 0.717710308432579 test_loss: 0.7192610740661621
epoch: 31 training_loss 0.7046493262052536 test_loss: 0.6588581562042236
epoch: 32 training_loss 0.7025881451368332 test_loss: 0.6670731544494629
epoch: 33 training_loss 0.6901234459877014 test_loss: 0.6924559593200683
epoch: 34 training_loss 0.6721280026435852 test_loss: 0.6684400558471679
epoch: 35 training_loss 0.6904186081886291 test_loss: 0.6733248710632325
epoch: 36 training_loss 0.6538759762048721 test_loss: 0.6564603328704834
epoch: 37 training_loss 0.6540013331174851 test_loss: 0.6747947216033936
epoch: 38 training_loss 0.6395428860187531 test_loss: 0.6410147666931152
epoch: 39 training_loss 0.6525879609584808 test_loss: 0.6216188907623291
epoch: 40 training_loss 0.6557769882678985 test_loss: 0.6823357582092285
epoch: 41 training_loss 0.6312404376268387 test_loss: 0.64185471534729
epoch: 42 training_loss 0.6444652318954468 test_loss: 0.6383422374725342
epoch: 43 training_loss 0.6350200229883194 test_loss: 0.6423393726348877
epoch: 44 training_loss 0.6269938361644745 test_loss: 0.6903089046478271
epoch: 45 training_loss 0.6370189201831817 test_loss: 0.6393555164337158
epoch: 46 training_loss 0.628568269610405 test_loss: 0.6226542949676513
epoch: 47 training_loss 0.6229676669836044 test_loss: 0.6038485050201416
epoch: 48 training_loss 0.6213713175058365 test_loss: 0.5994978427886963
epoch: 49 training_loss 0.632015660405159 test_loss: 0.6193183898925781
epoch: 50 training_loss 0.6136647665500641 test_loss: 0.5977288722991944
epoch: 51 training_loss 0.6047187751531601 test_loss: 0.5922060966491699
epoch: 52 training_loss 0.6074893987178802 test_loss: 0.6180402278900147
epoch: 53 training_loss 0.589028685092926 test_loss: 0.5744292259216308
epoch: 54 training_loss 0.6090784859657288 test_loss: 0.5693567276000977
epoch: 55 training_loss 0.5956710636615753 test_loss: 0.5908285617828369
epoch: 56 training_loss 0.592734649181366 test_loss: 0.5965612411499024
epoch: 57 training_loss 0.5983762192726135 test_loss: 0.6370347023010254
epoch: 58 training_loss 0.5960376566648483 test_loss: 0.6079270362854003
epoch: 59 training_loss 0.6069166660308838 test_loss: 0.5786092758178711
epoch: 60 training_loss 0.5922363746166229 test_loss: 0.5968460083007813
epoch: 61 training_loss 0.5667494955658913 test_loss: 0.5790884017944335
epoch: 62 training_loss 0.5881445151567459 test_loss: 0.5628148555755615
epoch: 63 training_loss 0.5748983398079872 test_loss: 0.6038658618927002
epoch: 64 training_loss 0.5975174671411514 test_loss: 0.5796887874603271
epoch: 65 training_loss 0.5807364064455033 test_loss: 0.6140525341033936
epoch: 66 training_loss 0.5865196576714515 test_loss: 0.5522714138031006
epoch: 67 training_loss 0.5847314211726189 test_loss: 0.5500979900360108
epoch: 68 training_loss 0.5678286096453666 test_loss: 0.5888672351837159
epoch: 69 training_loss 0.5642492297291756 test_loss: 0.6015439510345459
epoch: 70 training_loss 0.5631027582287789 test_loss: 0.5869275093078613
epoch: 71 training_loss 0.5553167223930359 test_loss: 0.5414267539978027
epoch: 72 training_loss 0.5660383605957031 test_loss: 0.5721799850463867
epoch: 73 training_loss 0.5707962572574615 test_loss: 0.53875732421875
epoch: 74 training_loss 0.5550787600874901 test_loss: 0.597196626663208
epoch: 75 training_loss 0.5676396432518959 test_loss: 0.5996272087097168
epoch: 76 training_loss 0.5552399903535843 test_loss: 0.5891323566436768
epoch: 77 training_loss 0.5451601004600525 test_loss: 0.5882663249969482
epoch: 78 training_loss 0.5531116384267807 test_loss: 0.5500946998596191
epoch: 79 training_loss 0.5570201832056045 test_loss: 0.5422833442687989
epoch: 80 training_loss 0.5473610678315163 test_loss: 0.568926477432251
epoch: 81 training_loss 0.554413170516491 test_loss: 0.572504711151123
epoch: 82 training_loss 0.545407734811306 test_loss: 0.5424334526062011
epoch: 83 training_loss 0.5594941872358322 test_loss: 0.5516527175903321
epoch: 84 training_loss 0.5593692126870156 test_loss: 0.5422574043273926
epoch: 85 training_loss 0.5458847773075104 test_loss: 0.5625439167022706
epoch: 86 training_loss 0.5470156049728394 test_loss: 0.5277753353118897
epoch: 87 training_loss 0.5403749462962151 test_loss: 0.5356807231903076
epoch: 88 training_loss 0.5572322857379913 test_loss: 0.5320225715637207
epoch: 89 training_loss 0.5406989520788192 test_loss: 0.5563948154449463
epoch: 90 training_loss 0.5445003342628479 test_loss: 0.5243076801300048
epoch: 91 training_loss 0.5368581649661064 test_loss: 0.5308549880981446
epoch: 92 training_loss 0.5280619278550148 test_loss: 0.5462881565093994
epoch: 93 training_loss 0.5270041397213936 test_loss: 0.5237241744995117
epoch: 94 training_loss 0.5455073139071465 test_loss: 0.5391298294067383
epoch: 95 training_loss 0.5359859836101531 test_loss: 0.5521982669830322
epoch: 96 training_loss 0.530316232740879 test_loss: 0.5232285499572754
epoch: 97 training_loss 0.5382645982503891 test_loss: 0.5470502853393555
epoch: 98 training_loss 0.5360954242944718 test_loss: 0.5261165618896484
epoch: 99 training_loss 0.5225851526856422 test_loss: 0.5612716674804688
epoch: 100 training_loss 0.5372251355648041 test_loss: 0.5429784774780273
epoch: 101 training_loss 0.5326517829298973 test_loss: 0.5240282535552978
epoch: 102 training_loss 0.5302653217315674 test_loss: 0.5210486888885498
epoch: 103 training_loss 0.5317876499891281 test_loss: 0.5152105331420899
epoch: 104 training_loss 0.5087459233403205 test_loss: 0.5006025791168213
epoch: 105 training_loss 0.5162675529718399 test_loss: 0.524579668045044
epoch: 106 training_loss 0.524394410252571 test_loss: 0.5516544818878174
epoch: 107 training_loss 0.5318452674150467 test_loss: 0.5291935920715332
epoch: 108 training_loss 0.5158739510178566 test_loss: 0.547987699508667
epoch: 109 training_loss 0.5109877288341522 test_loss: 0.547429895401001
epoch: 110 training_loss 0.5167452353239059 test_loss: 0.5106281280517578
epoch: 111 training_loss 0.5295234027504921 test_loss: 0.5275893688201905
epoch: 112 training_loss 0.5073226091265678 test_loss: 0.5095193386077881
epoch: 113 training_loss 0.5095875096321106 test_loss: 0.5413027763366699
epoch: 114 training_loss 0.5010845860838891 test_loss: 0.5643092155456543
epoch: 115 training_loss 0.5163014486432076 test_loss: 0.5146903991699219
epoch: 116 training_loss 0.5081881386041641 test_loss: 0.5262669086456299
epoch: 117 training_loss 0.5058070707321167 test_loss: 0.5480751037597656
epoch: 118 training_loss 0.5057106918096542 test_loss: 0.48595471382141114
epoch: 119 training_loss 0.5114701250195504 test_loss: 0.5056519508361816
epoch: 120 training_loss 0.502749927341938 test_loss: 0.5176815986633301
epoch: 121 training_loss 0.5035522598028183 test_loss: 0.5011973857879639
epoch: 122 training_loss 0.5023143011331558 test_loss: 0.5005000114440918
epoch: 123 training_loss 0.5242053684592247 test_loss: 0.4931054592132568
epoch: 124 training_loss 0.502634391784668 test_loss: 0.508385705947876
epoch: 125 training_loss 0.5201471707224846 test_loss: 0.5200344085693359
epoch: 126 training_loss 0.5101076120138168 test_loss: 0.4905391693115234
epoch: 127 training_loss 0.51423898011446 test_loss: 0.4946280479431152
epoch: 128 training_loss 0.49572867542505267 test_loss: 0.5179361343383789
epoch: 129 training_loss 0.49483070254325867 test_loss: 0.5242322444915771
epoch: 130 training_loss 0.5162101891636849 test_loss: 0.5187049388885498
epoch: 131 training_loss 0.494659908413887 test_loss: 0.5029519081115723
epoch: 132 training_loss 0.5095647862553596 test_loss: 0.5114476680755615
epoch: 133 training_loss 0.49932519912719725 test_loss: 0.5122242450714112
epoch: 134 training_loss 0.49686942726373673 test_loss: 0.49389138221740725
epoch: 135 training_loss 0.4816825079917908 test_loss: 0.4823399543762207
epoch: 136 training_loss 0.4817508915066719 test_loss: 0.4990841388702393
epoch: 137 training_loss 0.49234657526016234 test_loss: 0.49933390617370604
epoch: 138 training_loss 0.4818387308716774 test_loss: 0.49953336715698243
epoch: 139 training_loss 0.4878503277897835 test_loss: 0.4844813823699951
epoch: 140 training_loss 0.49211375176906585 test_loss: 0.4873072624206543
epoch: 141 training_loss 0.48960051745176314 test_loss: 0.49019837379455566
epoch: 142 training_loss 0.48683099627494814 test_loss: 0.4953639507293701
epoch: 143 training_loss 0.49092284828424454 test_loss: 0.5020679950714111
epoch: 144 training_loss 0.49437550723552703 test_loss: 0.4988552570343018
epoch: 145 training_loss 0.4837008911371231 test_loss: 0.48376216888427737
epoch: 146 training_loss 0.488100426197052 test_loss: 0.4835686683654785
epoch: 147 training_loss 0.48972963482141496 test_loss: 0.48135690689086913
epoch: 148 training_loss 0.49036292880773547 test_loss: 0.4904064655303955
epoch: 149 training_loss 0.4808404737710953 test_loss: 0.5057813167572022
3105.588611373793
episode: 0 training return: tensor(-75.2011, device='cuda:0')
episode: 1 training return: tensor(-122.7115, device='cuda:0')
episode: 2 training return: tensor(-79.5791, device='cuda:0')
episode: 3 training return: tensor(-112.9357, device='cuda:0')
epoch: 1 test_true_pfm: 3191.81215822507 sim_pfm: -82.08496466386714
episode: 4 training return: tensor(-119.1956, device='cuda:0')
episode: 5 training return: tensor(-111.8954, device='cuda:0')
episode: 6 training return: tensor(-107.6804, device='cuda:0')
episode: 7 training return: tensor(-97.9859, device='cuda:0')
epoch: 2 test_true_pfm: 3190.219374704126 sim_pfm: -112.83970734094812
episode: 8 training return: tensor(-73.3052, device='cuda:0')
episode: 9 training return: tensor(-81.7243, device='cuda:0')
episode: 10 training return: tensor(-697.8560, device='cuda:0')
episode: 11 training return: tensor(-603.2922, device='cuda:0')
epoch: 3 test_true_pfm: 1657.1867493210073 sim_pfm: -283.2705447981231
episode: 12 training return: tensor(-144.7953, device='cuda:0')
episode: 13 training return: tensor(-686.3934, device='cuda:0')
episode: 14 training return: tensor(-110.0715, device='cuda:0')
episode: 15 training return: tensor(-621.2556, device='cuda:0')
epoch: 4 test_true_pfm: 3139.206588761082 sim_pfm: -201.78412902118484
episode: 16 training return: tensor(-712.7629, device='cuda:0')
episode: 17 training return: tensor(-233.0343, device='cuda:0')
episode: 18 training return: tensor(-709.3421, device='cuda:0')
episode: 19 training return: tensor(-702.2335, device='cuda:0')
epoch: 5 test_true_pfm: 2971.428886562912 sim_pfm: -121.39935497754293
episode: 20 training return: tensor(-539.3192, device='cuda:0')
episode: 21 training return: tensor(-206.9458, device='cuda:0')
episode: 22 training return: tensor(-694.5566, device='cuda:0')
episode: 23 training return: tensor(-642.6281, device='cuda:0')
epoch: 6 test_true_pfm: 3200.6857903150726 sim_pfm: -64.05821660743095
episode: 24 training return: tensor(-74.9286, device='cuda:0')
episode: 25 training return: tensor(-172.6839, device='cuda:0')
episode: 26 training return: tensor(-83.0094, device='cuda:0')
episode: 27 training return: tensor(-67.6821, device='cuda:0')
epoch: 7 test_true_pfm: 3158.992841274448 sim_pfm: -113.60682718465493
episode: 28 training return: tensor(-79.1626, device='cuda:0')
episode: 29 training return: tensor(-119.6248, device='cuda:0')
episode: 30 training return: tensor(-58.7526, device='cuda:0')
episode: 31 training return: tensor(-88.9041, device='cuda:0')
epoch: 8 test_true_pfm: 2976.0448804080684 sim_pfm: -75.88219020318745
episode: 32 training return: tensor(-705.0700, device='cuda:0')
episode: 33 training return: tensor(-94.8744, device='cuda:0')
episode: 34 training return: tensor(-82.9433, device='cuda:0')
episode: 35 training return: tensor(-85.7886, device='cuda:0')
epoch: 9 test_true_pfm: 2966.3767163031357 sim_pfm: -52.175131682646075
episode: 36 training return: tensor(-67.1132, device='cuda:0')
episode: 37 training return: tensor(-539.4244, device='cuda:0')
episode: 38 training return: tensor(-61.3701, device='cuda:0')
episode: 39 training return: tensor(-106.6282, device='cuda:0')
epoch: 10 test_true_pfm: 2666.938981966848 sim_pfm: -44.01252283009429
episode: 40 training return: tensor(-638.4846, device='cuda:0')
episode: 41 training return: tensor(-61.0221, device='cuda:0')
episode: 42 training return: tensor(-475.4951, device='cuda:0')
episode: 43 training return: tensor(-66.5610, device='cuda:0')
epoch: 11 test_true_pfm: 3236.983410685323 sim_pfm: -16.351075629655195
episode: 44 training return: tensor(-445.6920, device='cuda:0')
episode: 45 training return: tensor(-609.3229, device='cuda:0')
episode: 46 training return: tensor(-80.8972, device='cuda:0')
episode: 47 training return: tensor(-84.6880, device='cuda:0')
epoch: 12 test_true_pfm: 3236.613444796722 sim_pfm: -38.81613824973465
episode: 48 training return: tensor(-31.6942, device='cuda:0')
episode: 49 training return: tensor(-306.1250, device='cuda:0')
episode: 50 training return: tensor(-74.2790, device='cuda:0')
episode: 51 training return: tensor(-65.1890, device='cuda:0')
epoch: 13 test_true_pfm: 3215.492072230525 sim_pfm: -175.7894318815767
episode: 52 training return: tensor(-72.5443, device='cuda:0')
episode: 53 training return: tensor(-79.5782, device='cuda:0')
episode: 54 training return: tensor(-640.0161, device='cuda:0')
episode: 55 training return: tensor(-105.7230, device='cuda:0')
epoch: 14 test_true_pfm: 3248.804948342649 sim_pfm: -42.80960932641756
episode: 56 training return: tensor(-84.6947, device='cuda:0')
episode: 57 training return: tensor(-55.9648, device='cuda:0')
episode: 58 training return: tensor(-60.3292, device='cuda:0')
episode: 59 training return: tensor(-81.4276, device='cuda:0')
epoch: 15 test_true_pfm: 3237.2888293491455 sim_pfm: -43.32968952335068
episode: 60 training return: tensor(-74.9457, device='cuda:0')
episode: 61 training return: tensor(-77.0158, device='cuda:0')
episode: 62 training return: tensor(-78.4516, device='cuda:0')
episode: 63 training return: tensor(-58.3667, device='cuda:0')
epoch: 16 test_true_pfm: 3213.810689879272 sim_pfm: -61.9698533382616
episode: 64 training return: tensor(-51.3705, device='cuda:0')
episode: 65 training return: tensor(-26.7256, device='cuda:0')
episode: 66 training return: tensor(-12.6099, device='cuda:0')
episode: 67 training return: tensor(-62.6294, device='cuda:0')
epoch: 17 test_true_pfm: 3227.967105749596 sim_pfm: -51.22722545608607
episode: 68 training return: tensor(-100.5236, device='cuda:0')
episode: 69 training return: tensor(-56.7592, device='cuda:0')
episode: 70 training return: tensor(-67.3590, device='cuda:0')
episode: 71 training return: tensor(-70.0059, device='cuda:0')
epoch: 18 test_true_pfm: 2658.912091272539 sim_pfm: -195.5654936741145
episode: 72 training return: tensor(-287.5894, device='cuda:0')
episode: 73 training return: tensor(-94.3411, device='cuda:0')
episode: 74 training return: tensor(-79.2002, device='cuda:0')
episode: 75 training return: tensor(-47.2068, device='cuda:0')
epoch: 19 test_true_pfm: 3237.8011978288414 sim_pfm: -26.778987743979087
episode: 76 training return: tensor(-696.5558, device='cuda:0')
episode: 77 training return: tensor(-86.9753, device='cuda:0')
episode: 78 training return: tensor(-55.1355, device='cuda:0')
episode: 79 training return: tensor(-444.6643, device='cuda:0')
epoch: 20 test_true_pfm: 3065.4443396663446 sim_pfm: -34.36552778059073
episode: 80 training return: tensor(-65.5975, device='cuda:0')
episode: 81 training return: tensor(-82.6165, device='cuda:0')
episode: 82 training return: tensor(-374.9499, device='cuda:0')
episode: 83 training return: tensor(-695.3270, device='cuda:0')
epoch: 21 test_true_pfm: 2200.5443991211073 sim_pfm: -230.2517776361201
episode: 84 training return: tensor(-76.2443, device='cuda:0')
episode: 85 training return: tensor(-66.4690, device='cuda:0')
episode: 86 training return: tensor(-27.3214, device='cuda:0')
episode: 87 training return: tensor(-70.7008, device='cuda:0')
epoch: 22 test_true_pfm: 3215.2736828112343 sim_pfm: -114.3246289199451
episode: 88 training return: tensor(-10.8332, device='cuda:0')
episode: 89 training return: tensor(-361.8279, device='cuda:0')
episode: 90 training return: tensor(-78.5856, device='cuda:0')
episode: 91 training return: tensor(-104.8555, device='cuda:0')
epoch: 23 test_true_pfm: 3183.8271999155986 sim_pfm: -92.49414968215085
episode: 92 training return: tensor(-78.2141, device='cuda:0')
episode: 93 training return: tensor(-36.3113, device='cuda:0')
episode: 94 training return: tensor(-385.2767, device='cuda:0')
episode: 95 training return: tensor(-85.8877, device='cuda:0')
epoch: 24 test_true_pfm: 2294.267153832952 sim_pfm: -122.12638212934446
episode: 96 training return: tensor(-43.5583, device='cuda:0')
episode: 97 training return: tensor(-145.7781, device='cuda:0')
episode: 98 training return: tensor(-34.6766, device='cuda:0')
episode: 99 training return: tensor(-93.1355, device='cuda:0')
epoch: 25 test_true_pfm: 2900.813215539289 sim_pfm: -412.2395470793126
episode: 100 training return: tensor(-67.3267, device='cuda:0')
episode: 101 training return: tensor(-46.0363, device='cuda:0')
episode: 102 training return: tensor(-98.3890, device='cuda:0')
episode: 103 training return: tensor(-206.7537, device='cuda:0')
epoch: 26 test_true_pfm: 2865.8950390421 sim_pfm: -211.53615375597533
episode: 104 training return: tensor(-28.8659, device='cuda:0')
episode: 105 training return: tensor(-74.1373, device='cuda:0')
episode: 106 training return: tensor(-104.6504, device='cuda:0')
episode: 107 training return: tensor(-43.6638, device='cuda:0')
epoch: 27 test_true_pfm: 3183.0740234836962 sim_pfm: -143.12810954806628
episode: 108 training return: tensor(-132.2027, device='cuda:0')
episode: 109 training return: tensor(-602.6864, device='cuda:0')
episode: 110 training return: tensor(-81.7441, device='cuda:0')
episode: 111 training return: tensor(-553.4941, device='cuda:0')
epoch: 28 test_true_pfm: 3223.768355066302 sim_pfm: -58.71392221039665
episode: 112 training return: tensor(-48.3995, device='cuda:0')
episode: 113 training return: tensor(-72.5197, device='cuda:0')
episode: 114 training return: tensor(-117.9833, device='cuda:0')
episode: 115 training return: tensor(-45.3085, device='cuda:0')
epoch: 29 test_true_pfm: 3206.6882651154956 sim_pfm: -95.54664510801861
episode: 116 training return: tensor(-77.0804, device='cuda:0')
episode: 117 training return: tensor(-79.1000, device='cuda:0')
episode: 118 training return: tensor(-89.4645, device='cuda:0')
episode: 119 training return: tensor(-68.3094, device='cuda:0')
epoch: 30 test_true_pfm: 3227.4070517251967 sim_pfm: -71.36101611501847
episode: 120 training return: tensor(-101.0357, device='cuda:0')
episode: 121 training return: tensor(-76.3865, device='cuda:0')
episode: 122 training return: tensor(-76.0172, device='cuda:0')
episode: 123 training return: tensor(-72.0050, device='cuda:0')
epoch: 31 test_true_pfm: 3202.3261106920786 sim_pfm: -82.96634637215175
episode: 124 training return: tensor(-87.1540, device='cuda:0')
episode: 125 training return: tensor(-72.9865, device='cuda:0')
episode: 126 training return: tensor(-80.7449, device='cuda:0')
episode: 127 training return: tensor(-28.2165, device='cuda:0')
epoch: 32 test_true_pfm: 3187.58227302564 sim_pfm: -79.91009856265737
episode: 128 training return: tensor(-83.0786, device='cuda:0')
episode: 129 training return: tensor(-79.4779, device='cuda:0')
episode: 130 training return: tensor(-68.9401, device='cuda:0')
episode: 131 training return: tensor(-120.0273, device='cuda:0')
epoch: 33 test_true_pfm: 3237.868907884887 sim_pfm: -69.3552336988602
episode: 132 training return: tensor(-72.8532, device='cuda:0')
episode: 133 training return: tensor(-39.5959, device='cuda:0')
episode: 134 training return: tensor(-68.0062, device='cuda:0')
episode: 135 training return: tensor(-87.8796, device='cuda:0')
epoch: 34 test_true_pfm: 3209.0007897213313 sim_pfm: -79.13987352323602
episode: 136 training return: tensor(-76.3219, device='cuda:0')
episode: 137 training return: tensor(-57.1864, device='cuda:0')
episode: 138 training return: tensor(-50.2943, device='cuda:0')
episode: 139 training return: tensor(-45.6482, device='cuda:0')
epoch: 35 test_true_pfm: 3218.393856997726 sim_pfm: -39.86557609166872
episode: 140 training return: tensor(-71.3040, device='cuda:0')
episode: 141 training return: tensor(-60.9442, device='cuda:0')
episode: 142 training return: tensor(-368.5238, device='cuda:0')
episode: 143 training return: tensor(-66.0151, device='cuda:0')
epoch: 36 test_true_pfm: 3232.258749026993 sim_pfm: -27.667935388628393
episode: 144 training return: tensor(-85.6951, device='cuda:0')
episode: 145 training return: tensor(-100.3631, device='cuda:0')
episode: 146 training return: tensor(-80.4249, device='cuda:0')
episode: 147 training return: tensor(-37.9953, device='cuda:0')
epoch: 37 test_true_pfm: 3235.667316366889 sim_pfm: -38.3114374629998
episode: 148 training return: tensor(-61.4355, device='cuda:0')
episode: 149 training return: tensor(-85.3145, device='cuda:0')
episode: 150 training return: tensor(-68.7246, device='cuda:0')
episode: 151 training return: tensor(-83.1077, device='cuda:0')
epoch: 38 test_true_pfm: 3250.578935850728 sim_pfm: -80.42524840106489
episode: 152 training return: tensor(-74.0630, device='cuda:0')
episode: 153 training return: tensor(-37.8798, device='cuda:0')
episode: 154 training return: tensor(-65.2685, device='cuda:0')
episode: 155 training return: tensor(-45.3060, device='cuda:0')
epoch: 39 test_true_pfm: 3246.871822946903 sim_pfm: -16.62604401666128
episode: 156 training return: tensor(-76.2023, device='cuda:0')
episode: 157 training return: tensor(-276.4289, device='cuda:0')
episode: 158 training return: tensor(-77.9543, device='cuda:0')
episode: 159 training return: tensor(-68.6208, device='cuda:0')
epoch: 40 test_true_pfm: 3238.7227247214782 sim_pfm: -21.237351645793144
episode: 160 training return: tensor(-50.3739, device='cuda:0')
episode: 161 training return: tensor(-82.7638, device='cuda:0')
episode: 162 training return: tensor(-72.2646, device='cuda:0')
episode: 163 training return: tensor(-84.3635, device='cuda:0')
epoch: 41 test_true_pfm: 3225.639656683014 sim_pfm: -32.754819059569854
episode: 164 training return: tensor(-45.3441, device='cuda:0')
episode: 165 training return: tensor(-51.7628, device='cuda:0')
episode: 166 training return: tensor(-31.0425, device='cuda:0')
episode: 167 training return: tensor(-83.4538, device='cuda:0')
epoch: 42 test_true_pfm: 3239.7339417678427 sim_pfm: -41.26726074117081
episode: 168 training return: tensor(-79.9072, device='cuda:0')
episode: 169 training return: tensor(-48.5126, device='cuda:0')
episode: 170 training return: tensor(-59.6870, device='cuda:0')
episode: 171 training return: tensor(-43.4557, device='cuda:0')
epoch: 43 test_true_pfm: 3226.2697279050094 sim_pfm: -66.57358226726258
episode: 172 training return: tensor(-78.0154, device='cuda:0')
episode: 173 training return: tensor(-33.4992, device='cuda:0')
episode: 174 training return: tensor(-73.3906, device='cuda:0')
episode: 175 training return: tensor(-631.6721, device='cuda:0')
epoch: 44 test_true_pfm: 3202.685657249053 sim_pfm: -44.66144979992532
episode: 176 training return: tensor(-50.5743, device='cuda:0')
episode: 177 training return: tensor(-9.3292, device='cuda:0')
episode: 178 training return: tensor(-49.2029, device='cuda:0')
episode: 179 training return: tensor(-102.7457, device='cuda:0')
epoch: 45 test_true_pfm: 3245.4815944047546 sim_pfm: -33.371370332693914
episode: 180 training return: tensor(-65.8944, device='cuda:0')
episode: 181 training return: tensor(-71.6145, device='cuda:0')
episode: 182 training return: tensor(-44.7657, device='cuda:0')
episode: 183 training return: tensor(-55.1508, device='cuda:0')
epoch: 46 test_true_pfm: 3241.165313229785 sim_pfm: -35.77879861162122
episode: 184 training return: tensor(-51.6514, device='cuda:0')
episode: 185 training return: tensor(-93.8323, device='cuda:0')
episode: 186 training return: tensor(-86.0755, device='cuda:0')
episode: 187 training return: tensor(-58.3065, device='cuda:0')
epoch: 47 test_true_pfm: 3225.2086863085096 sim_pfm: -44.73108349874383
episode: 188 training return: tensor(-26.4560, device='cuda:0')
episode: 189 training return: tensor(-65.9388, device='cuda:0')
episode: 190 training return: tensor(-84.4774, device='cuda:0')
episode: 191 training return: tensor(-48.1147, device='cuda:0')
epoch: 48 test_true_pfm: 3228.0756372655 sim_pfm: -44.21258158532631
episode: 192 training return: tensor(-65.9391, device='cuda:0')
episode: 193 training return: tensor(-77.4301, device='cuda:0')
episode: 194 training return: tensor(-92.6679, device='cuda:0')
episode: 195 training return: tensor(-122.3964, device='cuda:0')
epoch: 49 test_true_pfm: 3232.328832771085 sim_pfm: -36.00062560564644
episode: 196 training return: tensor(-74.8465, device='cuda:0')
episode: 197 training return: tensor(-65.0584, device='cuda:0')
episode: 198 training return: tensor(-17.8169, device='cuda:0')
episode: 199 training return: tensor(-44.7657, device='cuda:0')
epoch: 50 test_true_pfm: 3235.2112007483033 sim_pfm: -43.69779426468691
episode: 200 training return: tensor(-42.2324, device='cuda:0')
episode: 201 training return: tensor(-57.2457, device='cuda:0')
episode: 202 training return: tensor(-45.2002, device='cuda:0')
episode: 203 training return: tensor(-42.4277, device='cuda:0')
epoch: 51 test_true_pfm: 3254.0683094964734 sim_pfm: -20.6298291758964
episode: 204 training return: tensor(-61.2390, device='cuda:0')
episode: 205 training return: tensor(-97.3419, device='cuda:0')
episode: 206 training return: tensor(-114.2358, device='cuda:0')
episode: 207 training return: tensor(-64.7274, device='cuda:0')
epoch: 52 test_true_pfm: 3225.8853276820123 sim_pfm: -50.258501536484495
episode: 208 training return: tensor(-81.3325, device='cuda:0')
episode: 209 training return: tensor(-54.0453, device='cuda:0')
episode: 210 training return: tensor(-44.9254, device='cuda:0')
episode: 211 training return: tensor(-478.0902, device='cuda:0')
epoch: 53 test_true_pfm: 3227.2768582221815 sim_pfm: -29.563335090953235
episode: 212 training return: tensor(-47.6722, device='cuda:0')
episode: 213 training return: tensor(-49.3002, device='cuda:0')
episode: 214 training return: tensor(-81.3830, device='cuda:0')
episode: 215 training return: tensor(-80.9762, device='cuda:0')
epoch: 54 test_true_pfm: 3231.3185223704822 sim_pfm: -44.01163099674159
episode: 216 training return: tensor(-81.3742, device='cuda:0')
episode: 217 training return: tensor(-55.3050, device='cuda:0')
episode: 218 training return: tensor(-44.0507, device='cuda:0')
episode: 219 training return: tensor(-66.9588, device='cuda:0')
epoch: 55 test_true_pfm: 3211.8537975633444 sim_pfm: -64.42326507751325
episode: 220 training return: tensor(-72.1316, device='cuda:0')
episode: 221 training return: tensor(-44.0717, device='cuda:0')
episode: 222 training return: tensor(-63.1097, device='cuda:0')
episode: 223 training return: tensor(-278.5812, device='cuda:0')
epoch: 56 test_true_pfm: 3218.3297465683972 sim_pfm: -45.772862735408125
episode: 224 training return: tensor(-73.0761, device='cuda:0')
episode: 225 training return: tensor(-45.5522, device='cuda:0')
episode: 226 training return: tensor(-698.7119, device='cuda:0')
episode: 227 training return: tensor(-69.5804, device='cuda:0')
epoch: 57 test_true_pfm: 3231.9791663306805 sim_pfm: -53.42896980525499
episode: 228 training return: tensor(-33.3879, device='cuda:0')
episode: 229 training return: tensor(-51.8699, device='cuda:0')
episode: 230 training return: tensor(-55.4058, device='cuda:0')
episode: 231 training return: tensor(-71.4881, device='cuda:0')
epoch: 58 test_true_pfm: 3231.4953773531684 sim_pfm: -34.46360529047282
episode: 232 training return: tensor(-67.5802, device='cuda:0')
episode: 233 training return: tensor(-48.7621, device='cuda:0')
episode: 234 training return: tensor(-86.1336, device='cuda:0')
episode: 235 training return: tensor(-32.9837, device='cuda:0')
epoch: 59 test_true_pfm: 3230.7157752317994 sim_pfm: -46.041488600157514
episode: 236 training return: tensor(-70.1347, device='cuda:0')
episode: 237 training return: tensor(-71.5547, device='cuda:0')
episode: 238 training return: tensor(-88.7624, device='cuda:0')
episode: 239 training return: tensor(-46.8782, device='cuda:0')
epoch: 60 test_true_pfm: 3250.8725240156286 sim_pfm: -37.43618454254465
episode: 240 training return: tensor(-64.3289, device='cuda:0')
episode: 241 training return: tensor(-61.7706, device='cuda:0')
episode: 242 training return: tensor(-59.3446, device='cuda:0')
episode: 243 training return: tensor(-303.1559, device='cuda:0')
epoch: 61 test_true_pfm: 3246.667486740314 sim_pfm: -8.957929237532275
episode: 244 training return: tensor(-89.8133, device='cuda:0')
episode: 245 training return: tensor(-78.6807, device='cuda:0')
episode: 246 training return: tensor(-54.5619, device='cuda:0')
episode: 247 training return: tensor(-100.5798, device='cuda:0')
epoch: 62 test_true_pfm: 3235.349108625443 sim_pfm: -37.008046824950725
episode: 248 training return: tensor(-77.7405, device='cuda:0')
episode: 249 training return: tensor(-41.2368, device='cuda:0')
episode: 250 training return: tensor(-66.0751, device='cuda:0')
episode: 251 training return: tensor(-75.8683, device='cuda:0')
epoch: 63 test_true_pfm: 3243.7257008729525 sim_pfm: -29.84139775519725
episode: 252 training return: tensor(-51.9063, device='cuda:0')
episode: 253 training return: tensor(-61.6103, device='cuda:0')
episode: 254 training return: tensor(-70.3747, device='cuda:0')
episode: 255 training return: tensor(-57.9740, device='cuda:0')
epoch: 64 test_true_pfm: 3218.6134542712693 sim_pfm: -54.155358596258644
episode: 256 training return: tensor(-49.6613, device='cuda:0')
episode: 257 training return: tensor(-59.5528, device='cuda:0')
episode: 258 training return: tensor(-684.1635, device='cuda:0')
episode: 259 training return: tensor(-45.7623, device='cuda:0')
epoch: 65 test_true_pfm: 3247.0753640888856 sim_pfm: -31.19197325927477
episode: 260 training return: tensor(-102.9760, device='cuda:0')
episode: 261 training return: tensor(-222.9549, device='cuda:0')
episode: 262 training return: tensor(-62.5128, device='cuda:0')
episode: 263 training return: tensor(-58.3529, device='cuda:0')
epoch: 66 test_true_pfm: 3222.4790233672015 sim_pfm: -43.84453872910429
episode: 264 training return: tensor(-73.2336, device='cuda:0')
episode: 265 training return: tensor(-442.4575, device='cuda:0')
episode: 266 training return: tensor(-98.4382, device='cuda:0')
episode: 267 training return: tensor(-49.1994, device='cuda:0')
epoch: 67 test_true_pfm: 3246.195063711275 sim_pfm: -19.517555897900213
episode: 268 training return: tensor(-55.1466, device='cuda:0')
episode: 269 training return: tensor(-67.2772, device='cuda:0')
episode: 270 training return: tensor(-61.2508, device='cuda:0')
episode: 271 training return: tensor(-87.1691, device='cuda:0')
epoch: 68 test_true_pfm: 3218.673592790398 sim_pfm: -57.501406138277765
episode: 272 training return: tensor(-59.2107, device='cuda:0')
episode: 273 training return: tensor(-71.1283, device='cuda:0')
episode: 274 training return: tensor(-76.9740, device='cuda:0')
episode: 275 training return: tensor(-75.1158, device='cuda:0')
epoch: 69 test_true_pfm: 3228.3116913065787 sim_pfm: -29.249620479900234
episode: 276 training return: tensor(-61.9638, device='cuda:0')
episode: 277 training return: tensor(-49.8974, device='cuda:0')
episode: 278 training return: tensor(-66.0602, device='cuda:0')
episode: 279 training return: tensor(-70.9536, device='cuda:0')
epoch: 70 test_true_pfm: 3227.008639977734 sim_pfm: -43.74278528621653
episode: 280 training return: tensor(-49.8017, device='cuda:0')
episode: 281 training return: tensor(-83.7952, device='cuda:0')
episode: 282 training return: tensor(-65.9283, device='cuda:0')
episode: 283 training return: tensor(-57.9218, device='cuda:0')
epoch: 71 test_true_pfm: 3231.732617424191 sim_pfm: -34.1265422134893
episode: 284 training return: tensor(-62.7280, device='cuda:0')
episode: 285 training return: tensor(-51.6448, device='cuda:0')
episode: 286 training return: tensor(-596.6778, device='cuda:0')
episode: 287 training return: tensor(-86.0334, device='cuda:0')
epoch: 72 test_true_pfm: 3224.7313615880025 sim_pfm: -44.17187233467121
episode: 288 training return: tensor(-67.0160, device='cuda:0')
episode: 289 training return: tensor(-81.1418, device='cuda:0')
episode: 290 training return: tensor(-84.6657, device='cuda:0')
episode: 291 training return: tensor(-51.8195, device='cuda:0')
epoch: 73 test_true_pfm: 3245.952574391704 sim_pfm: -38.63026611293511
episode: 292 training return: tensor(-55.8049, device='cuda:0')
episode: 293 training return: tensor(-74.1135, device='cuda:0')
episode: 294 training return: tensor(-31.4923, device='cuda:0')
episode: 295 training return: tensor(-67.6595, device='cuda:0')
epoch: 74 test_true_pfm: 3243.4212058428293 sim_pfm: -48.06683897824647
episode: 296 training return: tensor(-72.2612, device='cuda:0')
episode: 297 training return: tensor(-60.0392, device='cuda:0')
episode: 298 training return: tensor(-33.2539, device='cuda:0')
episode: 299 training return: tensor(-77.8066, device='cuda:0')
epoch: 75 test_true_pfm: 3228.857211199982 sim_pfm: -48.42759031248473
episode: 300 training return: tensor(-64.5533, device='cuda:0')
episode: 301 training return: tensor(-69.3298, device='cuda:0')
episode: 302 training return: tensor(-500.7223, device='cuda:0')
episode: 303 training return: tensor(-39.9554, device='cuda:0')
epoch: 76 test_true_pfm: 3256.2603472797327 sim_pfm: -16.35949856535687
episode: 304 training return: tensor(-52.0835, device='cuda:0')
episode: 305 training return: tensor(-82.8325, device='cuda:0')
episode: 306 training return: tensor(-83.8832, device='cuda:0')
episode: 307 training return: tensor(-51.5091, device='cuda:0')
epoch: 77 test_true_pfm: 3225.020428669542 sim_pfm: -39.96769193194147
episode: 308 training return: tensor(-86.3764, device='cuda:0')
episode: 309 training return: tensor(-38.5357, device='cuda:0')
episode: 310 training return: tensor(-49.0157, device='cuda:0')
episode: 311 training return: tensor(-49.7155, device='cuda:0')
epoch: 78 test_true_pfm: 3256.1005617066126 sim_pfm: -14.305468707461841
episode: 312 training return: tensor(-101.6678, device='cuda:0')
episode: 313 training return: tensor(-70.1973, device='cuda:0')
episode: 314 training return: tensor(-63.0594, device='cuda:0')
episode: 315 training return: tensor(-72.9249, device='cuda:0')
epoch: 79 test_true_pfm: 3232.075057941842 sim_pfm: -33.36095792479076
episode: 316 training return: tensor(-45.7809, device='cuda:0')
episode: 317 training return: tensor(-51.1715, device='cuda:0')
episode: 318 training return: tensor(-54.2004, device='cuda:0')
episode: 319 training return: tensor(-55.6634, device='cuda:0')
epoch: 80 test_true_pfm: 3231.3693353232798 sim_pfm: -33.97133688822699
episode: 320 training return: tensor(-47.2397, device='cuda:0')
episode: 321 training return: tensor(-48.2213, device='cuda:0')
episode: 322 training return: tensor(-51.6786, device='cuda:0')
episode: 323 training return: tensor(-52.7712, device='cuda:0')
epoch: 81 test_true_pfm: 3220.366755652241 sim_pfm: -40.51498850532031
episode: 324 training return: tensor(-35.2775, device='cuda:0')
episode: 325 training return: tensor(-56.5531, device='cuda:0')
episode: 326 training return: tensor(-87.2566, device='cuda:0')
episode: 327 training return: tensor(-43.5605, device='cuda:0')
epoch: 82 test_true_pfm: 3215.985456792752 sim_pfm: -59.1058126767069
episode: 328 training return: tensor(-53.6141, device='cuda:0')
episode: 329 training return: tensor(-63.2560, device='cuda:0')
episode: 330 training return: tensor(-56.0311, device='cuda:0')
episode: 331 training return: tensor(-44.9811, device='cuda:0')
epoch: 83 test_true_pfm: 3267.4075177039035 sim_pfm: -15.291042498633033
episode: 332 training return: tensor(-270.4754, device='cuda:0')
episode: 333 training return: tensor(-51.9376, device='cuda:0')
episode: 334 training return: tensor(-45.3614, device='cuda:0')
episode: 335 training return: tensor(-79.9433, device='cuda:0')
epoch: 84 test_true_pfm: 3226.339263813273 sim_pfm: -48.999570326899025
episode: 336 training return: tensor(-33.3228, device='cuda:0')
episode: 337 training return: tensor(-60.4004, device='cuda:0')
episode: 338 training return: tensor(-30.7810, device='cuda:0')
episode: 339 training return: tensor(-29.6908, device='cuda:0')
epoch: 85 test_true_pfm: 3226.4087127794305 sim_pfm: -34.5316863170107
episode: 340 training return: tensor(-95.2972, device='cuda:0')
episode: 341 training return: tensor(-51.2455, device='cuda:0')
episode: 342 training return: tensor(-46.9031, device='cuda:0')
episode: 343 training return: tensor(-49.7047, device='cuda:0')
epoch: 86 test_true_pfm: 3230.554028542396 sim_pfm: -38.106189980336545
episode: 344 training return: tensor(-54.8359, device='cuda:0')
episode: 345 training return: tensor(-57.1081, device='cuda:0')
episode: 346 training return: tensor(-41.8909, device='cuda:0')
episode: 347 training return: tensor(-56.8423, device='cuda:0')
epoch: 87 test_true_pfm: 3239.201877551072 sim_pfm: -41.29546002885521
episode: 348 training return: tensor(-70.2151, device='cuda:0')
episode: 349 training return: tensor(-52.0777, device='cuda:0')
episode: 350 training return: tensor(-58.3525, device='cuda:0')
episode: 351 training return: tensor(-48.8097, device='cuda:0')
epoch: 88 test_true_pfm: 3240.414545501684 sim_pfm: -29.549134525761474
episode: 352 training return: tensor(-59.9416, device='cuda:0')
episode: 353 training return: tensor(-7.0222, device='cuda:0')
episode: 354 training return: tensor(-71.9109, device='cuda:0')
episode: 355 training return: tensor(-102.6653, device='cuda:0')
epoch: 89 test_true_pfm: 3224.243958209936 sim_pfm: -40.287324732654575
episode: 356 training return: tensor(-66.6598, device='cuda:0')
episode: 357 training return: tensor(-39.4617, device='cuda:0')
episode: 358 training return: tensor(-39.4172, device='cuda:0')
episode: 359 training return: tensor(-409.7504, device='cuda:0')
epoch: 90 test_true_pfm: 3224.6843938022807 sim_pfm: -47.82265611225739
episode: 360 training return: tensor(-82.3247, device='cuda:0')
episode: 361 training return: tensor(-48.1868, device='cuda:0')
episode: 362 training return: tensor(-45.2397, device='cuda:0')
episode: 363 training return: tensor(-47.7847, device='cuda:0')
epoch: 91 test_true_pfm: 3226.614689385038 sim_pfm: -49.490601441638624
episode: 364 training return: tensor(-74.0859, device='cuda:0')
episode: 365 training return: tensor(-57.1060, device='cuda:0')
episode: 366 training return: tensor(-45.7868, device='cuda:0')
episode: 367 training return: tensor(-83.3829, device='cuda:0')
epoch: 92 test_true_pfm: 3221.9658286544695 sim_pfm: -56.219302832643734
episode: 368 training return: tensor(-66.7338, device='cuda:0')
episode: 369 training return: tensor(-92.4960, device='cuda:0')
episode: 370 training return: tensor(-39.1928, device='cuda:0')
episode: 371 training return: tensor(-57.9888, device='cuda:0')
epoch: 93 test_true_pfm: 3219.9096045721017 sim_pfm: -48.1736956651633
episode: 372 training return: tensor(-92.2370, device='cuda:0')
episode: 373 training return: tensor(-66.9375, device='cuda:0')
episode: 374 training return: tensor(-41.4737, device='cuda:0')
episode: 375 training return: tensor(-107.4863, device='cuda:0')
epoch: 94 test_true_pfm: 3232.516518225242 sim_pfm: -40.23010097288837
episode: 376 training return: tensor(-67.4082, device='cuda:0')
episode: 377 training return: tensor(-95.2310, device='cuda:0')
episode: 378 training return: tensor(-55.5784, device='cuda:0')
episode: 379 training return: tensor(-23.9071, device='cuda:0')
epoch: 95 test_true_pfm: 3204.2251399511138 sim_pfm: -51.34776299035487
episode: 380 training return: tensor(-48.2392, device='cuda:0')
episode: 381 training return: tensor(-68.1909, device='cuda:0')
episode: 382 training return: tensor(-75.3026, device='cuda:0')
episode: 383 training return: tensor(-56.4211, device='cuda:0')
epoch: 96 test_true_pfm: 3235.557418699375 sim_pfm: -40.9628673926054
episode: 384 training return: tensor(-21.0476, device='cuda:0')
episode: 385 training return: tensor(-38.4403, device='cuda:0')
episode: 386 training return: tensor(-52.9474, device='cuda:0')
episode: 387 training return: tensor(-61.2215, device='cuda:0')
epoch: 97 test_true_pfm: 3229.5403067561074 sim_pfm: -42.138268965354655
episode: 388 training return: tensor(-38.3399, device='cuda:0')
episode: 389 training return: tensor(-43.7781, device='cuda:0')
episode: 390 training return: tensor(-66.9328, device='cuda:0')
episode: 391 training return: tensor(-56.4409, device='cuda:0')
epoch: 98 test_true_pfm: 3228.165024944985 sim_pfm: -46.43540635479925
episode: 392 training return: tensor(-70.2128, device='cuda:0')
episode: 393 training return: tensor(-40.6360, device='cuda:0')
episode: 394 training return: tensor(-56.7002, device='cuda:0')
episode: 395 training return: tensor(-344.6754, device='cuda:0')
epoch: 99 test_true_pfm: 3272.6800480467696 sim_pfm: -5.75054219882198
episode: 396 training return: tensor(-619.6543, device='cuda:0')
episode: 397 training return: tensor(-44.1234, device='cuda:0')
episode: 398 training return: tensor(-64.9655, device='cuda:0')
episode: 399 training return: tensor(-50.9646, device='cuda:0')
epoch: 100 test_true_pfm: 3234.0231582397014 sim_pfm: -41.54194340653097
episode: 400 training return: tensor(-48.6465, device='cuda:0')
episode: 401 training return: tensor(-57.8275, device='cuda:0')
episode: 402 training return: tensor(-69.9040, device='cuda:0')
episode: 403 training return: tensor(-52.1297, device='cuda:0')
epoch: 101 test_true_pfm: 3230.837214637932 sim_pfm: -44.342035009389896
episode: 404 training return: tensor(-592.3604, device='cuda:0')
episode: 405 training return: tensor(-42.9540, device='cuda:0')
episode: 406 training return: tensor(-58.4636, device='cuda:0')
episode: 407 training return: tensor(-40.4219, device='cuda:0')
epoch: 102 test_true_pfm: 3248.6931973805135 sim_pfm: -37.81947433451811
episode: 408 training return: tensor(-59.1364, device='cuda:0')
episode: 409 training return: tensor(-77.8461, device='cuda:0')
episode: 410 training return: tensor(-33.9453, device='cuda:0')
episode: 411 training return: tensor(-79.6662, device='cuda:0')
epoch: 103 test_true_pfm: 3224.5492195380757 sim_pfm: -43.64323271106696
episode: 412 training return: tensor(-86.3669, device='cuda:0')
episode: 413 training return: tensor(-95.1515, device='cuda:0')
episode: 414 training return: tensor(-58.3286, device='cuda:0')
episode: 415 training return: tensor(-46.8266, device='cuda:0')
epoch: 104 test_true_pfm: 3232.3675838848612 sim_pfm: -51.5561043045988
episode: 416 training return: tensor(-41.4156, device='cuda:0')
episode: 417 training return: tensor(-74.9663, device='cuda:0')
episode: 418 training return: tensor(-87.2072, device='cuda:0')
episode: 419 training return: tensor(-55.7521, device='cuda:0')
epoch: 105 test_true_pfm: 3232.5815845191796 sim_pfm: -43.35735786803222
episode: 420 training return: tensor(-57.0023, device='cuda:0')
episode: 421 training return: tensor(-88.6008, device='cuda:0')
episode: 422 training return: tensor(-46.1404, device='cuda:0')
episode: 423 training return: tensor(-59.1583, device='cuda:0')
epoch: 106 test_true_pfm: 3232.060248664711 sim_pfm: -40.29898133688645
episode: 424 training return: tensor(-58.2350, device='cuda:0')
episode: 425 training return: tensor(-73.8108, device='cuda:0')
episode: 426 training return: tensor(-50.3161, device='cuda:0')
episode: 427 training return: tensor(-87.9475, device='cuda:0')
epoch: 107 test_true_pfm: 3221.1602029566384 sim_pfm: -51.03215137736212
episode: 428 training return: tensor(-56.9592, device='cuda:0')
episode: 429 training return: tensor(-46.9607, device='cuda:0')
episode: 430 training return: tensor(-57.2587, device='cuda:0')
episode: 431 training return: tensor(-74.6730, device='cuda:0')
epoch: 108 test_true_pfm: 3206.782889105943 sim_pfm: -54.60263392829802
episode: 432 training return: tensor(-78.7514, device='cuda:0')
episode: 433 training return: tensor(-46.6452, device='cuda:0')
episode: 434 training return: tensor(-38.9344, device='cuda:0')
episode: 435 training return: tensor(-35.7967, device='cuda:0')
epoch: 109 test_true_pfm: 3213.5599626866133 sim_pfm: -59.27584745092705
episode: 436 training return: tensor(-59.6343, device='cuda:0')
episode: 437 training return: tensor(-67.4615, device='cuda:0')
episode: 438 training return: tensor(-83.7099, device='cuda:0')
episode: 439 training return: tensor(-61.1750, device='cuda:0')
epoch: 110 test_true_pfm: 3233.017524259905 sim_pfm: -43.60599787222842
episode: 440 training return: tensor(-48.7775, device='cuda:0')
episode: 441 training return: tensor(-38.5851, device='cuda:0')
episode: 442 training return: tensor(-43.9903, device='cuda:0')
episode: 443 training return: tensor(-78.2730, device='cuda:0')
epoch: 111 test_true_pfm: 3228.3008955353835 sim_pfm: -42.25495223485632
episode: 444 training return: tensor(-46.6496, device='cuda:0')
episode: 445 training return: tensor(-40.3050, device='cuda:0')
episode: 446 training return: tensor(-56.0631, device='cuda:0')
episode: 447 training return: tensor(-77.2533, device='cuda:0')
epoch: 112 test_true_pfm: 3239.085368180337 sim_pfm: -43.130308255184595
episode: 448 training return: tensor(-195.8013, device='cuda:0')
episode: 449 training return: tensor(-77.3716, device='cuda:0')
episode: 450 training return: tensor(-34.5369, device='cuda:0')
episode: 451 training return: tensor(-64.2248, device='cuda:0')
epoch: 113 test_true_pfm: 3224.932273278735 sim_pfm: -55.101569298606286
episode: 452 training return: tensor(-57.2222, device='cuda:0')
episode: 453 training return: tensor(-62.1249, device='cuda:0')
episode: 454 training return: tensor(-81.1505, device='cuda:0')
episode: 455 training return: tensor(-82.7819, device='cuda:0')
epoch: 114 test_true_pfm: 3225.3138813365927 sim_pfm: -33.277103258073716
episode: 456 training return: tensor(-286.2677, device='cuda:0')
episode: 457 training return: tensor(-55.3440, device='cuda:0')
episode: 458 training return: tensor(-73.6648, device='cuda:0')
episode: 459 training return: tensor(-57.2803, device='cuda:0')
epoch: 115 test_true_pfm: 3196.94551123356 sim_pfm: -54.306016895978246
episode: 460 training return: tensor(-64.1411, device='cuda:0')
episode: 461 training return: tensor(-55.2938, device='cuda:0')
episode: 462 training return: tensor(-68.7899, device='cuda:0')
episode: 463 training return: tensor(-80.1781, device='cuda:0')
epoch: 116 test_true_pfm: 3219.2279968925836 sim_pfm: -57.14559612132143
episode: 464 training return: tensor(-51.3414, device='cuda:0')
episode: 465 training return: tensor(-68.0250, device='cuda:0')
episode: 466 training return: tensor(-78.5873, device='cuda:0')
episode: 467 training return: tensor(-92.6084, device='cuda:0')
epoch: 117 test_true_pfm: 3223.6667979764975 sim_pfm: -44.38798599440876
episode: 468 training return: tensor(-97.1980, device='cuda:0')
episode: 469 training return: tensor(-63.5472, device='cuda:0')
episode: 470 training return: tensor(-57.0882, device='cuda:0')
episode: 471 training return: tensor(-60.2920, device='cuda:0')
epoch: 118 test_true_pfm: 3220.05995835379 sim_pfm: -55.94522583768897
episode: 472 training return: tensor(-56.1485, device='cuda:0')
episode: 473 training return: tensor(-56.9483, device='cuda:0')
episode: 474 training return: tensor(-36.1697, device='cuda:0')
episode: 475 training return: tensor(-66.8510, device='cuda:0')
epoch: 119 test_true_pfm: 3240.522752576276 sim_pfm: -34.15354327736228
episode: 476 training return: tensor(-67.1034, device='cuda:0')
episode: 477 training return: tensor(-436.9990, device='cuda:0')
episode: 478 training return: tensor(-18.0763, device='cuda:0')
episode: 479 training return: tensor(-77.9953, device='cuda:0')
epoch: 120 test_true_pfm: 3213.2669717440763 sim_pfm: -54.8244237890176
episode: 480 training return: tensor(-85.2106, device='cuda:0')
episode: 481 training return: tensor(-69.6543, device='cuda:0')
episode: 482 training return: tensor(-69.1938, device='cuda:0')
episode: 483 training return: tensor(-43.5323, device='cuda:0')
epoch: 121 test_true_pfm: 3218.7476175128772 sim_pfm: -33.85076337016653
episode: 484 training return: tensor(-56.4600, device='cuda:0')
episode: 485 training return: tensor(-67.2309, device='cuda:0')
episode: 486 training return: tensor(-66.0650, device='cuda:0')
episode: 487 training return: tensor(-87.1492, device='cuda:0')
epoch: 122 test_true_pfm: 3232.585122248345 sim_pfm: -41.92299870444307
episode: 488 training return: tensor(-40.7835, device='cuda:0')
episode: 489 training return: tensor(-70.4399, device='cuda:0')
episode: 490 training return: tensor(-71.6240, device='cuda:0')
episode: 491 training return: tensor(-31.0526, device='cuda:0')
epoch: 123 test_true_pfm: 3243.272291145881 sim_pfm: -39.92558936084
episode: 492 training return: tensor(-61.9336, device='cuda:0')
episode: 493 training return: tensor(-51.0671, device='cuda:0')
episode: 494 training return: tensor(-61.7180, device='cuda:0')
episode: 495 training return: tensor(-59.8253, device='cuda:0')
epoch: 124 test_true_pfm: 3233.760467087588 sim_pfm: -40.571748228181
episode: 496 training return: tensor(-80.0820, device='cuda:0')
episode: 497 training return: tensor(-78.4657, device='cuda:0')
episode: 498 training return: tensor(-81.5276, device='cuda:0')
episode: 499 training return: tensor(-54.5737, device='cuda:0')
epoch: 125 test_true_pfm: 3228.006830202294 sim_pfm: -47.277410929653946
episode: 500 training return: tensor(-44.2206, device='cuda:0')
episode: 501 training return: tensor(-45.2847, device='cuda:0')
episode: 502 training return: tensor(-62.2663, device='cuda:0')
episode: 503 training return: tensor(-77.3402, device='cuda:0')
epoch: 126 test_true_pfm: 3217.148153005015 sim_pfm: -67.89485180409004
episode: 504 training return: tensor(-66.9899, device='cuda:0')
episode: 505 training return: tensor(-280.0179, device='cuda:0')
episode: 506 training return: tensor(-89.2861, device='cuda:0')
episode: 507 training return: tensor(-42.9771, device='cuda:0')
epoch: 127 test_true_pfm: 3224.9068556230254 sim_pfm: -47.64485322945014
episode: 508 training return: tensor(-86.3322, device='cuda:0')
episode: 509 training return: tensor(-55.7086, device='cuda:0')
episode: 510 training return: tensor(-50.7391, device='cuda:0')
episode: 511 training return: tensor(-62.7319, device='cuda:0')
epoch: 128 test_true_pfm: 3229.2110477796937 sim_pfm: -36.05933658553598
episode: 512 training return: tensor(-63.1725, device='cuda:0')
episode: 513 training return: tensor(-55.5560, device='cuda:0')
episode: 514 training return: tensor(-58.7051, device='cuda:0')
episode: 515 training return: tensor(-74.4200, device='cuda:0')
epoch: 129 test_true_pfm: 3228.886870437295 sim_pfm: -47.27095958335364
episode: 516 training return: tensor(-61.2080, device='cuda:0')
episode: 517 training return: tensor(-69.8273, device='cuda:0')
episode: 518 training return: tensor(-35.5654, device='cuda:0')
episode: 519 training return: tensor(-48.9028, device='cuda:0')
epoch: 130 test_true_pfm: 3231.6927772091144 sim_pfm: -31.209493136955036
episode: 520 training return: tensor(-52.5851, device='cuda:0')
episode: 521 training return: tensor(-54.4981, device='cuda:0')
episode: 522 training return: tensor(-82.1519, device='cuda:0')
episode: 523 training return: tensor(-58.8079, device='cuda:0')
epoch: 131 test_true_pfm: 3210.9366890917613 sim_pfm: -42.881525755898714
episode: 524 training return: tensor(-45.6491, device='cuda:0')
episode: 525 training return: tensor(-75.1116, device='cuda:0')
episode: 526 training return: tensor(-16.6184, device='cuda:0')
episode: 527 training return: tensor(-69.5065, device='cuda:0')
epoch: 132 test_true_pfm: 3221.7492498294873 sim_pfm: -52.71561708001536
episode: 528 training return: tensor(-41.0304, device='cuda:0')
episode: 529 training return: tensor(-27.7358, device='cuda:0')
episode: 530 training return: tensor(-69.6303, device='cuda:0')
episode: 531 training return: tensor(-85.9277, device='cuda:0')
epoch: 133 test_true_pfm: 3223.8906979417484 sim_pfm: -44.45807189642801
episode: 532 training return: tensor(-57.0226, device='cuda:0')
episode: 533 training return: tensor(-30.3013, device='cuda:0')
episode: 534 training return: tensor(-66.4796, device='cuda:0')
episode: 535 training return: tensor(-58.1106, device='cuda:0')
epoch: 134 test_true_pfm: 3228.42688970237 sim_pfm: -40.393695447506616
episode: 536 training return: tensor(-56.6094, device='cuda:0')
episode: 537 training return: tensor(-61.2666, device='cuda:0')
episode: 538 training return: tensor(-70.4728, device='cuda:0')
episode: 539 training return: tensor(-91.6428, device='cuda:0')
epoch: 135 test_true_pfm: 3217.172941466653 sim_pfm: -56.14821964673077
episode: 540 training return: tensor(-62.8734, device='cuda:0')
episode: 541 training return: tensor(-68.0379, device='cuda:0')
episode: 542 training return: tensor(-55.2383, device='cuda:0')
episode: 543 training return: tensor(-31.3832, device='cuda:0')
epoch: 136 test_true_pfm: 3218.52685401684 sim_pfm: -53.81775115670947
episode: 544 training return: tensor(-78.4105, device='cuda:0')
episode: 545 training return: tensor(-73.1820, device='cuda:0')
episode: 546 training return: tensor(-80.6474, device='cuda:0')
episode: 547 training return: tensor(-65.7615, device='cuda:0')
epoch: 137 test_true_pfm: 3236.1417466764674 sim_pfm: -55.834170718716145
episode: 548 training return: tensor(-64.2950, device='cuda:0')
episode: 549 training return: tensor(-69.9140, device='cuda:0')
episode: 550 training return: tensor(-63.5302, device='cuda:0')
episode: 551 training return: tensor(-51.1610, device='cuda:0')
epoch: 138 test_true_pfm: 3232.112393829811 sim_pfm: -41.1067287241361
episode: 552 training return: tensor(-57.8710, device='cuda:0')
episode: 553 training return: tensor(-83.0782, device='cuda:0')
episode: 554 training return: tensor(-41.9804, device='cuda:0')
episode: 555 training return: tensor(-44.0126, device='cuda:0')
epoch: 139 test_true_pfm: 3225.018628521792 sim_pfm: -43.858631029210905
episode: 556 training return: tensor(-35.4473, device='cuda:0')
episode: 557 training return: tensor(-677.6529, device='cuda:0')
episode: 558 training return: tensor(-60.5955, device='cuda:0')
episode: 559 training return: tensor(-38.7542, device='cuda:0')
epoch: 140 test_true_pfm: 3228.2837471232924 sim_pfm: -34.43963711431328
episode: 560 training return: tensor(-53.1416, device='cuda:0')
episode: 561 training return: tensor(-46.4025, device='cuda:0')
episode: 562 training return: tensor(-47.0429, device='cuda:0')
episode: 563 training return: tensor(-24.0981, device='cuda:0')
epoch: 141 test_true_pfm: 3214.6281333336374 sim_pfm: -42.00802238094426
episode: 564 training return: tensor(-85.7125, device='cuda:0')
episode: 565 training return: tensor(-90.7371, device='cuda:0')
episode: 566 training return: tensor(-74.8892, device='cuda:0')
episode: 567 training return: tensor(-44.4946, device='cuda:0')
epoch: 142 test_true_pfm: 3225.9476579941725 sim_pfm: -48.61111457627461
episode: 568 training return: tensor(-75.5470, device='cuda:0')
episode: 569 training return: tensor(-73.4598, device='cuda:0')
episode: 570 training return: tensor(-80.5936, device='cuda:0')
episode: 571 training return: tensor(-57.8177, device='cuda:0')
epoch: 143 test_true_pfm: 3202.510416854364 sim_pfm: -63.01193704834441
episode: 572 training return: tensor(-96.6963, device='cuda:0')
episode: 573 training return: tensor(-681.6720, device='cuda:0')
episode: 574 training return: tensor(-63.2175, device='cuda:0')
episode: 575 training return: tensor(-605.3710, device='cuda:0')
epoch: 144 test_true_pfm: 3216.498309064529 sim_pfm: -51.118400452280184
episode: 576 training return: tensor(-50.6285, device='cuda:0')
episode: 577 training return: tensor(-65.7401, device='cuda:0')
episode: 578 training return: tensor(-89.6657, device='cuda:0')
episode: 579 training return: tensor(-43.6463, device='cuda:0')
epoch: 145 test_true_pfm: 3225.773407019565 sim_pfm: -42.08308813605496
episode: 580 training return: tensor(-58.5148, device='cuda:0')
episode: 581 training return: tensor(-41.9184, device='cuda:0')
episode: 582 training return: tensor(-698.7095, device='cuda:0')
episode: 583 training return: tensor(-64.2695, device='cuda:0')
epoch: 146 test_true_pfm: 3226.3111693555525 sim_pfm: -45.53305681218626
episode: 584 training return: tensor(-74.6523, device='cuda:0')
episode: 585 training return: tensor(-68.2600, device='cuda:0')
episode: 586 training return: tensor(-65.5859, device='cuda:0')
episode: 587 training return: tensor(-65.6048, device='cuda:0')
epoch: 147 test_true_pfm: 3227.143532632604 sim_pfm: -42.738716402847786
episode: 588 training return: tensor(-84.5491, device='cuda:0')
episode: 589 training return: tensor(-33.1982, device='cuda:0')
episode: 590 training return: tensor(-120.4999, device='cuda:0')
episode: 591 training return: tensor(-110.5685, device='cuda:0')
epoch: 148 test_true_pfm: 3199.5189011136604 sim_pfm: -46.43863465159666
episode: 592 training return: tensor(-81.5046, device='cuda:0')
episode: 593 training return: tensor(-81.4832, device='cuda:0')
episode: 594 training return: tensor(-23.6026, device='cuda:0')
episode: 595 training return: tensor(-61.7718, device='cuda:0')
epoch: 149 test_true_pfm: 3236.7220393994744 sim_pfm: -32.236243327264674
episode: 596 training return: tensor(-57.2875, device='cuda:0')
episode: 597 training return: tensor(-64.2630, device='cuda:0')
episode: 598 training return: tensor(-50.9243, device='cuda:0')
episode: 599 training return: tensor(-39.0233, device='cuda:0')
epoch: 150 test_true_pfm: 3221.1287132198636 sim_pfm: -53.642927144644396
