['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '0']
epoch: 0 training_loss 0.29305146969854834 test_loss: 0.24658167362213135
epoch: 1 training_loss 0.18192572079598904 test_loss: 0.1674121141433716
epoch: 2 training_loss 0.17870817750692367 test_loss: 0.16892150640487671
epoch: 3 training_loss 0.16791729498654603 test_loss: 0.13706129789352417
epoch: 4 training_loss 0.15320542439818383 test_loss: 0.1418803334236145
epoch: 5 training_loss 0.16105455733835697 test_loss: 0.13868370056152343
epoch: 6 training_loss 0.1640392840653658 test_loss: 0.13576159477233887
epoch: 7 training_loss 0.1509792894870043 test_loss: 0.15661280155181884
epoch: 8 training_loss 0.15014736745506524 test_loss: 0.1373240113258362
epoch: 9 training_loss 0.1483340672403574 test_loss: 0.14094966650009155
epoch: 10 training_loss 0.1451364663243294 test_loss: 0.15104384422302247
epoch: 11 training_loss 0.15495673336088658 test_loss: 0.16752567291259765
epoch: 12 training_loss 0.150462835021317 test_loss: 0.1455378532409668
epoch: 13 training_loss 0.14083905167877675 test_loss: 0.15169050693511962
epoch: 14 training_loss 0.15201257660984993 test_loss: 0.13490455150604247
epoch: 15 training_loss 0.14885547153651715 test_loss: 0.16220223903656006
epoch: 16 training_loss 0.15445089519023894 test_loss: 0.1402791380882263
epoch: 17 training_loss 0.1317781875655055 test_loss: 0.1315838098526001
epoch: 18 training_loss 0.14294914565980435 test_loss: 0.1496145248413086
epoch: 19 training_loss 0.13339706972241402 test_loss: 0.14566323757171631
epoch: 20 training_loss 0.1269695967063308 test_loss: 0.15892847776412963
epoch: 21 training_loss 0.13738064128905536 test_loss: 0.12008780241012573
epoch: 22 training_loss 0.13824251864105463 test_loss: 0.15223692655563353
epoch: 23 training_loss 0.13884501658380033 test_loss: 0.15502094030380248
epoch: 24 training_loss 0.14427963580936193 test_loss: 0.14504765272140502
epoch: 25 training_loss 0.13006811611354352 test_loss: 0.14510995149612427
epoch: 26 training_loss 0.13474857777357102 test_loss: 0.13027560710906982
epoch: 27 training_loss 0.1398035280406475 test_loss: 0.14674428701400757
epoch: 28 training_loss 0.14624106857925653 test_loss: 0.14882110357284545
epoch: 29 training_loss 0.14219047203660012 test_loss: 0.13073627948760985
epoch: 30 training_loss 0.1430758623406291 test_loss: 0.1270551562309265
epoch: 31 training_loss 0.1365409616008401 test_loss: 0.1297343134880066
epoch: 32 training_loss 0.13934905834496022 test_loss: 0.1498807907104492
epoch: 33 training_loss 0.1457649776339531 test_loss: 0.138479745388031
epoch: 34 training_loss 0.13522497540339828 test_loss: 0.13369039297103882
epoch: 35 training_loss 0.13950162436813116 test_loss: 0.11537694931030273
epoch: 36 training_loss 0.1426595499366522 test_loss: 0.1293092966079712
epoch: 37 training_loss 0.13180125620216132 test_loss: 0.16137363910675048
epoch: 38 training_loss 0.13821530140936375 test_loss: 0.13114255666732788
epoch: 39 training_loss 0.13338654909282924 test_loss: 0.17331501245498657
epoch: 40 training_loss 0.14344974532723426 test_loss: 0.11344791650772094
epoch: 41 training_loss 0.13305569868534803 test_loss: 0.139342737197876
epoch: 42 training_loss 0.13148074489086867 test_loss: 0.14014925956726074
epoch: 43 training_loss 0.14031107131391762 test_loss: 0.11738201379776
epoch: 44 training_loss 0.13954280231148006 test_loss: 0.1544983506202698
epoch: 45 training_loss 0.13878976237028837 test_loss: 0.1395618200302124
epoch: 46 training_loss 0.13834194876253605 test_loss: 0.14069801568984985
epoch: 47 training_loss 0.1400808198750019 test_loss: 0.14507124423980713
epoch: 48 training_loss 0.1330679290741682 test_loss: 0.14287090301513672
epoch: 49 training_loss 0.15042406268417835 test_loss: 0.1435297131538391
epoch: 50 training_loss 0.1301443898677826 test_loss: 0.14064128398895265
epoch: 51 training_loss 0.13908198494464158 test_loss: 0.1316138744354248
epoch: 52 training_loss 0.13564457338303326 test_loss: 0.16022276878356934
epoch: 53 training_loss 0.1317714311182499 test_loss: 0.13205397129058838
epoch: 54 training_loss 0.13264981795102357 test_loss: 0.13335225582122803
epoch: 55 training_loss 0.13200603663921356 test_loss: 0.15173763036727905
epoch: 56 training_loss 0.13603376265615225 test_loss: 0.1307593822479248
epoch: 57 training_loss 0.13221418336033822 test_loss: 0.14489697217941283
epoch: 58 training_loss 0.13593349259346724 test_loss: 0.15556522607803344
epoch: 59 training_loss 0.14012488782405852 test_loss: 0.13937983512878419
epoch: 60 training_loss 0.1423390629515052 test_loss: 0.15221983194351196
epoch: 61 training_loss 0.13254346169531345 test_loss: 0.1438275694847107
epoch: 62 training_loss 0.1375885095819831 test_loss: 0.1484392523765564
epoch: 63 training_loss 0.1325657521188259 test_loss: 0.1397957444190979
epoch: 64 training_loss 0.1300108564645052 test_loss: 0.13698054552078248
epoch: 65 training_loss 0.13314705848693847 test_loss: 0.12417392730712891
epoch: 66 training_loss 0.13968995980918408 test_loss: 0.11178786754608154
epoch: 67 training_loss 0.14440982341766356 test_loss: 0.14096816778182983
epoch: 68 training_loss 0.14143723340705036 test_loss: 0.13957469463348388
epoch: 69 training_loss 0.1408148232474923 test_loss: 0.13782600164413453
epoch: 70 training_loss 0.13450319768860935 test_loss: 0.1299208641052246
epoch: 71 training_loss 0.139150897115469 test_loss: 0.1478908896446228
epoch: 72 training_loss 0.14373081889003514 test_loss: 0.13104610443115233
epoch: 73 training_loss 0.1307608475163579 test_loss: 0.13129189014434814
epoch: 74 training_loss 0.13646489791572095 test_loss: 0.1471387267112732
epoch: 75 training_loss 0.1362396639212966 test_loss: 0.14045538902282714
epoch: 76 training_loss 0.13547674454748632 test_loss: 0.14344929456710814
epoch: 77 training_loss 0.12535873752087354 test_loss: 0.13596118688583375
epoch: 78 training_loss 0.14017599564045669 test_loss: 0.13832050561904907
epoch: 79 training_loss 0.13138260390609502 test_loss: 0.13976407051086426
epoch: 80 training_loss 0.13593607388436793 test_loss: 0.13941513299942015
epoch: 81 training_loss 0.1260456646978855 test_loss: 0.1365424156188965
epoch: 82 training_loss 0.13281470539048315 test_loss: 0.13775135278701783
epoch: 83 training_loss 0.13488846007734537 test_loss: 0.13119790554046631
epoch: 84 training_loss 0.1407026133686304 test_loss: 0.12843549251556396
epoch: 85 training_loss 0.13375563941895963 test_loss: 0.1305798888206482
epoch: 86 training_loss 0.12947966121137142 test_loss: 0.14788346290588378
epoch: 87 training_loss 0.14085010278970003 test_loss: 0.14417355060577391
epoch: 88 training_loss 0.14551319248974323 test_loss: 0.13199348449707032
epoch: 89 training_loss 0.13723382335156203 test_loss: 0.13190385103225707
epoch: 90 training_loss 0.13207665391266346 test_loss: 0.1410074234008789
epoch: 91 training_loss 0.12745627477765084 test_loss: 0.14109448194503785
epoch: 92 training_loss 0.1361088949069381 test_loss: 0.1263389229774475
epoch: 93 training_loss 0.13826908491551876 test_loss: 0.15042381286621093
epoch: 94 training_loss 0.137643482349813 test_loss: 0.135465407371521
epoch: 95 training_loss 0.13595633763819934 test_loss: 0.1265244722366333
epoch: 96 training_loss 0.1347207335755229 test_loss: 0.14225915670394898
epoch: 97 training_loss 0.13348859827965498 test_loss: 0.12370716333389283
epoch: 98 training_loss 0.13791492715477943 test_loss: 0.13630825281143188
epoch: 99 training_loss 0.1359355280920863 test_loss: 0.11505693197250366
epoch: 100 training_loss 0.13568112440407276 test_loss: 0.136648690700531
epoch: 101 training_loss 0.12762921407818795 test_loss: 0.1373228073120117
epoch: 102 training_loss 0.13390792902559043 test_loss: 0.14272669553756714
epoch: 103 training_loss 0.13433479841798543 test_loss: 0.13072553873062134
epoch: 104 training_loss 0.13334755212068558 test_loss: 0.1422187089920044
epoch: 105 training_loss 0.13352573543787002 test_loss: 0.12892800569534302
epoch: 106 training_loss 0.13304213136434556 test_loss: 0.13882246017456054
epoch: 107 training_loss 0.12940328303724527 test_loss: 0.14322333335876464
epoch: 108 training_loss 0.13392914064228534 test_loss: 0.14067275524139405
epoch: 109 training_loss 0.1349629400856793 test_loss: 0.14508988857269287
epoch: 110 training_loss 0.13042409755289555 test_loss: 0.13678932189941406
epoch: 111 training_loss 0.1356006620824337 test_loss: 0.1368404746055603
epoch: 112 training_loss 0.14069268476217986 test_loss: 0.14386513233184814
epoch: 113 training_loss 0.14161628976464272 test_loss: 0.10838239192962647
epoch: 114 training_loss 0.1282391983270645 test_loss: 0.13380366563796997
epoch: 115 training_loss 0.13588003326207398 test_loss: 0.14627405405044555
epoch: 116 training_loss 0.12760834116488695 test_loss: 0.13242387771606445
epoch: 117 training_loss 0.13572532888501881 test_loss: 0.14705991744995117
epoch: 118 training_loss 0.13547928480431437 test_loss: 0.12410320043563842
epoch: 119 training_loss 0.13604234516620636 test_loss: 0.14565197229385377
epoch: 120 training_loss 0.13525412127375602 test_loss: 0.12223531007766723
epoch: 121 training_loss 0.13395888481289148 test_loss: 0.1244545578956604
epoch: 122 training_loss 0.13654269136488437 test_loss: 0.12082279920578003
epoch: 123 training_loss 0.13456316523253917 test_loss: 0.12540627717971803
epoch: 124 training_loss 0.12613328784704209 test_loss: 0.13431452512741088
epoch: 125 training_loss 0.12188705025240779 test_loss: 0.15640480518341066
epoch: 126 training_loss 0.13372360374778508 test_loss: 0.1401156187057495
epoch: 127 training_loss 0.12789851967245341 test_loss: 0.14010539054870605
epoch: 128 training_loss 0.1345674255117774 test_loss: 0.14491535425186158
epoch: 129 training_loss 0.1340918130800128 test_loss: 0.13508616685867308
epoch: 130 training_loss 0.12978384383022784 test_loss: 0.13348588943481446
epoch: 131 training_loss 0.13099003404378892 test_loss: 0.14875121116638185
epoch: 132 training_loss 0.1380464767292142 test_loss: 0.14602309465408325
epoch: 133 training_loss 0.13620336562395097 test_loss: 0.1432834506034851
epoch: 134 training_loss 0.1326036635041237 test_loss: 0.143365740776062
epoch: 135 training_loss 0.12643140975385905 test_loss: 0.1666799783706665
epoch: 136 training_loss 0.1291625025495887 test_loss: 0.13275119066238403
epoch: 137 training_loss 0.1356908930465579 test_loss: 0.13965729475021363
epoch: 138 training_loss 0.12762049559503794 test_loss: 0.12492120265960693
epoch: 139 training_loss 0.14141378097236157 test_loss: 0.13157596588134765
epoch: 140 training_loss 0.1306859442964196 test_loss: 0.1409855604171753
epoch: 141 training_loss 0.13090792614966631 test_loss: 0.13580044507980346
epoch: 142 training_loss 0.12750569231808184 test_loss: 0.14240959882736207
epoch: 143 training_loss 0.1309065991267562 test_loss: 0.11802898645401001
epoch: 144 training_loss 0.13149539478123187 test_loss: 0.14794777631759642
epoch: 145 training_loss 0.1375805624574423 test_loss: 0.12253638505935668
epoch: 146 training_loss 0.13173703828826547 test_loss: 0.1303354024887085
epoch: 147 training_loss 0.1338494980894029 test_loss: 0.12887036800384521
epoch: 148 training_loss 0.13649352338165044 test_loss: 0.13527464866638184
epoch: 149 training_loss 0.1294576646760106 test_loss: 0.11667695045471191
epoch: 0 training_loss 7.713966851234436 test_loss: 4.686029434204102
epoch: 1 training_loss 3.674608061313629 test_loss: 2.8675411224365233
epoch: 2 training_loss 2.5128670394420625 test_loss: 2.181936836242676
epoch: 3 training_loss 1.9855252647399901 test_loss: 1.9195846557617187
epoch: 4 training_loss 1.6793969786167144 test_loss: 1.5932555198669434
epoch: 5 training_loss 1.4473235297203064 test_loss: 1.4314990997314454
epoch: 6 training_loss 1.3479755246639251 test_loss: 1.302668285369873
epoch: 7 training_loss 1.2361480212211609 test_loss: 1.181222152709961
epoch: 8 training_loss 1.1459590697288513 test_loss: 1.1156505584716796
epoch: 9 training_loss 1.085951714515686 test_loss: 1.0545018196105957
epoch: 10 training_loss 1.04767598092556 test_loss: 1.0257557868957519
epoch: 11 training_loss 0.9893155294656754 test_loss: 0.9694913864135742
epoch: 12 training_loss 0.9716891342401505 test_loss: 0.9460363388061523
epoch: 13 training_loss 0.9464795368909836 test_loss: 0.94227294921875
epoch: 14 training_loss 0.9464180272817612 test_loss: 0.9145375251770019
epoch: 15 training_loss 0.881303306221962 test_loss: 0.8985825538635254
epoch: 16 training_loss 0.8748237264156341 test_loss: 0.9116159439086914
epoch: 17 training_loss 0.8904141813516617 test_loss: 0.8165283203125
epoch: 18 training_loss 0.8449768143892288 test_loss: 0.8786460876464843
epoch: 19 training_loss 0.8422661566734314 test_loss: 0.8567374229431153
epoch: 20 training_loss 0.8255086892843246 test_loss: 0.8011317253112793
epoch: 21 training_loss 0.8203952091932297 test_loss: 0.8240055084228516
epoch: 22 training_loss 0.7948481076955796 test_loss: 0.8084491729736328
epoch: 23 training_loss 0.7922523772716522 test_loss: 0.8232856750488281
epoch: 24 training_loss 0.7447687923908234 test_loss: 0.7990562915802002
epoch: 25 training_loss 0.7730417615175247 test_loss: 0.7866662979125977
epoch: 26 training_loss 0.7568952429294586 test_loss: 0.7489649295806885
epoch: 27 training_loss 0.7286938291788101 test_loss: 0.7545368671417236
epoch: 28 training_loss 0.729181472659111 test_loss: 0.7125044822692871
epoch: 29 training_loss 0.7435096204280853 test_loss: 0.7130996704101562
epoch: 30 training_loss 0.7214549368619919 test_loss: 0.6843151569366455
epoch: 31 training_loss 0.7077081912755966 test_loss: 0.7358002662658691
epoch: 32 training_loss 0.7189775174856186 test_loss: 0.6986186027526855
epoch: 33 training_loss 0.7030030637979507 test_loss: 0.6772757053375245
epoch: 34 training_loss 0.6969052249193192 test_loss: 0.7341135501861572
epoch: 35 training_loss 0.6821364104747772 test_loss: 0.6717938423156739
epoch: 36 training_loss 0.6870622789859772 test_loss: 0.6929305076599122
epoch: 37 training_loss 0.6741490411758423 test_loss: 0.6926042556762695
epoch: 38 training_loss 0.6617615234851837 test_loss: 0.7059801578521728
epoch: 39 training_loss 0.6676148414611817 test_loss: 0.6887060165405273
epoch: 40 training_loss 0.6670773476362228 test_loss: 0.6886506080627441
epoch: 41 training_loss 0.641898894906044 test_loss: 0.6366260528564454
epoch: 42 training_loss 0.6379328221082687 test_loss: 0.7015004634857178
epoch: 43 training_loss 0.6375848662853241 test_loss: 0.6331408023834229
epoch: 44 training_loss 0.6381262046098709 test_loss: 0.6195944309234619
epoch: 45 training_loss 0.645665734410286 test_loss: 0.6291598796844482
epoch: 46 training_loss 0.6222208333015442 test_loss: 0.6614443302154541
epoch: 47 training_loss 0.6101346349716187 test_loss: 0.6248311042785645
epoch: 48 training_loss 0.6171268218755722 test_loss: 0.6275001049041748
epoch: 49 training_loss 0.6198808062076568 test_loss: 0.6361806392669678
epoch: 50 training_loss 0.6142179274559021 test_loss: 0.6080830097198486
epoch: 51 training_loss 0.6165937370061875 test_loss: 0.6047338485717774
epoch: 52 training_loss 0.6140200686454773 test_loss: 0.6213096141815185
epoch: 53 training_loss 0.6251180738210678 test_loss: 0.6012056827545166
epoch: 54 training_loss 0.608349079489708 test_loss: 0.6044658660888672
epoch: 55 training_loss 0.5971326035261154 test_loss: 0.6179784774780274
epoch: 56 training_loss 0.5933258584141732 test_loss: 0.5967260837554932
epoch: 57 training_loss 0.5995341831445694 test_loss: 0.6123891830444336
epoch: 58 training_loss 0.5879808157682419 test_loss: 0.5895439147949219
epoch: 59 training_loss 0.6012173640727997 test_loss: 0.6063672542572022
epoch: 60 training_loss 0.5881769359111786 test_loss: 0.5846604824066162
epoch: 61 training_loss 0.5866251108050347 test_loss: 0.5966060161590576
epoch: 62 training_loss 0.5923074901103973 test_loss: 0.5813632011413574
epoch: 63 training_loss 0.5758239591121673 test_loss: 0.6367885112762451
epoch: 64 training_loss 0.5808050927519798 test_loss: 0.5586498260498047
epoch: 65 training_loss 0.5756071740388871 test_loss: 0.5871003627777099
epoch: 66 training_loss 0.5837681013345718 test_loss: 0.5786683082580566
epoch: 67 training_loss 0.5813105177879333 test_loss: 0.5620602130889892
epoch: 68 training_loss 0.5685621592402458 test_loss: 0.5476663112640381
epoch: 69 training_loss 0.5708047670125961 test_loss: 0.5406518936157226
epoch: 70 training_loss 0.563749050796032 test_loss: 0.5651519298553467
epoch: 71 training_loss 0.5547858181595803 test_loss: 0.5502020359039307
epoch: 72 training_loss 0.5552135288715363 test_loss: 0.5721139430999755
epoch: 73 training_loss 0.5497287136316299 test_loss: 0.5808873653411866
epoch: 74 training_loss 0.5616724109649658 test_loss: 0.566691541671753
epoch: 75 training_loss 0.566014375090599 test_loss: 0.5379802703857421
epoch: 76 training_loss 0.5576132968068123 test_loss: 0.5491475582122802
epoch: 77 training_loss 0.5431507152318954 test_loss: 0.5549293518066406
epoch: 78 training_loss 0.5433469954133033 test_loss: 0.548822546005249
epoch: 79 training_loss 0.5485289996862411 test_loss: 0.5523149013519287
epoch: 80 training_loss 0.546179550588131 test_loss: 0.5346013069152832
epoch: 81 training_loss 0.5379027429223061 test_loss: 0.5373932838439941
epoch: 82 training_loss 0.5463216170668602 test_loss: 0.537705945968628
epoch: 83 training_loss 0.5454405385255814 test_loss: 0.5772700309753418
epoch: 84 training_loss 0.5355423718690873 test_loss: 0.5507924079895019
epoch: 85 training_loss 0.5422686088085175 test_loss: 0.5669064998626709
epoch: 86 training_loss 0.524713259935379 test_loss: 0.5295328617095947
epoch: 87 training_loss 0.5426757276058197 test_loss: 0.5696442604064942
epoch: 88 training_loss 0.5349950540065765 test_loss: 0.5507652282714843
epoch: 89 training_loss 0.5333324885368347 test_loss: 0.5302988529205322
epoch: 90 training_loss 0.5487035357952118 test_loss: 0.5363898277282715
epoch: 91 training_loss 0.5389550614356995 test_loss: 0.5532378196716309
epoch: 92 training_loss 0.5265572673082352 test_loss: 0.5357849597930908
epoch: 93 training_loss 0.527431711256504 test_loss: 0.5612205028533935
epoch: 94 training_loss 0.5269813719391823 test_loss: 0.525870418548584
epoch: 95 training_loss 0.5304163759946823 test_loss: 0.5081315040588379
epoch: 96 training_loss 0.5230134120583534 test_loss: 0.5302350044250488
epoch: 97 training_loss 0.5184633165597916 test_loss: 0.5334363460540772
epoch: 98 training_loss 0.522953183054924 test_loss: 0.5410789966583252
epoch: 99 training_loss 0.523216065466404 test_loss: 0.5444450378417969
epoch: 100 training_loss 0.5153634703159332 test_loss: 0.5194979190826416
epoch: 101 training_loss 0.524720375239849 test_loss: 0.5153542995452881
epoch: 102 training_loss 0.5151823085546493 test_loss: 0.4995079517364502
epoch: 103 training_loss 0.522674058675766 test_loss: 0.5174082756042481
epoch: 104 training_loss 0.5153473183512688 test_loss: 0.5182723522186279
epoch: 105 training_loss 0.5136291545629501 test_loss: 0.528980827331543
epoch: 106 training_loss 0.5081176701188087 test_loss: 0.5342366218566894
epoch: 107 training_loss 0.5159426140785217 test_loss: 0.5215929985046387
epoch: 108 training_loss 0.5150238713622093 test_loss: 0.5092187404632569
epoch: 109 training_loss 0.5134977146983146 test_loss: 0.5015607357025147
epoch: 110 training_loss 0.5162126863002777 test_loss: 0.5089237689971924
epoch: 111 training_loss 0.5222543293237686 test_loss: 0.5388260364532471
epoch: 112 training_loss 0.5088652336597442 test_loss: 0.501524543762207
epoch: 113 training_loss 0.498913582265377 test_loss: 0.5034468173980713
epoch: 114 training_loss 0.5054581063985825 test_loss: 0.5010558605194092
epoch: 115 training_loss 0.5073763197660446 test_loss: 0.5014326095581054
epoch: 116 training_loss 0.5089518749713897 test_loss: 0.4991147994995117
epoch: 117 training_loss 0.5045524439215661 test_loss: 0.5009210109710693
epoch: 118 training_loss 0.5035680505633354 test_loss: 0.4904871940612793
epoch: 119 training_loss 0.5001693004369736 test_loss: 0.5151288032531738
epoch: 120 training_loss 0.512167651951313 test_loss: 0.5042208194732666
epoch: 121 training_loss 0.5023827284574509 test_loss: 0.5036555290222168
epoch: 122 training_loss 0.49756149321794507 test_loss: 0.5265315532684326
epoch: 123 training_loss 0.5047757470607758 test_loss: 0.5265643119812011
epoch: 124 training_loss 0.5042961481213569 test_loss: 0.4841169834136963
epoch: 125 training_loss 0.48556532442569733 test_loss: 0.49356350898742674
epoch: 126 training_loss 0.4934918341040611 test_loss: 0.49109773635864257
epoch: 127 training_loss 0.5021488749980927 test_loss: 0.49704880714416505
epoch: 128 training_loss 0.48817972511053087 test_loss: 0.4974499225616455
epoch: 129 training_loss 0.498637173473835 test_loss: 0.48854999542236327
epoch: 130 training_loss 0.49934427231550216 test_loss: 0.4950493335723877
epoch: 131 training_loss 0.4907215139269829 test_loss: 0.4883420467376709
epoch: 132 training_loss 0.49001803278923034 test_loss: 0.5276600360870362
epoch: 133 training_loss 0.5072609892487526 test_loss: 0.4863121509552002
epoch: 134 training_loss 0.49108566105365753 test_loss: 0.48971099853515626
epoch: 135 training_loss 0.4926675155758858 test_loss: 0.506205701828003
epoch: 136 training_loss 0.4922087773680687 test_loss: 0.4848484039306641
epoch: 137 training_loss 0.48972364753484726 test_loss: 0.47545695304870605
epoch: 138 training_loss 0.48975556164979933 test_loss: 0.5108351230621337
epoch: 139 training_loss 0.4844479623436928 test_loss: 0.4985066890716553
epoch: 140 training_loss 0.4879626142978668 test_loss: 0.49637413024902344
epoch: 141 training_loss 0.47850589245557784 test_loss: 0.4677425861358643
epoch: 142 training_loss 0.48141306668519973 test_loss: 0.4897159576416016
epoch: 143 training_loss 0.48918012768030167 test_loss: 0.4998220920562744
epoch: 144 training_loss 0.4974293801188469 test_loss: 0.4848918914794922
epoch: 145 training_loss 0.483565471470356 test_loss: 0.494277811050415
epoch: 146 training_loss 0.4933443650603294 test_loss: 0.48978657722473146
epoch: 147 training_loss 0.4781892442703247 test_loss: 0.5022622585296631
epoch: 148 training_loss 0.4819142952561378 test_loss: 0.4870506763458252
epoch: 149 training_loss 0.48469595730304715 test_loss: 0.4919395446777344
3173.261993785259
episode: 0 training return: tensor(-42.2008, device='cuda:0')
episode: 1 training return: tensor(-62.8840, device='cuda:0')
episode: 2 training return: tensor(-101.2746, device='cuda:0')
episode: 3 training return: tensor(-635.9490, device='cuda:0')
epoch: 1 test_true_pfm: 1123.7217744709303 sim_pfm: -679.663975347076
episode: 4 training return: tensor(-94.4782, device='cuda:0')
episode: 5 training return: tensor(-70.2741, device='cuda:0')
episode: 6 training return: tensor(-59.0277, device='cuda:0')
episode: 7 training return: tensor(-623.6887, device='cuda:0')
epoch: 2 test_true_pfm: 932.782782727742 sim_pfm: -695.0274697113394
episode: 8 training return: tensor(-458.3716, device='cuda:0')
episode: 9 training return: tensor(-615.5057, device='cuda:0')
episode: 10 training return: tensor(-700.9949, device='cuda:0')
episode: 11 training return: tensor(-690.3345, device='cuda:0')
epoch: 3 test_true_pfm: 964.7211033219415 sim_pfm: -686.655106582155
episode: 12 training return: tensor(-691.1442, device='cuda:0')
episode: 13 training return: tensor(-686.1171, device='cuda:0')
episode: 14 training return: tensor(-668.7477, device='cuda:0')
episode: 15 training return: tensor(-598.6016, device='cuda:0')
epoch: 4 test_true_pfm: 1081.9416372917665 sim_pfm: -455.74353530614945
episode: 16 training return: tensor(-261.9865, device='cuda:0')
episode: 17 training return: tensor(-702.0883, device='cuda:0')
episode: 18 training return: tensor(-90.7664, device='cuda:0')
episode: 19 training return: tensor(-693.1320, device='cuda:0')
epoch: 5 test_true_pfm: 2517.5833168055738 sim_pfm: -448.4992987262279
episode: 20 training return: tensor(-121.2198, device='cuda:0')
episode: 21 training return: tensor(-673.4811, device='cuda:0')
episode: 22 training return: tensor(-699.6870, device='cuda:0')
episode: 23 training return: tensor(-241.3472, device='cuda:0')
epoch: 6 test_true_pfm: 3203.1202586224476 sim_pfm: -71.79313108762533
episode: 24 training return: tensor(-394.5397, device='cuda:0')
episode: 25 training return: tensor(-625.2309, device='cuda:0')
episode: 26 training return: tensor(-106.2847, device='cuda:0')
episode: 27 training return: tensor(-65.4572, device='cuda:0')
epoch: 7 test_true_pfm: 2720.3888862588747 sim_pfm: -160.48737854820016
episode: 28 training return: tensor(-624.5306, device='cuda:0')
episode: 29 training return: tensor(-698.0577, device='cuda:0')
episode: 30 training return: tensor(-686.8682, device='cuda:0')
episode: 31 training return: tensor(-91.2860, device='cuda:0')
epoch: 8 test_true_pfm: 2453.337532583902 sim_pfm: -189.81993854531902
episode: 32 training return: tensor(-66.8304, device='cuda:0')
episode: 33 training return: tensor(-88.2864, device='cuda:0')
episode: 34 training return: tensor(-108.3718, device='cuda:0')
episode: 35 training return: tensor(-65.1656, device='cuda:0')
epoch: 9 test_true_pfm: 2844.2771767947766 sim_pfm: -107.75829904299462
episode: 36 training return: tensor(-691.0027, device='cuda:0')
episode: 37 training return: tensor(-75.4063, device='cuda:0')
episode: 38 training return: tensor(-224.1626, device='cuda:0')
episode: 39 training return: tensor(-631.6884, device='cuda:0')
epoch: 10 test_true_pfm: 3214.0763710838123 sim_pfm: -253.4076533317469
episode: 40 training return: tensor(-639.8203, device='cuda:0')
episode: 41 training return: tensor(-440.9288, device='cuda:0')
episode: 42 training return: tensor(-538.2119, device='cuda:0')
episode: 43 training return: tensor(-691.4543, device='cuda:0')
epoch: 11 test_true_pfm: 3125.1742684135775 sim_pfm: -53.134899486981645
episode: 44 training return: tensor(-626.3117, device='cuda:0')
episode: 45 training return: tensor(-690.4510, device='cuda:0')
episode: 46 training return: tensor(-627.7697, device='cuda:0')
episode: 47 training return: tensor(-94.6471, device='cuda:0')
epoch: 12 test_true_pfm: 3211.2842693427187 sim_pfm: -36.4599426597512
episode: 48 training return: tensor(-75.4904, device='cuda:0')
episode: 49 training return: tensor(-58.1283, device='cuda:0')
episode: 50 training return: tensor(-622.8141, device='cuda:0')
episode: 51 training return: tensor(-48.3129, device='cuda:0')
epoch: 13 test_true_pfm: 3229.411166734239 sim_pfm: -34.82777578417639
episode: 52 training return: tensor(-641.1589, device='cuda:0')
episode: 53 training return: tensor(-28.1938, device='cuda:0')
episode: 54 training return: tensor(-693.6470, device='cuda:0')
episode: 55 training return: tensor(-59.3536, device='cuda:0')
epoch: 14 test_true_pfm: 3237.574921060992 sim_pfm: -27.59402533721489
episode: 56 training return: tensor(-97.7929, device='cuda:0')
episode: 57 training return: tensor(-80.4630, device='cuda:0')
episode: 58 training return: tensor(-548.0161, device='cuda:0')
episode: 59 training return: tensor(-59.4305, device='cuda:0')
epoch: 15 test_true_pfm: 3208.207382115526 sim_pfm: -52.168974632998776
episode: 60 training return: tensor(-162.3969, device='cuda:0')
episode: 61 training return: tensor(-617.1661, device='cuda:0')
episode: 62 training return: tensor(-356.1907, device='cuda:0')
episode: 63 training return: tensor(-290.2126, device='cuda:0')
epoch: 16 test_true_pfm: 2861.3706025047286 sim_pfm: -140.5732687439886
episode: 64 training return: tensor(-44.9770, device='cuda:0')
episode: 65 training return: tensor(-68.0468, device='cuda:0')
episode: 66 training return: tensor(-76.7254, device='cuda:0')
episode: 67 training return: tensor(-14.6867, device='cuda:0')
epoch: 17 test_true_pfm: 3208.1496456493874 sim_pfm: -44.0594443451458
episode: 68 training return: tensor(-43.7048, device='cuda:0')
episode: 69 training return: tensor(-623.9730, device='cuda:0')
episode: 70 training return: tensor(-620.2487, device='cuda:0')
episode: 71 training return: tensor(-4.1203, device='cuda:0')
epoch: 18 test_true_pfm: 3176.5430852167133 sim_pfm: -70.41302321563126
episode: 72 training return: tensor(-679.6636, device='cuda:0')
episode: 73 training return: tensor(-76.3531, device='cuda:0')
episode: 74 training return: tensor(-42.4138, device='cuda:0')
episode: 75 training return: tensor(-208.7791, device='cuda:0')
epoch: 19 test_true_pfm: 3199.538074650534 sim_pfm: -129.83786783047253
episode: 76 training return: tensor(-576.1565, device='cuda:0')
episode: 77 training return: tensor(-152.5795, device='cuda:0')
episode: 78 training return: tensor(-57.5476, device='cuda:0')
episode: 79 training return: tensor(-13.2452, device='cuda:0')
epoch: 20 test_true_pfm: 3183.9679736178005 sim_pfm: -50.801893044263124
episode: 80 training return: tensor(-70.2923, device='cuda:0')
episode: 81 training return: tensor(-71.0574, device='cuda:0')
episode: 82 training return: tensor(-54.4326, device='cuda:0')
episode: 83 training return: tensor(-633.1338, device='cuda:0')
epoch: 21 test_true_pfm: 3161.3085126844344 sim_pfm: -166.92059066534662
episode: 84 training return: tensor(21.2384, device='cuda:0')
episode: 85 training return: tensor(33.3789, device='cuda:0')
episode: 86 training return: tensor(-80.4912, device='cuda:0')
episode: 87 training return: tensor(-568.6553, device='cuda:0')
epoch: 22 test_true_pfm: 3205.7613327121585 sim_pfm: -36.44169277159381
episode: 88 training return: tensor(-692.1981, device='cuda:0')
episode: 89 training return: tensor(-691.2163, device='cuda:0')
episode: 90 training return: tensor(-322.6557, device='cuda:0')
episode: 91 training return: tensor(-68.7854, device='cuda:0')
epoch: 23 test_true_pfm: 3216.1615164792415 sim_pfm: -133.21672988372544
episode: 92 training return: tensor(-636.8053, device='cuda:0')
episode: 93 training return: tensor(-69.2729, device='cuda:0')
episode: 94 training return: tensor(-612.1857, device='cuda:0')
episode: 95 training return: tensor(-73.5917, device='cuda:0')
epoch: 24 test_true_pfm: 3189.13998203383 sim_pfm: -120.61202795044908
episode: 96 training return: tensor(-611.0792, device='cuda:0')
episode: 97 training return: tensor(-254.6808, device='cuda:0')
episode: 98 training return: tensor(-615.3501, device='cuda:0')
episode: 99 training return: tensor(-593.1115, device='cuda:0')
epoch: 25 test_true_pfm: 3193.2727299828293 sim_pfm: -413.4040016513318
episode: 100 training return: tensor(-626.7695, device='cuda:0')
episode: 101 training return: tensor(-80.0311, device='cuda:0')
episode: 102 training return: tensor(-538.5121, device='cuda:0')
episode: 103 training return: tensor(-392.0263, device='cuda:0')
epoch: 26 test_true_pfm: 2976.5697790848685 sim_pfm: -79.71861093856084
episode: 104 training return: tensor(-72.6907, device='cuda:0')
episode: 105 training return: tensor(-689.2662, device='cuda:0')
episode: 106 training return: tensor(-528.9418, device='cuda:0')
episode: 107 training return: tensor(-608.7176, device='cuda:0')
epoch: 27 test_true_pfm: 3217.5171199760575 sim_pfm: -183.56031824390325
episode: 108 training return: tensor(-448.7018, device='cuda:0')
episode: 109 training return: tensor(-35.1691, device='cuda:0')
episode: 110 training return: tensor(-30.4145, device='cuda:0')
episode: 111 training return: tensor(-625.6436, device='cuda:0')
epoch: 28 test_true_pfm: 3204.398137146987 sim_pfm: -35.61570688463204
episode: 112 training return: tensor(-124.0699, device='cuda:0')
episode: 113 training return: tensor(-31.8193, device='cuda:0')
episode: 114 training return: tensor(-614.2386, device='cuda:0')
episode: 115 training return: tensor(-48.0411, device='cuda:0')
epoch: 29 test_true_pfm: 3199.512128561175 sim_pfm: -21.88392212650312
episode: 116 training return: tensor(-641.3798, device='cuda:0')
episode: 117 training return: tensor(-362.8705, device='cuda:0')
episode: 118 training return: tensor(-274.7019, device='cuda:0')
episode: 119 training return: tensor(-533.7165, device='cuda:0')
epoch: 30 test_true_pfm: 3232.3923656932516 sim_pfm: -26.597549358538043
episode: 120 training return: tensor(-26.3129, device='cuda:0')
episode: 121 training return: tensor(-385.3802, device='cuda:0')
episode: 122 training return: tensor(-49.3896, device='cuda:0')
episode: 123 training return: tensor(-45.0155, device='cuda:0')
epoch: 31 test_true_pfm: 2822.0169083327078 sim_pfm: -31.937925561049877
episode: 124 training return: tensor(-34.5200, device='cuda:0')
episode: 125 training return: tensor(-28.9861, device='cuda:0')
episode: 126 training return: tensor(-2.8075, device='cuda:0')
episode: 127 training return: tensor(-682.8292, device='cuda:0')
epoch: 32 test_true_pfm: 3208.4683261555874 sim_pfm: -58.878458952831956
episode: 128 training return: tensor(-280.7055, device='cuda:0')
episode: 129 training return: tensor(-690.0881, device='cuda:0')
episode: 130 training return: tensor(-608.8436, device='cuda:0')
episode: 131 training return: tensor(-434.4409, device='cuda:0')
epoch: 33 test_true_pfm: 3207.3206817646437 sim_pfm: -66.18517811326699
episode: 132 training return: tensor(-689.7169, device='cuda:0')
episode: 133 training return: tensor(-72.0710, device='cuda:0')
episode: 134 training return: tensor(-64.6841, device='cuda:0')
episode: 135 training return: tensor(-53.2950, device='cuda:0')
epoch: 34 test_true_pfm: 2582.8616864866085 sim_pfm: -48.05525545060906
episode: 136 training return: tensor(-555.4865, device='cuda:0')
episode: 137 training return: tensor(12.3288, device='cuda:0')
episode: 138 training return: tensor(-48.8025, device='cuda:0')
episode: 139 training return: tensor(-616.4431, device='cuda:0')
epoch: 35 test_true_pfm: 2533.7044153199595 sim_pfm: -41.315850632954
episode: 140 training return: tensor(-48.4310, device='cuda:0')
episode: 141 training return: tensor(-11.4946, device='cuda:0')
episode: 142 training return: tensor(-603.0652, device='cuda:0')
episode: 143 training return: tensor(-608.8533, device='cuda:0')
epoch: 36 test_true_pfm: 3269.675866710854 sim_pfm: -75.17690886330092
episode: 144 training return: tensor(-528.8392, device='cuda:0')
episode: 145 training return: tensor(-692.1713, device='cuda:0')
episode: 146 training return: tensor(-7.6873, device='cuda:0')
episode: 147 training return: tensor(-56.2504, device='cuda:0')
epoch: 37 test_true_pfm: 3176.3525399462646 sim_pfm: -142.20669706431605
episode: 148 training return: tensor(-613.4643, device='cuda:0')
episode: 149 training return: tensor(-440.6148, device='cuda:0')
episode: 150 training return: tensor(-614.8969, device='cuda:0')
episode: 151 training return: tensor(-437.6114, device='cuda:0')
epoch: 38 test_true_pfm: 3198.16291772625 sim_pfm: -13.059241581320142
episode: 152 training return: tensor(-283.0657, device='cuda:0')
episode: 153 training return: tensor(-611.0284, device='cuda:0')
episode: 154 training return: tensor(-26.8613, device='cuda:0')
episode: 155 training return: tensor(-694.2961, device='cuda:0')
epoch: 39 test_true_pfm: 3219.9932024058894 sim_pfm: -44.04570637050589
episode: 156 training return: tensor(-38.8999, device='cuda:0')
episode: 157 training return: tensor(10.1041, device='cuda:0')
episode: 158 training return: tensor(-57.0797, device='cuda:0')
episode: 159 training return: tensor(-25.3482, device='cuda:0')
epoch: 40 test_true_pfm: 3214.7224615944783 sim_pfm: -40.46285898147229
episode: 160 training return: tensor(-361.2793, device='cuda:0')
episode: 161 training return: tensor(-10.3633, device='cuda:0')
episode: 162 training return: tensor(-25.9265, device='cuda:0')
episode: 163 training return: tensor(-105.5137, device='cuda:0')
epoch: 41 test_true_pfm: 2799.1359384408092 sim_pfm: -45.039994943886995
episode: 164 training return: tensor(-439.4770, device='cuda:0')
episode: 165 training return: tensor(-28.9942, device='cuda:0')
episode: 166 training return: tensor(-43.3153, device='cuda:0')
episode: 167 training return: tensor(-18.8635, device='cuda:0')
epoch: 42 test_true_pfm: 3202.0057034480765 sim_pfm: -228.53165749007408
episode: 168 training return: tensor(-286.1752, device='cuda:0')
episode: 169 training return: tensor(-285.8243, device='cuda:0')
episode: 170 training return: tensor(-686.6863, device='cuda:0')
episode: 171 training return: tensor(-615.4794, device='cuda:0')
epoch: 43 test_true_pfm: 3201.124613797305 sim_pfm: -64.29135889497896
episode: 172 training return: tensor(-287.5841, device='cuda:0')
episode: 173 training return: tensor(-628.5504, device='cuda:0')
episode: 174 training return: tensor(-613.3923, device='cuda:0')
episode: 175 training return: tensor(-613.6107, device='cuda:0')
epoch: 44 test_true_pfm: 2774.419983085658 sim_pfm: -56.10168092511594
episode: 176 training return: tensor(-582.5979, device='cuda:0')
episode: 177 training return: tensor(-50.4853, device='cuda:0')
episode: 178 training return: tensor(-451.4422, device='cuda:0')
episode: 179 training return: tensor(-35.5923, device='cuda:0')
epoch: 45 test_true_pfm: 3215.658105414337 sim_pfm: -162.48191499070768
episode: 180 training return: tensor(-70.9854, device='cuda:0')
episode: 181 training return: tensor(-75.2831, device='cuda:0')
episode: 182 training return: tensor(-64.2960, device='cuda:0')
episode: 183 training return: tensor(-40.4426, device='cuda:0')
epoch: 46 test_true_pfm: 2384.2549408521163 sim_pfm: -153.29939036703823
episode: 184 training return: tensor(-9.1379, device='cuda:0')
episode: 185 training return: tensor(-605.8176, device='cuda:0')
episode: 186 training return: tensor(-43.9322, device='cuda:0')
episode: 187 training return: tensor(-697.6763, device='cuda:0')
epoch: 47 test_true_pfm: 2597.771155747901 sim_pfm: -53.447925040653594
episode: 188 training return: tensor(-648.2965, device='cuda:0')
episode: 189 training return: tensor(-612.6194, device='cuda:0')
episode: 190 training return: tensor(-610.4549, device='cuda:0')
episode: 191 training return: tensor(-24.2302, device='cuda:0')
epoch: 48 test_true_pfm: 3223.22950809642 sim_pfm: -30.155842814167652
episode: 192 training return: tensor(-37.5401, device='cuda:0')
episode: 193 training return: tensor(-608.0385, device='cuda:0')
episode: 194 training return: tensor(-608.7737, device='cuda:0')
episode: 195 training return: tensor(-194.2413, device='cuda:0')
epoch: 49 test_true_pfm: 2547.317981530167 sim_pfm: -338.30407346775365
episode: 196 training return: tensor(-209.3395, device='cuda:0')
episode: 197 training return: tensor(-25.5901, device='cuda:0')
episode: 198 training return: tensor(-610.0928, device='cuda:0')
episode: 199 training return: tensor(-35.9760, device='cuda:0')
epoch: 50 test_true_pfm: 2197.0451119331824 sim_pfm: -161.8142683932674
episode: 200 training return: tensor(-693.5150, device='cuda:0')
episode: 201 training return: tensor(-609.8412, device='cuda:0')
episode: 202 training return: tensor(34.2744, device='cuda:0')
episode: 203 training return: tensor(-44.0782, device='cuda:0')
epoch: 51 test_true_pfm: 3211.206955949441 sim_pfm: -309.64505393510143
episode: 204 training return: tensor(-688.7093, device='cuda:0')
episode: 205 training return: tensor(-616.3175, device='cuda:0')
episode: 206 training return: tensor(1.7322, device='cuda:0')
episode: 207 training return: tensor(-297.1177, device='cuda:0')
epoch: 52 test_true_pfm: 2542.8436300760536 sim_pfm: -37.133879778518654
episode: 208 training return: tensor(-86.3400, device='cuda:0')
episode: 209 training return: tensor(-453.5986, device='cuda:0')
episode: 210 training return: tensor(-45.3002, device='cuda:0')
episode: 211 training return: tensor(-612.6562, device='cuda:0')
epoch: 53 test_true_pfm: 2797.8986790253325 sim_pfm: -86.8525135901582
episode: 212 training return: tensor(-50.4416, device='cuda:0')
episode: 213 training return: tensor(-687.7035, device='cuda:0')
episode: 214 training return: tensor(-609.2032, device='cuda:0')
episode: 215 training return: tensor(-44.6967, device='cuda:0')
epoch: 54 test_true_pfm: 3066.454442530398 sim_pfm: -163.6461387964276
episode: 216 training return: tensor(-55.9467, device='cuda:0')
episode: 217 training return: tensor(-56.1061, device='cuda:0')
episode: 218 training return: tensor(-617.4219, device='cuda:0')
episode: 219 training return: tensor(-691.0413, device='cuda:0')
epoch: 55 test_true_pfm: 3210.632932412181 sim_pfm: -170.1210318688148
episode: 220 training return: tensor(-578.3035, device='cuda:0')
episode: 221 training return: tensor(-447.5487, device='cuda:0')
episode: 222 training return: tensor(-33.5930, device='cuda:0')
episode: 223 training return: tensor(-34.5538, device='cuda:0')
epoch: 56 test_true_pfm: 1184.189082779638 sim_pfm: -611.1223192419469
episode: 224 training return: tensor(-37.4820, device='cuda:0')
episode: 225 training return: tensor(-236.3336, device='cuda:0')
episode: 226 training return: tensor(-694.7834, device='cuda:0')
episode: 227 training return: tensor(-422.7869, device='cuda:0')
epoch: 57 test_true_pfm: 1499.1861487536262 sim_pfm: -114.95488411184245
episode: 228 training return: tensor(-689.9322, device='cuda:0')
episode: 229 training return: tensor(-50.2505, device='cuda:0')
episode: 230 training return: tensor(-609.6978, device='cuda:0')
episode: 231 training return: tensor(-623.1540, device='cuda:0')
epoch: 58 test_true_pfm: 3198.598203583321 sim_pfm: -40.02837837364253
episode: 232 training return: tensor(-532.8677, device='cuda:0')
episode: 233 training return: tensor(-59.7744, device='cuda:0')
episode: 234 training return: tensor(-47.7767, device='cuda:0')
episode: 235 training return: tensor(-39.0487, device='cuda:0')
epoch: 59 test_true_pfm: 3207.006042949835 sim_pfm: -156.8638740971995
episode: 236 training return: tensor(-645.9714, device='cuda:0')
episode: 237 training return: tensor(-619.7902, device='cuda:0')
episode: 238 training return: tensor(-534.1940, device='cuda:0')
episode: 239 training return: tensor(-627.8275, device='cuda:0')
epoch: 60 test_true_pfm: 3213.682008302827 sim_pfm: -61.65030327369459
episode: 240 training return: tensor(-598.9877, device='cuda:0')
episode: 241 training return: tensor(-36.3770, device='cuda:0')
episode: 242 training return: tensor(-633.9432, device='cuda:0')
episode: 243 training return: tensor(-45.3159, device='cuda:0')
epoch: 61 test_true_pfm: 3211.9264519741096 sim_pfm: -44.432808992666345
episode: 244 training return: tensor(-586.4081, device='cuda:0')
episode: 245 training return: tensor(-536.8889, device='cuda:0')
episode: 246 training return: tensor(-54.4634, device='cuda:0')
episode: 247 training return: tensor(-689.7313, device='cuda:0')
epoch: 62 test_true_pfm: 2785.8097840361693 sim_pfm: -41.37005319812064
episode: 248 training return: tensor(-89.8574, device='cuda:0')
episode: 249 training return: tensor(-570.2485, device='cuda:0')
episode: 250 training return: tensor(-281.7000, device='cuda:0')
episode: 251 training return: tensor(-634.4897, device='cuda:0')
epoch: 63 test_true_pfm: 3086.868492738314 sim_pfm: -39.9128381528717
episode: 252 training return: tensor(34.5866, device='cuda:0')
episode: 253 training return: tensor(-635.0933, device='cuda:0')
episode: 254 training return: tensor(-49.7999, device='cuda:0')
episode: 255 training return: tensor(-622.9125, device='cuda:0')
epoch: 64 test_true_pfm: 2713.758940194944 sim_pfm: -333.02426416714053
episode: 256 training return: tensor(-634.4852, device='cuda:0')
episode: 257 training return: tensor(-612.9879, device='cuda:0')
episode: 258 training return: tensor(-120.9796, device='cuda:0')
episode: 259 training return: tensor(-130.1369, device='cuda:0')
epoch: 65 test_true_pfm: 2594.777764478781 sim_pfm: -171.7179426413883
episode: 260 training return: tensor(-693.5737, device='cuda:0')
episode: 261 training return: tensor(-613.8295, device='cuda:0')
episode: 262 training return: tensor(-44.8585, device='cuda:0')
episode: 263 training return: tensor(-650.5295, device='cuda:0')
epoch: 66 test_true_pfm: 2899.0511566994414 sim_pfm: -45.18898877156122
episode: 264 training return: tensor(-683.4196, device='cuda:0')
episode: 265 training return: tensor(-532.6164, device='cuda:0')
episode: 266 training return: tensor(-457.1181, device='cuda:0')
episode: 267 training return: tensor(-599.2547, device='cuda:0')
epoch: 67 test_true_pfm: 2360.208970457963 sim_pfm: 7.992111682319471
episode: 268 training return: tensor(-689.6258, device='cuda:0')
episode: 269 training return: tensor(-685.1282, device='cuda:0')
episode: 270 training return: tensor(-689.6921, device='cuda:0')
episode: 271 training return: tensor(-603.2966, device='cuda:0')
epoch: 68 test_true_pfm: 3052.7005404916345 sim_pfm: -137.12474643215924
episode: 272 training return: tensor(-573.1588, device='cuda:0')
episode: 273 training return: tensor(-36.2908, device='cuda:0')
episode: 274 training return: tensor(-689.5029, device='cuda:0')
episode: 275 training return: tensor(-44.4300, device='cuda:0')
epoch: 69 test_true_pfm: 1914.9623586874739 sim_pfm: -244.6632217472943
episode: 276 training return: tensor(-30.7449, device='cuda:0')
episode: 277 training return: tensor(-612.9777, device='cuda:0')
episode: 278 training return: tensor(-68.7974, device='cuda:0')
episode: 279 training return: tensor(-635.4218, device='cuda:0')
epoch: 70 test_true_pfm: 3201.6205238586645 sim_pfm: -32.950940112932585
episode: 280 training return: tensor(-401.9056, device='cuda:0')
episode: 281 training return: tensor(-691.5579, device='cuda:0')
episode: 282 training return: tensor(-437.9595, device='cuda:0')
episode: 283 training return: tensor(-637.1917, device='cuda:0')
epoch: 71 test_true_pfm: 2822.7370140025855 sim_pfm: -416.34670816664584
episode: 284 training return: tensor(-58.2880, device='cuda:0')
episode: 285 training return: tensor(-690.8060, device='cuda:0')
episode: 286 training return: tensor(-66.6470, device='cuda:0')
episode: 287 training return: tensor(-609.9435, device='cuda:0')
epoch: 72 test_true_pfm: 3203.246351368707 sim_pfm: -133.39687841880368
episode: 288 training return: tensor(-651.5405, device='cuda:0')
episode: 289 training return: tensor(-28.5551, device='cuda:0')
episode: 290 training return: tensor(-437.3486, device='cuda:0')
episode: 291 training return: tensor(-361.8385, device='cuda:0')
epoch: 73 test_true_pfm: 1173.9762399292993 sim_pfm: -618.9421532325408
episode: 292 training return: tensor(-24.5769, device='cuda:0')
episode: 293 training return: tensor(-604.2713, device='cuda:0')
episode: 294 training return: tensor(-624.1376, device='cuda:0')
episode: 295 training return: tensor(-14.3046, device='cuda:0')
epoch: 74 test_true_pfm: 1396.6414313198345 sim_pfm: -95.66273767556413
episode: 296 training return: tensor(-643.2098, device='cuda:0')
episode: 297 training return: tensor(-615.3315, device='cuda:0')
episode: 298 training return: tensor(13.9344, device='cuda:0')
episode: 299 training return: tensor(-450.3925, device='cuda:0')
epoch: 75 test_true_pfm: 3207.950448394775 sim_pfm: -77.80162212539774
episode: 300 training return: tensor(-37.9382, device='cuda:0')
episode: 301 training return: tensor(-613.0938, device='cuda:0')
episode: 302 training return: tensor(-611.6876, device='cuda:0')
episode: 303 training return: tensor(-608.4684, device='cuda:0')
epoch: 76 test_true_pfm: 3251.910724875553 sim_pfm: -303.8113205988581
episode: 304 training return: tensor(-72.7149, device='cuda:0')
episode: 305 training return: tensor(-49.2162, device='cuda:0')
episode: 306 training return: tensor(-444.4600, device='cuda:0')
episode: 307 training return: tensor(-666.5133, device='cuda:0')
epoch: 77 test_true_pfm: 3223.2531673016897 sim_pfm: -47.48573510238202
episode: 308 training return: tensor(-196.9904, device='cuda:0')
episode: 309 training return: tensor(-617.5347, device='cuda:0')
episode: 310 training return: tensor(-676.1773, device='cuda:0')
episode: 311 training return: tensor(-55.8848, device='cuda:0')
epoch: 78 test_true_pfm: 3191.076238491915 sim_pfm: -72.04600843117805
episode: 312 training return: tensor(-210.1972, device='cuda:0')
episode: 313 training return: tensor(-693.1849, device='cuda:0')
episode: 314 training return: tensor(-238.0756, device='cuda:0')
episode: 315 training return: tensor(-614.0342, device='cuda:0')
epoch: 79 test_true_pfm: 2796.84077024487 sim_pfm: -114.1425586741728
episode: 316 training return: tensor(-614.6213, device='cuda:0')
episode: 317 training return: tensor(-2.9508, device='cuda:0')
episode: 318 training return: tensor(-645.8375, device='cuda:0')
episode: 319 training return: tensor(-218.9996, device='cuda:0')
epoch: 80 test_true_pfm: 3233.0498654394955 sim_pfm: -47.264454834376615
episode: 320 training return: tensor(-455.3732, device='cuda:0')
episode: 321 training return: tensor(-691.7195, device='cuda:0')
episode: 322 training return: tensor(-656.4594, device='cuda:0')
episode: 323 training return: tensor(-693.4307, device='cuda:0')
epoch: 81 test_true_pfm: 1727.6070468360247 sim_pfm: -81.56448452266825
episode: 324 training return: tensor(-46.2681, device='cuda:0')
episode: 325 training return: tensor(-635.6953, device='cuda:0')
episode: 326 training return: tensor(-613.1458, device='cuda:0')
episode: 327 training return: tensor(-21.5319, device='cuda:0')
epoch: 82 test_true_pfm: 2812.635091290964 sim_pfm: -164.7129331783702
episode: 328 training return: tensor(-20.1997, device='cuda:0')
episode: 329 training return: tensor(-43.7624, device='cuda:0')
episode: 330 training return: tensor(-33.9860, device='cuda:0')
episode: 331 training return: tensor(-64.7262, device='cuda:0')
epoch: 83 test_true_pfm: 1862.1115356553485 sim_pfm: -495.25140004310134
episode: 332 training return: tensor(-359.9213, device='cuda:0')
episode: 333 training return: tensor(-690.1333, device='cuda:0')
episode: 334 training return: tensor(-188.3121, device='cuda:0')
episode: 335 training return: tensor(-633.8774, device='cuda:0')
epoch: 84 test_true_pfm: 3209.866678715863 sim_pfm: -66.4389569236276
episode: 336 training return: tensor(-693.9044, device='cuda:0')
episode: 337 training return: tensor(-686.6621, device='cuda:0')
episode: 338 training return: tensor(-41.5010, device='cuda:0')
episode: 339 training return: tensor(-44.2517, device='cuda:0')
epoch: 85 test_true_pfm: 3091.748955120882 sim_pfm: -332.2099111642844
episode: 340 training return: tensor(-646.6459, device='cuda:0')
episode: 341 training return: tensor(-373.4990, device='cuda:0')
episode: 342 training return: tensor(-58.6101, device='cuda:0')
episode: 343 training return: tensor(-652.0921, device='cuda:0')
epoch: 86 test_true_pfm: 1243.2173655854901 sim_pfm: -383.27621185329434
episode: 344 training return: tensor(30.3921, device='cuda:0')
episode: 345 training return: tensor(-18.0671, device='cuda:0')
episode: 346 training return: tensor(-362.8109, device='cuda:0')
episode: 347 training return: tensor(-609.1715, device='cuda:0')
epoch: 87 test_true_pfm: 2598.086164687262 sim_pfm: -44.203538746883474
episode: 348 training return: tensor(-43.1290, device='cuda:0')
episode: 349 training return: tensor(-102.5419, device='cuda:0')
episode: 350 training return: tensor(-618.4194, device='cuda:0')
episode: 351 training return: tensor(-67.4510, device='cuda:0')
epoch: 88 test_true_pfm: 2897.7470939919162 sim_pfm: -27.451305578938143
episode: 352 training return: tensor(-681.0853, device='cuda:0')
episode: 353 training return: tensor(-285.9131, device='cuda:0')
episode: 354 training return: tensor(-616.1542, device='cuda:0')
episode: 355 training return: tensor(-689.9340, device='cuda:0')
epoch: 89 test_true_pfm: 1253.8555597677275 sim_pfm: -611.0748256665344
episode: 356 training return: tensor(-521.5864, device='cuda:0')
episode: 357 training return: tensor(-611.4526, device='cuda:0')
episode: 358 training return: tensor(-656.5271, device='cuda:0')
episode: 359 training return: tensor(-610.8122, device='cuda:0')
epoch: 90 test_true_pfm: 3230.0699928868366 sim_pfm: -174.91990882440587
episode: 360 training return: tensor(-657.6281, device='cuda:0')
episode: 361 training return: tensor(-24.8625, device='cuda:0')
episode: 362 training return: tensor(-71.0427, device='cuda:0')
episode: 363 training return: tensor(-271.0361, device='cuda:0')
epoch: 91 test_true_pfm: 2544.4594931034576 sim_pfm: -329.9095675617282
episode: 364 training return: tensor(-7.9100, device='cuda:0')
episode: 365 training return: tensor(-662.3939, device='cuda:0')
episode: 366 training return: tensor(-472.5174, device='cuda:0')
episode: 367 training return: tensor(-369.2877, device='cuda:0')
epoch: 92 test_true_pfm: 1355.5209445572607 sim_pfm: -606.3124895474757
episode: 368 training return: tensor(-29.3391, device='cuda:0')
episode: 369 training return: tensor(-689.8133, device='cuda:0')
episode: 370 training return: tensor(-63.8472, device='cuda:0')
episode: 371 training return: tensor(-656.6647, device='cuda:0')
epoch: 93 test_true_pfm: 1271.1935913040197 sim_pfm: -258.59724871739553
episode: 372 training return: tensor(-64.8732, device='cuda:0')
episode: 373 training return: tensor(19.2342, device='cuda:0')
episode: 374 training return: tensor(-362.0408, device='cuda:0')
episode: 375 training return: tensor(-613.3939, device='cuda:0')
epoch: 94 test_true_pfm: 1219.2874316066845 sim_pfm: -25.836938135975895
episode: 376 training return: tensor(-611.6591, device='cuda:0')
episode: 377 training return: tensor(-556.3649, device='cuda:0')
episode: 378 training return: tensor(-633.1184, device='cuda:0')
episode: 379 training return: tensor(-45.7589, device='cuda:0')
epoch: 95 test_true_pfm: 3209.7557286615724 sim_pfm: -33.25919819056677
episode: 380 training return: tensor(-42.2164, device='cuda:0')
episode: 381 training return: tensor(-640.5201, device='cuda:0')
episode: 382 training return: tensor(-453.5028, device='cuda:0')
episode: 383 training return: tensor(-60.5300, device='cuda:0')
epoch: 96 test_true_pfm: 3210.25309663376 sim_pfm: -46.64415446533045
episode: 384 training return: tensor(-619.9771, device='cuda:0')
episode: 385 training return: tensor(-615.2458, device='cuda:0')
episode: 386 training return: tensor(-688.6998, device='cuda:0')
episode: 387 training return: tensor(-97.5740, device='cuda:0')
epoch: 97 test_true_pfm: 3192.5444266339678 sim_pfm: -163.96515645142063
episode: 388 training return: tensor(-609.7044, device='cuda:0')
episode: 389 training return: tensor(-366.5534, device='cuda:0')
episode: 390 training return: tensor(-585.0302, device='cuda:0')
episode: 391 training return: tensor(-17.5561, device='cuda:0')
epoch: 98 test_true_pfm: 1743.526325299518 sim_pfm: -141.14516280053067
episode: 392 training return: tensor(-37.2974, device='cuda:0')
episode: 393 training return: tensor(-689.4796, device='cuda:0')
episode: 394 training return: tensor(-48.6173, device='cuda:0')
episode: 395 training return: tensor(-622.5542, device='cuda:0')
epoch: 99 test_true_pfm: 3204.1870200451763 sim_pfm: -90.29732788501617
episode: 396 training return: tensor(-11.2438, device='cuda:0')
episode: 397 training return: tensor(-572.5804, device='cuda:0')
episode: 398 training return: tensor(-680.8581, device='cuda:0')
episode: 399 training return: tensor(-679.5342, device='cuda:0')
epoch: 100 test_true_pfm: 2788.722833025827 sim_pfm: -56.49038536155907
episode: 400 training return: tensor(-686.1163, device='cuda:0')
episode: 401 training return: tensor(-694.2397, device='cuda:0')
episode: 402 training return: tensor(-696.1570, device='cuda:0')
episode: 403 training return: tensor(-637.0537, device='cuda:0')
epoch: 101 test_true_pfm: 1841.8349382285353 sim_pfm: -421.1689964271209
episode: 404 training return: tensor(-73.9126, device='cuda:0')
episode: 405 training return: tensor(-690.3697, device='cuda:0')
episode: 406 training return: tensor(-642.6323, device='cuda:0')
episode: 407 training return: tensor(-381.9207, device='cuda:0')
epoch: 102 test_true_pfm: 1939.5829943393953 sim_pfm: -580.3870591316178
episode: 408 training return: tensor(-608.6215, device='cuda:0')
episode: 409 training return: tensor(-132.4382, device='cuda:0')
episode: 410 training return: tensor(-393.7425, device='cuda:0')
episode: 411 training return: tensor(-610.1202, device='cuda:0')
epoch: 103 test_true_pfm: 2327.887480312886 sim_pfm: -508.25991149522207
episode: 412 training return: tensor(-691.3372, device='cuda:0')
episode: 413 training return: tensor(-694.2065, device='cuda:0')
episode: 414 training return: tensor(-615.1608, device='cuda:0')
episode: 415 training return: tensor(-384.8358, device='cuda:0')
epoch: 104 test_true_pfm: 2560.9186603373864 sim_pfm: -65.75690095604902
episode: 416 training return: tensor(-692.9553, device='cuda:0')
episode: 417 training return: tensor(-389.4582, device='cuda:0')
episode: 418 training return: tensor(-210.1185, device='cuda:0')
episode: 419 training return: tensor(-676.4948, device='cuda:0')
epoch: 105 test_true_pfm: 1441.1685458158818 sim_pfm: -540.6927808642891
episode: 420 training return: tensor(-612.4192, device='cuda:0')
episode: 421 training return: tensor(-543.4582, device='cuda:0')
episode: 422 training return: tensor(-61.9012, device='cuda:0')
episode: 423 training return: tensor(-687.1478, device='cuda:0')
epoch: 106 test_true_pfm: 2811.4672438715447 sim_pfm: -190.36990757845342
episode: 424 training return: tensor(-61.2645, device='cuda:0')
episode: 425 training return: tensor(-690.6091, device='cuda:0')
episode: 426 training return: tensor(-689.1664, device='cuda:0')
episode: 427 training return: tensor(-63.7541, device='cuda:0')
epoch: 107 test_true_pfm: 3192.9880123874404 sim_pfm: -33.63911942645791
episode: 428 training return: tensor(-689.9381, device='cuda:0')
episode: 429 training return: tensor(-79.1869, device='cuda:0')
episode: 430 training return: tensor(-678.0184, device='cuda:0')
episode: 431 training return: tensor(-214.0849, device='cuda:0')
epoch: 108 test_true_pfm: 2544.091455258113 sim_pfm: -265.8042276583922
episode: 432 training return: tensor(-38.2792, device='cuda:0')
episode: 433 training return: tensor(-524.0558, device='cuda:0')
episode: 434 training return: tensor(-112.1912, device='cuda:0')
episode: 435 training return: tensor(-614.2349, device='cuda:0')
epoch: 109 test_true_pfm: 2058.026703824806 sim_pfm: -500.14338103844784
episode: 436 training return: tensor(-285.4386, device='cuda:0')
episode: 437 training return: tensor(-571.5268, device='cuda:0')
episode: 438 training return: tensor(-687.3301, device='cuda:0')
episode: 439 training return: tensor(-611.2612, device='cuda:0')
epoch: 110 test_true_pfm: 2910.9011874490548 sim_pfm: -44.595522700323876
episode: 440 training return: tensor(-57.3874, device='cuda:0')
episode: 441 training return: tensor(-521.1996, device='cuda:0')
episode: 442 training return: tensor(-37.6937, device='cuda:0')
episode: 443 training return: tensor(-58.5554, device='cuda:0')
epoch: 111 test_true_pfm: 3192.562628187418 sim_pfm: -79.75364703543407
episode: 444 training return: tensor(-632.3970, device='cuda:0')
episode: 445 training return: tensor(-693.2277, device='cuda:0')
episode: 446 training return: tensor(-25.0956, device='cuda:0')
episode: 447 training return: tensor(-664.4954, device='cuda:0')
epoch: 112 test_true_pfm: 3185.0660742795117 sim_pfm: -40.11495825048769
episode: 448 training return: tensor(-616.9896, device='cuda:0')
episode: 449 training return: tensor(-619.2901, device='cuda:0')
episode: 450 training return: tensor(-665.9700, device='cuda:0')
episode: 451 training return: tensor(-671.4637, device='cuda:0')
epoch: 113 test_true_pfm: 2654.070577602423 sim_pfm: -453.32904161155847
episode: 452 training return: tensor(-306.2523, device='cuda:0')
episode: 453 training return: tensor(-690.1187, device='cuda:0')
episode: 454 training return: tensor(-16.4990, device='cuda:0')
episode: 455 training return: tensor(-669.2569, device='cuda:0')
epoch: 114 test_true_pfm: 1540.8460691915825 sim_pfm: -486.8808315769614
episode: 456 training return: tensor(-603.2681, device='cuda:0')
episode: 457 training return: tensor(-110.8640, device='cuda:0')
episode: 458 training return: tensor(-618.0337, device='cuda:0')
episode: 459 training return: tensor(-609.1663, device='cuda:0')
epoch: 115 test_true_pfm: 1918.0441810084994 sim_pfm: -57.53257670150682
episode: 460 training return: tensor(-614.1264, device='cuda:0')
episode: 461 training return: tensor(-55.7751, device='cuda:0')
episode: 462 training return: tensor(-688.9980, device='cuda:0')
episode: 463 training return: tensor(-615.8153, device='cuda:0')
epoch: 116 test_true_pfm: 2091.980468877945 sim_pfm: -534.4676951842945
episode: 464 training return: tensor(-618.9155, device='cuda:0')
episode: 465 training return: tensor(-615.0283, device='cuda:0')
episode: 466 training return: tensor(-53.3908, device='cuda:0')
episode: 467 training return: tensor(-694.9222, device='cuda:0')
epoch: 117 test_true_pfm: 2157.092413247687 sim_pfm: -285.9010581120189
episode: 468 training return: tensor(-607.7073, device='cuda:0')
episode: 469 training return: tensor(-368.0129, device='cuda:0')
episode: 470 training return: tensor(-617.5325, device='cuda:0')
episode: 471 training return: tensor(-675.1831, device='cuda:0')
epoch: 118 test_true_pfm: 3066.6014822794987 sim_pfm: -36.811429069416285
episode: 472 training return: tensor(-24.7243, device='cuda:0')
episode: 473 training return: tensor(-363.5439, device='cuda:0')
episode: 474 training return: tensor(-699.4559, device='cuda:0')
episode: 475 training return: tensor(-687.6553, device='cuda:0')
epoch: 119 test_true_pfm: 1763.8749889537942 sim_pfm: -437.32275556435343
episode: 476 training return: tensor(-27.9186, device='cuda:0')
episode: 477 training return: tensor(-681.5579, device='cuda:0')
episode: 478 training return: tensor(-26.4486, device='cuda:0')
episode: 479 training return: tensor(-686.9291, device='cuda:0')
epoch: 120 test_true_pfm: 2216.7545577267547 sim_pfm: -144.99646911688615
episode: 480 training return: tensor(-616.0381, device='cuda:0')
episode: 481 training return: tensor(-22.1190, device='cuda:0')
episode: 482 training return: tensor(-530.3166, device='cuda:0')
episode: 483 training return: tensor(-615.2164, device='cuda:0')
epoch: 121 test_true_pfm: 1857.0207035199765 sim_pfm: -510.10268217204913
episode: 484 training return: tensor(-611.4557, device='cuda:0')
episode: 485 training return: tensor(29.9732, device='cuda:0')
episode: 486 training return: tensor(-132.4414, device='cuda:0')
episode: 487 training return: tensor(-693.0337, device='cuda:0')
epoch: 122 test_true_pfm: 2246.704909203787 sim_pfm: -528.9128923593865
episode: 488 training return: tensor(-612.5069, device='cuda:0')
episode: 489 training return: tensor(-651.5196, device='cuda:0')
episode: 490 training return: tensor(-622.3975, device='cuda:0')
episode: 491 training return: tensor(-651.0675, device='cuda:0')
epoch: 123 test_true_pfm: 2879.657991051725 sim_pfm: -167.50235945926397
episode: 492 training return: tensor(-413.2146, device='cuda:0')
episode: 493 training return: tensor(-72.5901, device='cuda:0')
episode: 494 training return: tensor(-71.8086, device='cuda:0')
episode: 495 training return: tensor(-484.0922, device='cuda:0')
epoch: 124 test_true_pfm: 3237.387317445337 sim_pfm: -144.41152636982346
episode: 496 training return: tensor(-306.0049, device='cuda:0')
episode: 497 training return: tensor(-55.6203, device='cuda:0')
episode: 498 training return: tensor(-617.0828, device='cuda:0')
episode: 499 training return: tensor(-6.5735, device='cuda:0')
epoch: 125 test_true_pfm: 3229.5458641855694 sim_pfm: -35.02733438044864
episode: 500 training return: tensor(-679.2979, device='cuda:0')
episode: 501 training return: tensor(-567.6624, device='cuda:0')
episode: 502 training return: tensor(-683.4755, device='cuda:0')
episode: 503 training return: tensor(-696.8934, device='cuda:0')
epoch: 126 test_true_pfm: 3187.3357800833633 sim_pfm: -7.407160779674693
episode: 504 training return: tensor(-579.2588, device='cuda:0')
episode: 505 training return: tensor(-31.5258, device='cuda:0')
episode: 506 training return: tensor(-43.8573, device='cuda:0')
episode: 507 training return: tensor(-612.2527, device='cuda:0')
epoch: 127 test_true_pfm: 2857.7236575923544 sim_pfm: -39.72478851322861
episode: 508 training return: tensor(-653.0937, device='cuda:0')
episode: 509 training return: tensor(-362.4621, device='cuda:0')
episode: 510 training return: tensor(2.4440, device='cuda:0')
episode: 511 training return: tensor(-179.8146, device='cuda:0')
epoch: 128 test_true_pfm: 1170.2599916699085 sim_pfm: -617.6783426565429
episode: 512 training return: tensor(-590.0698, device='cuda:0')
episode: 513 training return: tensor(-56.7544, device='cuda:0')
episode: 514 training return: tensor(-619.0228, device='cuda:0')
episode: 515 training return: tensor(-620.7088, device='cuda:0')
epoch: 129 test_true_pfm: 2250.529021187093 sim_pfm: -165.66250441241814
episode: 516 training return: tensor(-603.3323, device='cuda:0')
episode: 517 training return: tensor(-606.0707, device='cuda:0')
episode: 518 training return: tensor(-603.0273, device='cuda:0')
episode: 519 training return: tensor(-525.3019, device='cuda:0')
epoch: 130 test_true_pfm: 2808.629015783894 sim_pfm: -150.59334059511698
episode: 520 training return: tensor(-644.9397, device='cuda:0')
episode: 521 training return: tensor(-682.7034, device='cuda:0')
episode: 522 training return: tensor(-527.8859, device='cuda:0')
episode: 523 training return: tensor(-614.8093, device='cuda:0')
epoch: 131 test_true_pfm: 3224.42370545053 sim_pfm: -57.3794657572677
episode: 524 training return: tensor(-373.1750, device='cuda:0')
episode: 525 training return: tensor(-645.1063, device='cuda:0')
episode: 526 training return: tensor(-630.2127, device='cuda:0')
episode: 527 training return: tensor(-36.1305, device='cuda:0')
epoch: 132 test_true_pfm: 3195.834892166205 sim_pfm: -61.45595525082899
episode: 528 training return: tensor(-329.8627, device='cuda:0')
episode: 529 training return: tensor(-162.0416, device='cuda:0')
episode: 530 training return: tensor(-637.9312, device='cuda:0')
episode: 531 training return: tensor(-51.5299, device='cuda:0')
epoch: 133 test_true_pfm: 1841.6104942144686 sim_pfm: -476.667538903479
episode: 532 training return: tensor(-6.1883, device='cuda:0')
episode: 533 training return: tensor(-691.0351, device='cuda:0')
episode: 534 training return: tensor(-611.9576, device='cuda:0')
episode: 535 training return: tensor(-58.4790, device='cuda:0')
epoch: 134 test_true_pfm: 3169.788527493715 sim_pfm: -62.90457590582082
episode: 536 training return: tensor(-54.3698, device='cuda:0')
episode: 537 training return: tensor(-612.4356, device='cuda:0')
episode: 538 training return: tensor(-686.6003, device='cuda:0')
episode: 539 training return: tensor(-616.1613, device='cuda:0')
epoch: 135 test_true_pfm: 1619.2165367099178 sim_pfm: -396.0408835099176
episode: 540 training return: tensor(-702.2145, device='cuda:0')
episode: 541 training return: tensor(-621.5951, device='cuda:0')
episode: 542 training return: tensor(-615.6606, device='cuda:0')
episode: 543 training return: tensor(-446.0011, device='cuda:0')
epoch: 136 test_true_pfm: 2318.717838514853 sim_pfm: -61.84105117805302
episode: 544 training return: tensor(-630.5513, device='cuda:0')
episode: 545 training return: tensor(-454.5115, device='cuda:0')
episode: 546 training return: tensor(-47.9498, device='cuda:0')
episode: 547 training return: tensor(-289.2368, device='cuda:0')
epoch: 137 test_true_pfm: 2564.897978109783 sim_pfm: -327.2283418478328
episode: 548 training return: tensor(-620.2537, device='cuda:0')
episode: 549 training return: tensor(-296.1159, device='cuda:0')
episode: 550 training return: tensor(-23.1768, device='cuda:0')
episode: 551 training return: tensor(-34.0059, device='cuda:0')
epoch: 138 test_true_pfm: 3176.6539132480125 sim_pfm: -137.162695858938
episode: 552 training return: tensor(-618.3276, device='cuda:0')
episode: 553 training return: tensor(-691.4253, device='cuda:0')
episode: 554 training return: tensor(-693.2667, device='cuda:0')
episode: 555 training return: tensor(-609.9858, device='cuda:0')
epoch: 139 test_true_pfm: 2508.043097557774 sim_pfm: -258.64388951965765
episode: 556 training return: tensor(-692.7928, device='cuda:0')
episode: 557 training return: tensor(-36.7164, device='cuda:0')
episode: 558 training return: tensor(-690.4576, device='cuda:0')
episode: 559 training return: tensor(-358.8950, device='cuda:0')
epoch: 140 test_true_pfm: 2810.4058765013965 sim_pfm: -151.3257279942724
episode: 560 training return: tensor(-604.9457, device='cuda:0')
episode: 561 training return: tensor(-90.3547, device='cuda:0')
episode: 562 training return: tensor(-42.9607, device='cuda:0')
episode: 563 training return: tensor(-623.4426, device='cuda:0')
epoch: 141 test_true_pfm: 2809.9426229680053 sim_pfm: -150.3999450027477
episode: 564 training return: tensor(-284.5049, device='cuda:0')
episode: 565 training return: tensor(-613.9268, device='cuda:0')
episode: 566 training return: tensor(-376.7270, device='cuda:0')
episode: 567 training return: tensor(-373.0228, device='cuda:0')
epoch: 142 test_true_pfm: 2580.6567259549197 sim_pfm: -31.644905665753566
episode: 568 training return: tensor(-647.2877, device='cuda:0')
episode: 569 training return: tensor(-72.6896, device='cuda:0')
episode: 570 training return: tensor(-610.1671, device='cuda:0')
episode: 571 training return: tensor(-526.3918, device='cuda:0')
epoch: 143 test_true_pfm: 3233.793776506314 sim_pfm: -300.91511781688314
episode: 572 training return: tensor(-43.8954, device='cuda:0')
episode: 573 training return: tensor(-45.6823, device='cuda:0')
episode: 574 training return: tensor(-700.0883, device='cuda:0')
episode: 575 training return: tensor(-633.9196, device='cuda:0')
epoch: 144 test_true_pfm: 1943.4436767371753 sim_pfm: -295.4467348144196
episode: 576 training return: tensor(-654.1953, device='cuda:0')
episode: 577 training return: tensor(-679.9410, device='cuda:0')
episode: 578 training return: tensor(-271.6959, device='cuda:0')
episode: 579 training return: tensor(-642.4398, device='cuda:0')
epoch: 145 test_true_pfm: 2065.4771041314425 sim_pfm: -240.73867247904613
episode: 580 training return: tensor(-606.4902, device='cuda:0')
episode: 581 training return: tensor(-610.4434, device='cuda:0')
episode: 582 training return: tensor(-661.8748, device='cuda:0')
episode: 583 training return: tensor(-691.2298, device='cuda:0')
epoch: 146 test_true_pfm: 1601.11409657837 sim_pfm: -395.815462037174
episode: 584 training return: tensor(-681.1510, device='cuda:0')
episode: 585 training return: tensor(-625.5385, device='cuda:0')
episode: 586 training return: tensor(-89.4713, device='cuda:0')
episode: 587 training return: tensor(-661.5897, device='cuda:0')
epoch: 147 test_true_pfm: 2809.1555174496593 sim_pfm: -39.38700294395676
episode: 588 training return: tensor(-42.3765, device='cuda:0')
episode: 589 training return: tensor(-599.9241, device='cuda:0')
episode: 590 training return: tensor(-634.5609, device='cuda:0')
episode: 591 training return: tensor(-689.1688, device='cuda:0')
epoch: 148 test_true_pfm: 2941.4197213788902 sim_pfm: -180.9204404221576
episode: 592 training return: tensor(-371.3028, device='cuda:0')
episode: 593 training return: tensor(-370.4651, device='cuda:0')
episode: 594 training return: tensor(-682.1118, device='cuda:0')
episode: 595 training return: tensor(-446.4426, device='cuda:0')
epoch: 149 test_true_pfm: 1351.3171622762195 sim_pfm: -322.93342052899726
episode: 596 training return: tensor(-67.5270, device='cuda:0')
episode: 597 training return: tensor(-679.2899, device='cuda:0')
episode: 598 training return: tensor(-681.3242, device='cuda:0')
episode: 599 training return: tensor(-30.9892, device='cuda:0')
epoch: 150 test_true_pfm: 1817.7053927334366 sim_pfm: -379.9571509390759
