['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '4', '--data', '3000', '--sub']
epoch: 0 training_loss 0.2498112514615059 test_loss: 0.08706756830215454
epoch: 1 training_loss 0.150416940189898 test_loss: 0.06877412796020507
epoch: 2 training_loss 0.1280891503021121 test_loss: 0.06149492263793945
epoch: 3 training_loss 0.10760257568210363 test_loss: 0.07492942810058593
epoch: 4 training_loss 0.10693666968494654 test_loss: 0.06845815181732177
epoch: 5 training_loss 0.10760706651955843 test_loss: 0.05925325751304626
epoch: 6 training_loss 0.08664593074470758 test_loss: 0.06212688684463501
epoch: 7 training_loss 0.0948140929825604 test_loss: 0.062458211183547975
epoch: 8 training_loss 0.08875101745128632 test_loss: 0.05953705310821533
epoch: 9 training_loss 0.09374561931937933 test_loss: 0.06540588736534118
epoch: 10 training_loss 0.08172647975385189 test_loss: 0.05824289321899414
epoch: 11 training_loss 0.08627104323357343 test_loss: 0.07296028137207031
epoch: 12 training_loss 0.07520115249790252 test_loss: 0.06470308899879455
epoch: 13 training_loss 0.07641951577737928 test_loss: 0.06892514824867249
epoch: 14 training_loss 0.07106820337474345 test_loss: 0.06783328652381897
epoch: 15 training_loss 0.06927464980632067 test_loss: 0.06949189901351929
epoch: 16 training_loss 0.06632330792024732 test_loss: 0.06590445637702942
epoch: 17 training_loss 0.0740579773299396 test_loss: 0.06928644180297852
epoch: 18 training_loss 0.07074521987698973 test_loss: 0.07599574327468872
epoch: 19 training_loss 0.06313196287490427 test_loss: 0.0648540735244751
epoch: 20 training_loss 0.056450454890727995 test_loss: 0.07203344106674195
epoch: 21 training_loss 0.06044045448303223 test_loss: 0.0781332790851593
epoch: 22 training_loss 0.06082931468263268 test_loss: 0.08205612897872924
epoch: 23 training_loss 0.0602275162935257 test_loss: 0.0786193549633026
epoch: 24 training_loss 0.05899663168936968 test_loss: 0.07702125310897827
epoch: 25 training_loss 0.0557810290530324 test_loss: 0.07207803130149841
epoch: 26 training_loss 0.05988916042260826 test_loss: 0.08089905977249146
epoch: 27 training_loss 0.06411126065067947 test_loss: 0.07732295989990234
epoch: 28 training_loss 0.05459989268798381 test_loss: 0.09403536915779113
epoch: 29 training_loss 0.05586075112223625 test_loss: 0.09375880360603332
epoch: 30 training_loss 0.059164195884950456 test_loss: 0.0809913456439972
epoch: 31 training_loss 0.05622703405097127 test_loss: 0.08550277948379517
epoch: 32 training_loss 0.05255241987295449 test_loss: 0.07896301746368409
epoch: 33 training_loss 0.04776475046761334 test_loss: 0.09036872386932374
epoch: 34 training_loss 0.05491174593102187 test_loss: 0.0855965256690979
epoch: 35 training_loss 0.05242248511407524 test_loss: 0.08070424795150757
epoch: 36 training_loss 0.04793320416472852 test_loss: 0.09884575605392457
epoch: 37 training_loss 0.0454350467864424 test_loss: 0.10425869226455689
epoch: 38 training_loss 0.045198620148003105 test_loss: 0.08248828053474426
epoch: 39 training_loss 0.04686129285953939 test_loss: 0.08883869051933288
epoch: 40 training_loss 0.04768740510568023 test_loss: 0.08062989115715027
epoch: 41 training_loss 0.059415039988234636 test_loss: 0.08828999400138855
epoch: 42 training_loss 0.05149810609873384 test_loss: 0.08912956118583679
epoch: 43 training_loss 0.043514597890898586 test_loss: 0.08853406310081482
epoch: 44 training_loss 0.0451079115178436 test_loss: 0.09268446564674378
epoch: 45 training_loss 0.04483726936858148 test_loss: 0.08857438564300538
epoch: 46 training_loss 0.04417666449211538 test_loss: 0.11331524848937988
epoch: 47 training_loss 0.038532156171277164 test_loss: 0.09889540076255798
epoch: 48 training_loss 0.040877676010131835 test_loss: 0.09151014685630798
epoch: 49 training_loss 0.04133180507458747 test_loss: 0.09529743790626526
epoch: 50 training_loss 0.0480461453134194 test_loss: 0.09476136565208435
epoch: 51 training_loss 0.03608188560698181 test_loss: 0.10275806188583374
epoch: 52 training_loss 0.04207681274041533 test_loss: 0.10887168645858765
epoch: 53 training_loss 0.03946766934823245 test_loss: 0.10936878919601441
epoch: 54 training_loss 0.034348062267526985 test_loss: 0.10278799533843994
epoch: 55 training_loss 0.04141297363676131 test_loss: 0.10553905963897706
epoch: 56 training_loss 0.04256917118094861 test_loss: 0.10103716850280761
epoch: 57 training_loss 0.036163908815942704 test_loss: 0.11073890924453736
epoch: 58 training_loss 0.03158098857384175 test_loss: 0.10446792840957642
epoch: 59 training_loss 0.031191508816555143 test_loss: 0.10498225688934326
epoch: 60 training_loss 0.03439294936601073 test_loss: 0.10790135860443115
epoch: 61 training_loss 0.03443419114220887 test_loss: 0.10412499904632569
epoch: 62 training_loss 0.030230658818036316 test_loss: 0.10002610683441163
epoch: 63 training_loss 0.032312147526536134 test_loss: 0.1052851915359497
epoch: 64 training_loss 0.029057857247535138 test_loss: 0.11131974458694457
epoch: 65 training_loss 0.03086113292723894 test_loss: 0.10009815692901611
epoch: 66 training_loss 0.0275543470075354 test_loss: 0.11014435291290284
epoch: 67 training_loss 0.029058603430166842 test_loss: 0.118984055519104
epoch: 68 training_loss 0.025672145900316537 test_loss: 0.11285114288330078
epoch: 69 training_loss 0.031171003947965802 test_loss: 0.12232229709625245
epoch: 70 training_loss 0.0394771958142519 test_loss: 0.10319751501083374
epoch: 71 training_loss 0.029884694195352494 test_loss: 0.10865466594696045
epoch: 72 training_loss 0.025134349633008243 test_loss: 0.1214595913887024
epoch: 73 training_loss 0.02915990974288434 test_loss: 0.12299466133117676
epoch: 74 training_loss 0.0255540857790038 test_loss: 0.11144318580627441
epoch: 75 training_loss 0.02481245081871748 test_loss: 0.10672374963760375
epoch: 76 training_loss 0.0322432149364613 test_loss: 0.1107221007347107
epoch: 77 training_loss 0.02025499091949314 test_loss: 0.11795750856399537
epoch: 78 training_loss 0.024061634307727217 test_loss: 0.11355812549591064
epoch: 79 training_loss 0.019890667863655835 test_loss: 0.11061148643493653
epoch: 80 training_loss 0.023473614437971264 test_loss: 0.11725406646728516
epoch: 81 training_loss 0.02102064961567521 test_loss: 0.12008321285247803
epoch: 82 training_loss 0.018482678472064435 test_loss: 0.12174520492553711
epoch: 83 training_loss 0.022517890378367156 test_loss: 0.12755088806152343
epoch: 84 training_loss 0.023469618840608747 test_loss: 0.11971702575683593
epoch: 85 training_loss 0.017901246252004056 test_loss: 0.1106831431388855
epoch: 86 training_loss 0.017445634086616337 test_loss: 0.12296348810195923
epoch: 87 training_loss 0.01716405963525176 test_loss: 0.12209339141845703
epoch: 88 training_loss 0.01703936479985714 test_loss: 0.12122111320495606
epoch: 89 training_loss 0.02364534626249224 test_loss: 0.12722665071487427
epoch: 90 training_loss 0.036275797807611525 test_loss: 0.11962025165557862
epoch: 91 training_loss 0.024521841714158655 test_loss: 0.13371665477752687
epoch: 92 training_loss 0.01913057828089222 test_loss: 0.12994072437286378
epoch: 93 training_loss 0.016795599008910356 test_loss: 0.12619633674621583
epoch: 94 training_loss 0.016121281016385183 test_loss: 0.1264737367630005
epoch: 95 training_loss 0.01965926254168153 test_loss: 0.12552008628845215
epoch: 96 training_loss 0.017339249760843813 test_loss: 0.12088298797607422
epoch: 97 training_loss 0.017129830876365305 test_loss: 0.12393361330032349
epoch: 98 training_loss 0.017986615386325865 test_loss: 0.13121442794799804
epoch: 99 training_loss 0.012290229776408524 test_loss: 0.13393715620040894
epoch: 100 training_loss 0.015459908521734178 test_loss: 0.13198820352554322
epoch: 101 training_loss 0.012796651150565594 test_loss: 0.13141827583312987
epoch: 102 training_loss 0.015589912314899266 test_loss: 0.13718535900115966
epoch: 103 training_loss 0.018016892266459764 test_loss: 0.1320177435874939
epoch: 104 training_loss 0.013292489803861827 test_loss: 0.13306746482849122
epoch: 105 training_loss 0.013133193531539291 test_loss: 0.14064348936080934
epoch: 106 training_loss 0.011951446065213532 test_loss: 0.13481773138046266
epoch: 107 training_loss 0.014915892260614783 test_loss: 0.13700418472290038
epoch: 108 training_loss 0.012975084014469758 test_loss: 0.14029200077056886
epoch: 109 training_loss 0.019632130034733564 test_loss: 0.12407209873199462
epoch: 110 training_loss 0.017873055171221494 test_loss: 0.1258912205696106
epoch: 111 training_loss 0.016109789586625994 test_loss: 0.13507167100906373
epoch: 112 training_loss 0.013858862381894141 test_loss: 0.13499480485916138
epoch: 113 training_loss 0.009491795008070767 test_loss: 0.1342371106147766
epoch: 114 training_loss 0.008892747508361935 test_loss: 0.14018385410308837
epoch: 115 training_loss 0.00895052042673342 test_loss: 0.1377140164375305
epoch: 116 training_loss 0.008685312056913972 test_loss: 0.14060885906219484
epoch: 117 training_loss 0.008506617601960897 test_loss: 0.14238464832305908
epoch: 118 training_loss 0.02163300906540826 test_loss: 0.15639164447784423
epoch: 119 training_loss 0.035688896172214296 test_loss: 0.12631527185440064
epoch: 120 training_loss 0.016528535806573926 test_loss: 0.13227741718292235
epoch: 121 training_loss 0.010736732138320803 test_loss: 0.1343321681022644
epoch: 122 training_loss 0.008769178667571396 test_loss: 0.13967174291610718
epoch: 123 training_loss 0.008759173157159239 test_loss: 0.1360737442970276
epoch: 124 training_loss 0.008065647531766444 test_loss: 0.13268582820892333
epoch: 125 training_loss 0.008877727440558375 test_loss: 0.13884167671203612
epoch: 126 training_loss 0.007259642824064941 test_loss: 0.13512072563171387
epoch: 127 training_loss 0.00750240724068135 test_loss: 0.13843111991882323
epoch: 128 training_loss 0.007886381396092474 test_loss: 0.15669156312942506
epoch: 129 training_loss 0.022808576670940967 test_loss: 0.13356049060821534
epoch: 130 training_loss 0.0135302478633821 test_loss: 0.14496411085128785
epoch: 131 training_loss 0.00810936206835322 test_loss: 0.14842329025268555
epoch: 132 training_loss 0.0064236098644323645 test_loss: 0.14579927921295166
epoch: 133 training_loss 0.0061920417542569335 test_loss: 0.14780724048614502
epoch: 134 training_loss 0.005252108568092808 test_loss: 0.15009050369262694
epoch: 135 training_loss 0.00606994274072349 test_loss: 0.14839736223220826
epoch: 136 training_loss 0.008767332918941974 test_loss: 0.14015938043594361
epoch: 137 training_loss 0.007522884418722242 test_loss: 0.1498050093650818
epoch: 138 training_loss 0.00697903040330857 test_loss: 0.1475509524345398
epoch: 139 training_loss 0.004763674923451617 test_loss: 0.15033940076828003
epoch: 140 training_loss 0.004808476265752688 test_loss: 0.14737207889556886
epoch: 141 training_loss 0.006059098370606079 test_loss: 0.15875087976455687
epoch: 142 training_loss 0.03584911585086956 test_loss: 0.1794296383857727
epoch: 143 training_loss 0.06724583936389536 test_loss: 0.1325627565383911
epoch: 144 training_loss 0.027902193632908166 test_loss: 0.151419997215271
epoch: 145 training_loss 0.018427428207360208 test_loss: 0.14297821521759033
epoch: 146 training_loss 0.008420234164223075 test_loss: 0.1440606713294983
epoch: 147 training_loss 0.007419863089453429 test_loss: 0.14483901262283325
epoch: 148 training_loss 0.007282085390761495 test_loss: 0.14667822122573854
epoch: 149 training_loss 0.006176537192659453 test_loss: 0.149777090549469
epoch: 0 training_loss 38.3396724319458 test_loss: 9.406262969970703
epoch: 1 training_loss 16.419904384613037 test_loss: 6.412813568115235
epoch: 2 training_loss 12.606964616775512 test_loss: 5.191888809204102
epoch: 3 training_loss 10.545159158706666 test_loss: 4.502616882324219
epoch: 4 training_loss 9.358361239433288 test_loss: 4.050009918212891
epoch: 5 training_loss 8.305122628211976 test_loss: 3.7105369567871094
epoch: 6 training_loss 7.64149269580841 test_loss: 3.4663700103759765
epoch: 7 training_loss 7.18492169380188 test_loss: 3.2508594512939455
epoch: 8 training_loss 6.8121589136123655 test_loss: 3.14050407409668
epoch: 9 training_loss 6.46342426776886 test_loss: 2.991071319580078
epoch: 10 training_loss 6.20528739452362 test_loss: 2.874827194213867
epoch: 11 training_loss 5.8548222303390505 test_loss: 2.742866325378418
epoch: 12 training_loss 5.689784688949585 test_loss: 2.6596092224121093
epoch: 13 training_loss 5.392433123588562 test_loss: 2.552789306640625
epoch: 14 training_loss 5.245455219745636 test_loss: 2.4810226440429686
epoch: 15 training_loss 5.203666348457336 test_loss: 2.422139549255371
epoch: 16 training_loss 4.949390215873718 test_loss: 2.3809337615966797
epoch: 17 training_loss 4.700072641372681 test_loss: 2.2836767196655274
epoch: 18 training_loss 4.7002289628982545 test_loss: 2.263362503051758
epoch: 19 training_loss 4.6202337002754215 test_loss: 2.212434196472168
epoch: 20 training_loss 4.337503137588501 test_loss: 2.1498991012573243
epoch: 21 training_loss 4.364500262737274 test_loss: 2.102498245239258
epoch: 22 training_loss 4.3035950231552125 test_loss: 2.066364860534668
epoch: 23 training_loss 4.1353432035446165 test_loss: 2.0483091354370115
epoch: 24 training_loss 4.070346972942352 test_loss: 2.0080101013183596
epoch: 25 training_loss 3.9418080615997315 test_loss: 1.9766290664672852
epoch: 26 training_loss 3.95470415353775 test_loss: 1.9882450103759766
epoch: 27 training_loss 3.896165978908539 test_loss: 1.9340452194213866
epoch: 28 training_loss 3.826826021671295 test_loss: 1.924093246459961
epoch: 29 training_loss 3.7356530714035032 test_loss: 1.8550447463989257
epoch: 30 training_loss 3.808286964893341 test_loss: 1.8390541076660156
epoch: 31 training_loss 3.6372445583343507 test_loss: 1.8075525283813476
epoch: 32 training_loss 3.6041093850135804 test_loss: 1.808344268798828
epoch: 33 training_loss 3.481166000366211 test_loss: 1.7994949340820312
epoch: 34 training_loss 3.4600708150863646 test_loss: 1.7596227645874023
epoch: 35 training_loss 3.457597382068634 test_loss: 1.7568038940429687
epoch: 36 training_loss 3.355997793674469 test_loss: 1.726828384399414
epoch: 37 training_loss 3.343315634727478 test_loss: 1.7184654235839845
epoch: 38 training_loss 3.3156326317787173 test_loss: 1.6742630004882812
epoch: 39 training_loss 3.2486024785041807 test_loss: 1.6580142974853516
epoch: 40 training_loss 3.278331952095032 test_loss: 1.649642562866211
epoch: 41 training_loss 3.2732598638534545 test_loss: 1.637551498413086
epoch: 42 training_loss 3.1984024477005004 test_loss: 1.6534000396728517
epoch: 43 training_loss 3.227324569225311 test_loss: 1.6199926376342773
epoch: 44 training_loss 3.081223590373993 test_loss: 1.6185667037963867
epoch: 45 training_loss 3.085131070613861 test_loss: 1.5929068565368651
epoch: 46 training_loss 3.127534921169281 test_loss: 1.6083763122558594
epoch: 47 training_loss 3.012920684814453 test_loss: 1.560600471496582
epoch: 48 training_loss 3.0061835551261904 test_loss: 1.5545568466186523
epoch: 49 training_loss 3.0055306243896482 test_loss: 1.5397406578063966
epoch: 50 training_loss 3.0142772817611694 test_loss: 1.5650973320007324
epoch: 51 training_loss 2.986750965118408 test_loss: 1.533499813079834
epoch: 52 training_loss 2.9074862504005434 test_loss: 1.5278738021850586
epoch: 53 training_loss 2.9070496487617494 test_loss: 1.5303537368774414
epoch: 54 training_loss 2.877077991962433 test_loss: 1.502035427093506
epoch: 55 training_loss 2.845722823143005 test_loss: 1.5085269927978515
epoch: 56 training_loss 2.8758451104164124 test_loss: 1.4911486625671386
epoch: 57 training_loss 2.8637283515930174 test_loss: 1.4766682624816894
epoch: 58 training_loss 2.8494921016693113 test_loss: 1.479971218109131
epoch: 59 training_loss 2.760986865758896 test_loss: 1.4716897010803223
epoch: 60 training_loss 2.768879292011261 test_loss: 1.4588136672973633
epoch: 61 training_loss 2.7807679390907287 test_loss: 1.4744063377380372
epoch: 62 training_loss 2.677191162109375 test_loss: 1.4557177543640136
epoch: 63 training_loss 2.7137095522880554 test_loss: 1.4485097885131837
epoch: 64 training_loss 2.7156726717948914 test_loss: 1.4376796722412108
epoch: 65 training_loss 2.7127025890350343 test_loss: 1.4546010971069336
epoch: 66 training_loss 2.7250795221328734 test_loss: 1.4493651390075684
epoch: 67 training_loss 2.66686558008194 test_loss: 1.425708293914795
epoch: 68 training_loss 2.6386019563674927 test_loss: 1.3990548133850098
epoch: 69 training_loss 2.5965692257881163 test_loss: 1.421565341949463
epoch: 70 training_loss 2.602897279262543 test_loss: 1.4004690170288085
epoch: 71 training_loss 2.61042151927948 test_loss: 1.4031795501708983
epoch: 72 training_loss 2.59826935172081 test_loss: 1.417930793762207
epoch: 73 training_loss 2.5765834522247313 test_loss: 1.3909239768981934
epoch: 74 training_loss 2.5676681447029113 test_loss: 1.382524585723877
epoch: 75 training_loss 2.54561608672142 test_loss: 1.4032468795776367
epoch: 76 training_loss 2.5595014119148254 test_loss: 1.3956249237060547
epoch: 77 training_loss 2.540896933078766 test_loss: 1.3816473960876465
epoch: 78 training_loss 2.479083912372589 test_loss: 1.3848522186279297
epoch: 79 training_loss 2.4842849624156953 test_loss: 1.3485273361206054
epoch: 80 training_loss 2.4847366857528685 test_loss: 1.3597187042236327
epoch: 81 training_loss 2.4726230192184446 test_loss: 1.341862678527832
epoch: 82 training_loss 2.493912298679352 test_loss: 1.3642336845397949
epoch: 83 training_loss 2.5344620037078855 test_loss: 1.3726906776428223
epoch: 84 training_loss 2.484998849630356 test_loss: 1.3577811241149902
epoch: 85 training_loss 2.449438943862915 test_loss: 1.3382534027099608
epoch: 86 training_loss 2.4909480011463163 test_loss: 1.3454929351806642
epoch: 87 training_loss 2.390518230199814 test_loss: 1.3433589935302734
epoch: 88 training_loss 2.407196364402771 test_loss: 1.3277311325073242
epoch: 89 training_loss 2.3933810818195345 test_loss: 1.3097219467163086
epoch: 90 training_loss 2.366149039268494 test_loss: 1.3380317687988281
epoch: 91 training_loss 2.368680970668793 test_loss: 1.32519588470459
epoch: 92 training_loss 2.346289933919907 test_loss: 1.3137995719909668
epoch: 93 training_loss 2.352925190925598 test_loss: 1.3091344833374023
epoch: 94 training_loss 2.3563498067855835 test_loss: 1.3188460350036622
epoch: 95 training_loss 2.305505039691925 test_loss: 1.2935660362243653
epoch: 96 training_loss 2.335005234479904 test_loss: 1.3070741653442384
epoch: 97 training_loss 2.3070830976963044 test_loss: 1.2921653747558595
epoch: 98 training_loss 2.3047949516773225 test_loss: 1.2871031761169434
epoch: 99 training_loss 2.3285583782196047 test_loss: 1.3357369422912597
epoch: 100 training_loss 2.356627881526947 test_loss: 1.2723947525024415
epoch: 101 training_loss 2.2840569007396696 test_loss: 1.2766544342041015
epoch: 102 training_loss 2.3124672973155977 test_loss: 1.262305450439453
epoch: 103 training_loss 2.2646647560596467 test_loss: 1.2750983238220215
epoch: 104 training_loss 2.263878207206726 test_loss: 1.261120319366455
epoch: 105 training_loss 2.2406199264526365 test_loss: 1.2694760322570802
epoch: 106 training_loss 2.287891391515732 test_loss: 1.2767741203308105
epoch: 107 training_loss 2.2226150822639466 test_loss: 1.2632670402526855
epoch: 108 training_loss 2.2818582606315614 test_loss: 1.2594037055969238
epoch: 109 training_loss 2.2079444897174834 test_loss: 1.2694244384765625
epoch: 110 training_loss 2.2091739654541014 test_loss: 1.277211570739746
epoch: 111 training_loss 2.2673269295692444 test_loss: 1.2653428077697755
epoch: 112 training_loss 2.2138768470287324 test_loss: 1.2757072448730469
epoch: 113 training_loss 2.1639767944812776 test_loss: 1.2665891647338867
epoch: 114 training_loss 2.2286524188518526 test_loss: 1.2600760459899902
epoch: 115 training_loss 2.2167796552181245 test_loss: 1.2818201065063477
epoch: 116 training_loss 2.1671129035949708 test_loss: 1.258989143371582
epoch: 117 training_loss 2.2204141533374786 test_loss: 1.2350759506225586
epoch: 118 training_loss 2.1906700992584227 test_loss: 1.263211154937744
epoch: 119 training_loss 2.134912749528885 test_loss: 1.2403422355651856
epoch: 120 training_loss 2.2285549414157866 test_loss: 1.238377857208252
epoch: 121 training_loss 2.177094049453735 test_loss: 1.247842025756836
epoch: 122 training_loss 2.1892271673679353 test_loss: 1.238044548034668
epoch: 123 training_loss 2.1364138436317446 test_loss: 1.223153305053711
epoch: 124 training_loss 2.125667688846588 test_loss: 1.2380227088928222
epoch: 125 training_loss 2.1243855702877044 test_loss: 1.220522975921631
epoch: 126 training_loss 2.1343026626110078 test_loss: 1.2384517669677735
epoch: 127 training_loss 2.1332183253765105 test_loss: 1.2299674034118653
epoch: 128 training_loss 2.1460555768013 test_loss: 1.2173157691955567
epoch: 129 training_loss 2.1048762571811674 test_loss: 1.2159600257873535
epoch: 130 training_loss 2.125188580751419 test_loss: 1.2160442352294922
epoch: 131 training_loss 2.122348664999008 test_loss: 1.2233418464660644
epoch: 132 training_loss 2.0909849536418914 test_loss: 1.2168414115905761
epoch: 133 training_loss 2.1216928458213804 test_loss: 1.214076614379883
epoch: 134 training_loss 2.1063738214969634 test_loss: 1.218859577178955
epoch: 135 training_loss 2.11969043135643 test_loss: 1.1938301086425782
epoch: 136 training_loss 2.099746358394623 test_loss: 1.2208738327026367
epoch: 137 training_loss 2.115284661054611 test_loss: 1.2185778617858887
epoch: 138 training_loss 2.0894367158412934 test_loss: 1.2116812705993651
epoch: 139 training_loss 2.079646739959717 test_loss: 1.195035457611084
epoch: 140 training_loss 2.0849453699588776 test_loss: 1.2268733978271484
epoch: 141 training_loss 2.067445517778397 test_loss: 1.1975250244140625
epoch: 142 training_loss 2.04834238409996 test_loss: 1.202284336090088
epoch: 143 training_loss 2.0906060671806337 test_loss: 1.1972018241882325
epoch: 144 training_loss 2.0387156903743744 test_loss: 1.1934833526611328
epoch: 145 training_loss 2.0422961831092836 test_loss: 1.1830434799194336
epoch: 146 training_loss 2.056515235900879 test_loss: 1.1831076622009278
epoch: 147 training_loss 2.0482790756225584 test_loss: 1.2013334274291991
epoch: 148 training_loss 2.0366555678844454 test_loss: 1.2076178550720216
epoch: 149 training_loss 2.046883232593536 test_loss: 1.1683032989501954
2173.220798821632
episode: 0 training return: tensor(192.1081, device='cuda:0')
episode: 1 training return: tensor(-94.2895, device='cuda:0')
episode: 2 training return: tensor(95.6227, device='cuda:0')
episode: 3 training return: tensor(69.7294, device='cuda:0')
epoch: 1 test_true_pfm: 2692.7143916873215 sim_pfm: -205.94919568337113
episode: 4 training return: tensor(127.0933, device='cuda:0')
episode: 5 training return: tensor(-62.5511, device='cuda:0')
episode: 6 training return: tensor(265.6297, device='cuda:0')
episode: 7 training return: tensor(90.9332, device='cuda:0')
epoch: 2 test_true_pfm: 2205.7547075627795 sim_pfm: -41.53976110834628
episode: 8 training return: tensor(112.4504, device='cuda:0')
episode: 9 training return: tensor(-194.3254, device='cuda:0')
episode: 10 training return: tensor(97.2003, device='cuda:0')
episode: 11 training return: tensor(-302.8716, device='cuda:0')
epoch: 3 test_true_pfm: 2682.2620604108265 sim_pfm: -250.70184395906594
episode: 12 training return: tensor(79.0410, device='cuda:0')
episode: 13 training return: tensor(-393.6639, device='cuda:0')
episode: 14 training return: tensor(-324.7214, device='cuda:0')
episode: 15 training return: tensor(-129.1668, device='cuda:0')
epoch: 4 test_true_pfm: 2552.832619436505 sim_pfm: -210.45629903076528
episode: 16 training return: tensor(-319.7794, device='cuda:0')
episode: 17 training return: tensor(-295.6910, device='cuda:0')
episode: 18 training return: tensor(67.9375, device='cuda:0')
episode: 19 training return: tensor(139.0071, device='cuda:0')
epoch: 5 test_true_pfm: 3119.9577111965314 sim_pfm: -73.7979329219088
episode: 20 training return: tensor(-365.5263, device='cuda:0')
episode: 21 training return: tensor(-411.5570, device='cuda:0')
episode: 22 training return: tensor(195.5653, device='cuda:0')
episode: 23 training return: tensor(102.8046, device='cuda:0')
epoch: 6 test_true_pfm: 3324.0166700458944 sim_pfm: -134.1810413269947
episode: 24 training return: tensor(-141.3088, device='cuda:0')
episode: 25 training return: tensor(105.8011, device='cuda:0')
episode: 26 training return: tensor(-351.2308, device='cuda:0')
episode: 27 training return: tensor(215.5605, device='cuda:0')
epoch: 7 test_true_pfm: 2617.4407519898355 sim_pfm: 222.57318679113328
episode: 28 training return: tensor(-364.6704, device='cuda:0')
episode: 29 training return: tensor(176.3114, device='cuda:0')
episode: 30 training return: tensor(175.3884, device='cuda:0')
episode: 31 training return: tensor(249.2702, device='cuda:0')
epoch: 8 test_true_pfm: 2543.071467938011 sim_pfm: 138.46473105290593
episode: 32 training return: tensor(142.9930, device='cuda:0')
episode: 33 training return: tensor(73.6267, device='cuda:0')
episode: 34 training return: tensor(-340.5858, device='cuda:0')
episode: 35 training return: tensor(14.3118, device='cuda:0')
epoch: 9 test_true_pfm: 3479.8919655622376 sim_pfm: 224.4919539540618
episode: 36 training return: tensor(-208.8828, device='cuda:0')
episode: 37 training return: tensor(-317.5204, device='cuda:0')
episode: 38 training return: tensor(-374.7957, device='cuda:0')
episode: 39 training return: tensor(-370.3831, device='cuda:0')
epoch: 10 test_true_pfm: 1752.6859753118836 sim_pfm: 20.707091355851542
episode: 40 training return: tensor(221.1199, device='cuda:0')
episode: 41 training return: tensor(-243.8890, device='cuda:0')
episode: 42 training return: tensor(116.7155, device='cuda:0')
episode: 43 training return: tensor(-454.0712, device='cuda:0')
epoch: 11 test_true_pfm: 2371.242309604476 sim_pfm: 165.9395101450597
episode: 44 training return: tensor(-259.5699, device='cuda:0')
episode: 45 training return: tensor(-385.3665, device='cuda:0')
episode: 46 training return: tensor(-478.9948, device='cuda:0')
episode: 47 training return: tensor(215.2025, device='cuda:0')
epoch: 12 test_true_pfm: 2173.6644362884276 sim_pfm: 127.94167347093268
episode: 48 training return: tensor(199.0998, device='cuda:0')
episode: 49 training return: tensor(207.8876, device='cuda:0')
episode: 50 training return: tensor(-424.9195, device='cuda:0')
episode: 51 training return: tensor(175.7049, device='cuda:0')
epoch: 13 test_true_pfm: 2369.4842862349847 sim_pfm: -423.08435958662693
episode: 52 training return: tensor(-410.2824, device='cuda:0')
episode: 53 training return: tensor(-497.7502, device='cuda:0')
episode: 54 training return: tensor(100.7682, device='cuda:0')
episode: 55 training return: tensor(162.2674, device='cuda:0')
epoch: 14 test_true_pfm: 2246.151005687762 sim_pfm: -21.51576715358533
episode: 56 training return: tensor(-418.1583, device='cuda:0')
episode: 57 training return: tensor(-517.3425, device='cuda:0')
episode: 58 training return: tensor(218.3732, device='cuda:0')
episode: 59 training return: tensor(1.5423, device='cuda:0')
epoch: 15 test_true_pfm: 2505.9751281514814 sim_pfm: -85.09656625574765
episode: 60 training return: tensor(134.3227, device='cuda:0')
episode: 61 training return: tensor(213.8597, device='cuda:0')
episode: 62 training return: tensor(221.9738, device='cuda:0')
episode: 63 training return: tensor(-217.3454, device='cuda:0')
epoch: 16 test_true_pfm: 2242.5729906220618 sim_pfm: -0.14028037211392075
episode: 64 training return: tensor(163.3505, device='cuda:0')
episode: 65 training return: tensor(-59.1260, device='cuda:0')
episode: 66 training return: tensor(-258.9005, device='cuda:0')
episode: 67 training return: tensor(149.1579, device='cuda:0')
epoch: 17 test_true_pfm: 2193.354950815296 sim_pfm: -48.335558602508776
episode: 68 training return: tensor(218.7554, device='cuda:0')
episode: 69 training return: tensor(199.6866, device='cuda:0')
episode: 70 training return: tensor(-295.8708, device='cuda:0')
episode: 71 training return: tensor(-421.0662, device='cuda:0')
epoch: 18 test_true_pfm: 2992.2159098113657 sim_pfm: -76.28458676137961
episode: 72 training return: tensor(-414.7090, device='cuda:0')
episode: 73 training return: tensor(-259.9263, device='cuda:0')
episode: 74 training return: tensor(-357.8143, device='cuda:0')
episode: 75 training return: tensor(-418.1061, device='cuda:0')
epoch: 19 test_true_pfm: 2826.9889454298695 sim_pfm: -66.26416427486886
episode: 76 training return: tensor(34.2701, device='cuda:0')
episode: 77 training return: tensor(149.5409, device='cuda:0')
episode: 78 training return: tensor(185.1429, device='cuda:0')
episode: 79 training return: tensor(-106.3916, device='cuda:0')
epoch: 20 test_true_pfm: 2390.0166886530037 sim_pfm: -361.820140677155
episode: 80 training return: tensor(-463.0223, device='cuda:0')
episode: 81 training return: tensor(284.0987, device='cuda:0')
episode: 82 training return: tensor(-364.5714, device='cuda:0')
episode: 83 training return: tensor(278.9133, device='cuda:0')
epoch: 21 test_true_pfm: 3145.448963854453 sim_pfm: -6.302257324064461
episode: 84 training return: tensor(-203.4715, device='cuda:0')
episode: 85 training return: tensor(222.0972, device='cuda:0')
episode: 86 training return: tensor(229.5659, device='cuda:0')
episode: 87 training return: tensor(-264.9056, device='cuda:0')
epoch: 22 test_true_pfm: 2789.741852851363 sim_pfm: 13.347663027408998
episode: 88 training return: tensor(272.8053, device='cuda:0')
episode: 89 training return: tensor(-265.1627, device='cuda:0')
episode: 90 training return: tensor(97.5498, device='cuda:0')
episode: 91 training return: tensor(-140.6570, device='cuda:0')
epoch: 23 test_true_pfm: 3412.4044063119986 sim_pfm: -122.23623981055182
episode: 92 training return: tensor(228.7827, device='cuda:0')
episode: 93 training return: tensor(121.0613, device='cuda:0')
episode: 94 training return: tensor(93.4457, device='cuda:0')
episode: 95 training return: tensor(243.1741, device='cuda:0')
epoch: 24 test_true_pfm: 3402.867897845508 sim_pfm: 244.37135119140535
episode: 96 training return: tensor(165.3604, device='cuda:0')
episode: 97 training return: tensor(172.8852, device='cuda:0')
episode: 98 training return: tensor(234.0010, device='cuda:0')
episode: 99 training return: tensor(-252.9390, device='cuda:0')
epoch: 25 test_true_pfm: 3479.870378951006 sim_pfm: 285.82947561724967
episode: 100 training return: tensor(275.7967, device='cuda:0')
episode: 101 training return: tensor(350.6578, device='cuda:0')
episode: 102 training return: tensor(-413.1396, device='cuda:0')
episode: 103 training return: tensor(-336.7313, device='cuda:0')
epoch: 26 test_true_pfm: 3444.982461648864 sim_pfm: 40.720563200011384
episode: 104 training return: tensor(131.6581, device='cuda:0')
episode: 105 training return: tensor(278.9312, device='cuda:0')
episode: 106 training return: tensor(213.2109, device='cuda:0')
episode: 107 training return: tensor(-237.7385, device='cuda:0')
epoch: 27 test_true_pfm: 2944.1976425395783 sim_pfm: 286.98169702260446
episode: 108 training return: tensor(278.0105, device='cuda:0')
episode: 109 training return: tensor(304.9741, device='cuda:0')
episode: 110 training return: tensor(338.8097, device='cuda:0')
episode: 111 training return: tensor(-21.3164, device='cuda:0')
epoch: 28 test_true_pfm: 3518.5062671848996 sim_pfm: 241.92588527073772
episode: 112 training return: tensor(-384.4151, device='cuda:0')
episode: 113 training return: tensor(241.2499, device='cuda:0')
episode: 114 training return: tensor(290.8272, device='cuda:0')
episode: 115 training return: tensor(136.0312, device='cuda:0')
epoch: 29 test_true_pfm: 2923.159677281638 sim_pfm: 75.73004110553302
episode: 116 training return: tensor(226.8364, device='cuda:0')
episode: 117 training return: tensor(228.5891, device='cuda:0')
episode: 118 training return: tensor(-278.0548, device='cuda:0')
episode: 119 training return: tensor(265.3530, device='cuda:0')
epoch: 30 test_true_pfm: 2963.8332059781637 sim_pfm: 223.99164781173263
episode: 120 training return: tensor(242.2707, device='cuda:0')
episode: 121 training return: tensor(220.1868, device='cuda:0')
episode: 122 training return: tensor(181.0886, device='cuda:0')
episode: 123 training return: tensor(-234.9426, device='cuda:0')
epoch: 31 test_true_pfm: 3050.6324763400694 sim_pfm: 163.55317584144845
episode: 124 training return: tensor(10.8895, device='cuda:0')
episode: 125 training return: tensor(188.3958, device='cuda:0')
episode: 126 training return: tensor(232.3474, device='cuda:0')
episode: 127 training return: tensor(230.4445, device='cuda:0')
epoch: 32 test_true_pfm: 3434.305397848275 sim_pfm: 93.59110739363435
episode: 128 training return: tensor(234.6966, device='cuda:0')
episode: 129 training return: tensor(190.8960, device='cuda:0')
episode: 130 training return: tensor(-102.3678, device='cuda:0')
episode: 131 training return: tensor(219.4124, device='cuda:0')
epoch: 33 test_true_pfm: 3054.915968449534 sim_pfm: 2.622101800128197
episode: 132 training return: tensor(30.1257, device='cuda:0')
episode: 133 training return: tensor(147.5695, device='cuda:0')
episode: 134 training return: tensor(222.0872, device='cuda:0')
episode: 135 training return: tensor(165.4509, device='cuda:0')
epoch: 34 test_true_pfm: 3027.8403954071996 sim_pfm: 61.6349283363088
episode: 136 training return: tensor(-381.3794, device='cuda:0')
episode: 137 training return: tensor(332.5000, device='cuda:0')
episode: 138 training return: tensor(-316.7834, device='cuda:0')
episode: 139 training return: tensor(245.8725, device='cuda:0')
epoch: 35 test_true_pfm: 2630.0163982742774 sim_pfm: 62.894881685380824
episode: 140 training return: tensor(196.0220, device='cuda:0')
episode: 141 training return: tensor(292.9742, device='cuda:0')
episode: 142 training return: tensor(316.5710, device='cuda:0')
episode: 143 training return: tensor(-394.4611, device='cuda:0')
epoch: 36 test_true_pfm: 2518.5157618815642 sim_pfm: -66.80518433288671
episode: 144 training return: tensor(135.0695, device='cuda:0')
episode: 145 training return: tensor(226.1953, device='cuda:0')
episode: 146 training return: tensor(225.4610, device='cuda:0')
episode: 147 training return: tensor(275.4192, device='cuda:0')
epoch: 37 test_true_pfm: 2937.2144941954707 sim_pfm: 218.3652382651732
episode: 148 training return: tensor(440.7536, device='cuda:0')
episode: 149 training return: tensor(247.5101, device='cuda:0')
episode: 150 training return: tensor(147.8779, device='cuda:0')
episode: 151 training return: tensor(-229.1360, device='cuda:0')
epoch: 38 test_true_pfm: 3488.717988410348 sim_pfm: 314.14568082375143
episode: 152 training return: tensor(-68.9053, device='cuda:0')
episode: 153 training return: tensor(21.7844, device='cuda:0')
episode: 154 training return: tensor(-297.9365, device='cuda:0')
episode: 155 training return: tensor(245.8568, device='cuda:0')
epoch: 39 test_true_pfm: 3452.7828457464734 sim_pfm: 34.63266206641371
episode: 156 training return: tensor(124.1996, device='cuda:0')
episode: 157 training return: tensor(335.2829, device='cuda:0')
episode: 158 training return: tensor(141.8023, device='cuda:0')
episode: 159 training return: tensor(-310.9691, device='cuda:0')
epoch: 40 test_true_pfm: 3497.479398053785 sim_pfm: 301.49194341531256
episode: 160 training return: tensor(343.0442, device='cuda:0')
episode: 161 training return: tensor(261.9005, device='cuda:0')
episode: 162 training return: tensor(69.6380, device='cuda:0')
episode: 163 training return: tensor(222.9452, device='cuda:0')
epoch: 41 test_true_pfm: 3476.574982587273 sim_pfm: 172.6412398010822
episode: 164 training return: tensor(64.3068, device='cuda:0')
episode: 165 training return: tensor(-209.5435, device='cuda:0')
episode: 166 training return: tensor(-198.2657, device='cuda:0')
episode: 167 training return: tensor(-269.6830, device='cuda:0')
epoch: 42 test_true_pfm: 2429.2228973400374 sim_pfm: 76.735641225018
episode: 168 training return: tensor(178.2273, device='cuda:0')
episode: 169 training return: tensor(-284.5230, device='cuda:0')
episode: 170 training return: tensor(-237.5493, device='cuda:0')
episode: 171 training return: tensor(-170.1691, device='cuda:0')
epoch: 43 test_true_pfm: 2870.881624388117 sim_pfm: -40.60737384782018
episode: 172 training return: tensor(-482.0492, device='cuda:0')
episode: 173 training return: tensor(224.8271, device='cuda:0')
episode: 174 training return: tensor(-327.4777, device='cuda:0')
episode: 175 training return: tensor(276.7433, device='cuda:0')
epoch: 44 test_true_pfm: 2585.7851522126903 sim_pfm: 116.10625718049899
episode: 176 training return: tensor(-18.3081, device='cuda:0')
episode: 177 training return: tensor(-370.2541, device='cuda:0')
episode: 178 training return: tensor(103.2471, device='cuda:0')
episode: 179 training return: tensor(233.6218, device='cuda:0')
epoch: 45 test_true_pfm: 2673.4817072756578 sim_pfm: 249.3186700475053
episode: 180 training return: tensor(101.8570, device='cuda:0')
episode: 181 training return: tensor(-107.3362, device='cuda:0')
episode: 182 training return: tensor(245.3480, device='cuda:0')
episode: 183 training return: tensor(232.9182, device='cuda:0')
epoch: 46 test_true_pfm: 3477.0165715103053 sim_pfm: 122.66437710914761
episode: 184 training return: tensor(137.5802, device='cuda:0')
episode: 185 training return: tensor(238.9030, device='cuda:0')
episode: 186 training return: tensor(223.7555, device='cuda:0')
episode: 187 training return: tensor(199.3322, device='cuda:0')
epoch: 47 test_true_pfm: 3507.544033285212 sim_pfm: 303.33797650029493
episode: 188 training return: tensor(-316.2542, device='cuda:0')
episode: 189 training return: tensor(-277.1042, device='cuda:0')
episode: 190 training return: tensor(86.1621, device='cuda:0')
episode: 191 training return: tensor(248.7489, device='cuda:0')
epoch: 48 test_true_pfm: 2973.2674822453523 sim_pfm: -5.185588312917389
episode: 192 training return: tensor(-116.2002, device='cuda:0')
episode: 193 training return: tensor(252.9864, device='cuda:0')
episode: 194 training return: tensor(204.1136, device='cuda:0')
episode: 195 training return: tensor(134.8381, device='cuda:0')
epoch: 49 test_true_pfm: 3447.193942510877 sim_pfm: 302.70364087862737
episode: 196 training return: tensor(-162.9662, device='cuda:0')
episode: 197 training return: tensor(88.0289, device='cuda:0')
episode: 198 training return: tensor(-162.5179, device='cuda:0')
episode: 199 training return: tensor(-209.6405, device='cuda:0')
epoch: 50 test_true_pfm: 2844.5057844479015 sim_pfm: 84.38008224836085
episode: 200 training return: tensor(171.4909, device='cuda:0')
episode: 201 training return: tensor(-317.9758, device='cuda:0')
episode: 202 training return: tensor(141.1542, device='cuda:0')
episode: 203 training return: tensor(330.0852, device='cuda:0')
epoch: 51 test_true_pfm: 3470.593693344967 sim_pfm: 233.00160151151553
episode: 204 training return: tensor(240.7502, device='cuda:0')
episode: 205 training return: tensor(142.6379, device='cuda:0')
episode: 206 training return: tensor(117.2054, device='cuda:0')
episode: 207 training return: tensor(166.1105, device='cuda:0')
epoch: 52 test_true_pfm: 3452.1785611211467 sim_pfm: 295.06704707250657
episode: 208 training return: tensor(1.4207, device='cuda:0')
episode: 209 training return: tensor(142.4102, device='cuda:0')
episode: 210 training return: tensor(-89.6466, device='cuda:0')
episode: 211 training return: tensor(156.1068, device='cuda:0')
epoch: 53 test_true_pfm: 3489.809199944633 sim_pfm: 311.7875567598385
episode: 212 training return: tensor(-360.9485, device='cuda:0')
episode: 213 training return: tensor(217.7510, device='cuda:0')
episode: 214 training return: tensor(176.3245, device='cuda:0')
episode: 215 training return: tensor(277.1803, device='cuda:0')
epoch: 54 test_true_pfm: 2399.504727227349 sim_pfm: 253.4640848362275
episode: 216 training return: tensor(302.5244, device='cuda:0')
episode: 217 training return: tensor(-242.1970, device='cuda:0')
episode: 218 training return: tensor(127.7930, device='cuda:0')
episode: 219 training return: tensor(61.4176, device='cuda:0')
epoch: 55 test_true_pfm: 3428.887042645778 sim_pfm: 236.7716981920142
episode: 220 training return: tensor(-126.9237, device='cuda:0')
episode: 221 training return: tensor(361.5461, device='cuda:0')
episode: 222 training return: tensor(251.3513, device='cuda:0')
episode: 223 training return: tensor(-329.9442, device='cuda:0')
epoch: 56 test_true_pfm: 3523.1760339660837 sim_pfm: 106.52341279967611
episode: 224 training return: tensor(277.0764, device='cuda:0')
episode: 225 training return: tensor(256.3857, device='cuda:0')
episode: 226 training return: tensor(-641.6223, device='cuda:0')
episode: 227 training return: tensor(223.9966, device='cuda:0')
epoch: 57 test_true_pfm: 2456.167154374211 sim_pfm: 69.93128288226823
episode: 228 training return: tensor(-125.6218, device='cuda:0')
episode: 229 training return: tensor(336.3397, device='cuda:0')
episode: 230 training return: tensor(236.8370, device='cuda:0')
episode: 231 training return: tensor(42.9957, device='cuda:0')
epoch: 58 test_true_pfm: 3516.9137247346866 sim_pfm: 276.0311680624921
episode: 232 training return: tensor(-357.1132, device='cuda:0')
episode: 233 training return: tensor(232.8810, device='cuda:0')
episode: 234 training return: tensor(313.6628, device='cuda:0')
episode: 235 training return: tensor(267.0639, device='cuda:0')
epoch: 59 test_true_pfm: 3434.2878800833932 sim_pfm: 260.23705949825427
episode: 236 training return: tensor(-206.7789, device='cuda:0')
episode: 237 training return: tensor(71.5600, device='cuda:0')
episode: 238 training return: tensor(227.2760, device='cuda:0')
episode: 239 training return: tensor(240.4898, device='cuda:0')
epoch: 60 test_true_pfm: 3464.892788220535 sim_pfm: 250.7099913606944
episode: 240 training return: tensor(259.5029, device='cuda:0')
episode: 241 training return: tensor(224.0182, device='cuda:0')
episode: 242 training return: tensor(68.9326, device='cuda:0')
episode: 243 training return: tensor(177.3151, device='cuda:0')
epoch: 61 test_true_pfm: 3450.7628204320777 sim_pfm: -68.52641436252937
episode: 244 training return: tensor(296.8576, device='cuda:0')
episode: 245 training return: tensor(199.4483, device='cuda:0')
episode: 246 training return: tensor(-215.5723, device='cuda:0')
episode: 247 training return: tensor(317.0435, device='cuda:0')
epoch: 62 test_true_pfm: 3520.7847254586864 sim_pfm: 271.9532190714672
episode: 248 training return: tensor(256.4044, device='cuda:0')
episode: 249 training return: tensor(394.8329, device='cuda:0')
episode: 250 training return: tensor(131.1063, device='cuda:0')
episode: 251 training return: tensor(272.0515, device='cuda:0')
epoch: 63 test_true_pfm: 3451.3799407293086 sim_pfm: 247.626849760185
episode: 252 training return: tensor(-236.5084, device='cuda:0')
episode: 253 training return: tensor(296.4227, device='cuda:0')
episode: 254 training return: tensor(267.0677, device='cuda:0')
episode: 255 training return: tensor(229.0826, device='cuda:0')
epoch: 64 test_true_pfm: 3451.7362675904556 sim_pfm: 237.11404838358672
episode: 256 training return: tensor(256.1139, device='cuda:0')
episode: 257 training return: tensor(174.1528, device='cuda:0')
episode: 258 training return: tensor(304.1628, device='cuda:0')
episode: 259 training return: tensor(194.7392, device='cuda:0')
epoch: 65 test_true_pfm: 3479.114722096754 sim_pfm: 286.96349087768857
episode: 260 training return: tensor(247.2394, device='cuda:0')
episode: 261 training return: tensor(178.7387, device='cuda:0')
episode: 262 training return: tensor(247.6570, device='cuda:0')
episode: 263 training return: tensor(57.0992, device='cuda:0')
epoch: 66 test_true_pfm: 3480.841117730269 sim_pfm: 276.07155599885544
episode: 264 training return: tensor(255.6771, device='cuda:0')
episode: 265 training return: tensor(158.6054, device='cuda:0')
episode: 266 training return: tensor(197.9996, device='cuda:0')
episode: 267 training return: tensor(130.0943, device='cuda:0')
epoch: 67 test_true_pfm: 2985.8097240095735 sim_pfm: 354.3848232534171
episode: 268 training return: tensor(212.3026, device='cuda:0')
episode: 269 training return: tensor(185.5843, device='cuda:0')
episode: 270 training return: tensor(271.6656, device='cuda:0')
episode: 271 training return: tensor(186.1499, device='cuda:0')
epoch: 68 test_true_pfm: 3493.9568792502046 sim_pfm: 247.87965064890645
episode: 272 training return: tensor(218.0600, device='cuda:0')
episode: 273 training return: tensor(343.5084, device='cuda:0')
episode: 274 training return: tensor(3.7282, device='cuda:0')
episode: 275 training return: tensor(297.9395, device='cuda:0')
epoch: 69 test_true_pfm: 3465.6068993077533 sim_pfm: 277.10043297685723
episode: 276 training return: tensor(285.7627, device='cuda:0')
episode: 277 training return: tensor(334.5345, device='cuda:0')
episode: 278 training return: tensor(224.5331, device='cuda:0')
episode: 279 training return: tensor(243.4820, device='cuda:0')
epoch: 70 test_true_pfm: 3465.8503573446737 sim_pfm: 321.15160162505344
episode: 280 training return: tensor(169.9850, device='cuda:0')
episode: 281 training return: tensor(-180.2680, device='cuda:0')
episode: 282 training return: tensor(230.8354, device='cuda:0')
episode: 283 training return: tensor(278.6543, device='cuda:0')
epoch: 71 test_true_pfm: 3513.405856952337 sim_pfm: 314.07685157879797
episode: 284 training return: tensor(393.0020, device='cuda:0')
episode: 285 training return: tensor(193.4824, device='cuda:0')
episode: 286 training return: tensor(303.9875, device='cuda:0')
episode: 287 training return: tensor(217.6859, device='cuda:0')
epoch: 72 test_true_pfm: 3548.7137815811566 sim_pfm: 301.6592048161062
episode: 288 training return: tensor(249.4968, device='cuda:0')
episode: 289 training return: tensor(217.6399, device='cuda:0')
episode: 290 training return: tensor(212.4526, device='cuda:0')
episode: 291 training return: tensor(274.5234, device='cuda:0')
epoch: 73 test_true_pfm: 3402.9618427573464 sim_pfm: 167.90126986196265
episode: 292 training return: tensor(150.3066, device='cuda:0')
episode: 293 training return: tensor(315.3506, device='cuda:0')
episode: 294 training return: tensor(234.0462, device='cuda:0')
episode: 295 training return: tensor(267.7511, device='cuda:0')
epoch: 74 test_true_pfm: 3526.772479492864 sim_pfm: 293.7171016850237
episode: 296 training return: tensor(347.5068, device='cuda:0')
episode: 297 training return: tensor(192.5270, device='cuda:0')
episode: 298 training return: tensor(212.9269, device='cuda:0')
episode: 299 training return: tensor(263.6465, device='cuda:0')
epoch: 75 test_true_pfm: 3473.6649616839773 sim_pfm: 282.2893321477847
episode: 300 training return: tensor(328.1296, device='cuda:0')
episode: 301 training return: tensor(-285.1066, device='cuda:0')
episode: 302 training return: tensor(299.9281, device='cuda:0')
episode: 303 training return: tensor(256.1863, device='cuda:0')
epoch: 76 test_true_pfm: 2988.08959599869 sim_pfm: 299.98045745371684
episode: 304 training return: tensor(267.7069, device='cuda:0')
episode: 305 training return: tensor(265.8971, device='cuda:0')
episode: 306 training return: tensor(209.5992, device='cuda:0')
episode: 307 training return: tensor(300.6714, device='cuda:0')
epoch: 77 test_true_pfm: 3501.2764995297607 sim_pfm: 143.68786404762068
episode: 308 training return: tensor(209.6819, device='cuda:0')
episode: 309 training return: tensor(339.6860, device='cuda:0')
episode: 310 training return: tensor(204.5704, device='cuda:0')
episode: 311 training return: tensor(288.9501, device='cuda:0')
epoch: 78 test_true_pfm: 3462.581141844555 sim_pfm: 339.3184224445528
episode: 312 training return: tensor(-209.1744, device='cuda:0')
episode: 313 training return: tensor(299.5714, device='cuda:0')
episode: 314 training return: tensor(219.8257, device='cuda:0')
episode: 315 training return: tensor(295.3920, device='cuda:0')
epoch: 79 test_true_pfm: 3564.5185199916227 sim_pfm: 334.445557927024
episode: 316 training return: tensor(263.6969, device='cuda:0')
episode: 317 training return: tensor(349.0089, device='cuda:0')
episode: 318 training return: tensor(168.0091, device='cuda:0')
episode: 319 training return: tensor(149.0163, device='cuda:0')
epoch: 80 test_true_pfm: 3433.4751392198327 sim_pfm: 239.26130972365112
episode: 320 training return: tensor(256.0138, device='cuda:0')
episode: 321 training return: tensor(-188.6499, device='cuda:0')
episode: 322 training return: tensor(243.2134, device='cuda:0')
episode: 323 training return: tensor(194.5271, device='cuda:0')
epoch: 81 test_true_pfm: 3529.3107414860588 sim_pfm: 365.8106925614023
episode: 324 training return: tensor(308.9397, device='cuda:0')
episode: 325 training return: tensor(186.5778, device='cuda:0')
episode: 326 training return: tensor(323.3719, device='cuda:0')
episode: 327 training return: tensor(268.4938, device='cuda:0')
epoch: 82 test_true_pfm: 3521.248993303017 sim_pfm: 342.2396223323303
episode: 328 training return: tensor(308.3174, device='cuda:0')
episode: 329 training return: tensor(-279.0400, device='cuda:0')
episode: 330 training return: tensor(232.7390, device='cuda:0')
episode: 331 training return: tensor(161.7657, device='cuda:0')
epoch: 83 test_true_pfm: 3529.3628933167624 sim_pfm: 300.58725382753374
episode: 332 training return: tensor(287.9604, device='cuda:0')
episode: 333 training return: tensor(212.3603, device='cuda:0')
episode: 334 training return: tensor(341.0659, device='cuda:0')
episode: 335 training return: tensor(-25.9755, device='cuda:0')
epoch: 84 test_true_pfm: 3157.676391592279 sim_pfm: 350.3246255678726
episode: 336 training return: tensor(342.5811, device='cuda:0')
episode: 337 training return: tensor(181.0232, device='cuda:0')
episode: 338 training return: tensor(205.0046, device='cuda:0')
episode: 339 training return: tensor(276.3594, device='cuda:0')
epoch: 85 test_true_pfm: 3513.998012590137 sim_pfm: 374.218609167651
episode: 340 training return: tensor(274.2075, device='cuda:0')
episode: 341 training return: tensor(327.9793, device='cuda:0')
episode: 342 training return: tensor(283.1357, device='cuda:0')
episode: 343 training return: tensor(-217.9792, device='cuda:0')
epoch: 86 test_true_pfm: 3529.482854572309 sim_pfm: 349.68783702716854
episode: 344 training return: tensor(381.8892, device='cuda:0')
episode: 345 training return: tensor(278.2569, device='cuda:0')
episode: 346 training return: tensor(330.9783, device='cuda:0')
episode: 347 training return: tensor(298.5933, device='cuda:0')
epoch: 87 test_true_pfm: 3540.430386568138 sim_pfm: 382.4347504058387
episode: 348 training return: tensor(278.3234, device='cuda:0')
episode: 349 training return: tensor(347.8703, device='cuda:0')
episode: 350 training return: tensor(321.5403, device='cuda:0')
episode: 351 training return: tensor(214.0800, device='cuda:0')
epoch: 88 test_true_pfm: 3502.306848836848 sim_pfm: 170.3713694656229
episode: 352 training return: tensor(367.4455, device='cuda:0')
episode: 353 training return: tensor(364.9260, device='cuda:0')
episode: 354 training return: tensor(332.5505, device='cuda:0')
episode: 355 training return: tensor(285.8054, device='cuda:0')
epoch: 89 test_true_pfm: 3540.3935969819995 sim_pfm: 343.15041568303906
episode: 356 training return: tensor(359.9617, device='cuda:0')
episode: 357 training return: tensor(298.6115, device='cuda:0')
episode: 358 training return: tensor(-99.4154, device='cuda:0')
episode: 359 training return: tensor(314.0138, device='cuda:0')
epoch: 90 test_true_pfm: 3579.1881692748248 sim_pfm: 392.6738948511581
episode: 360 training return: tensor(390.3454, device='cuda:0')
episode: 361 training return: tensor(374.5388, device='cuda:0')
episode: 362 training return: tensor(312.5899, device='cuda:0')
episode: 363 training return: tensor(341.2879, device='cuda:0')
epoch: 91 test_true_pfm: 3537.425827491494 sim_pfm: 363.4413442585307
episode: 364 training return: tensor(222.6156, device='cuda:0')
episode: 365 training return: tensor(352.1338, device='cuda:0')
episode: 366 training return: tensor(290.4182, device='cuda:0')
episode: 367 training return: tensor(301.1456, device='cuda:0')
epoch: 92 test_true_pfm: 3533.843720008447 sim_pfm: 361.001253329004
episode: 368 training return: tensor(201.4657, device='cuda:0')
episode: 369 training return: tensor(349.0461, device='cuda:0')
episode: 370 training return: tensor(334.0527, device='cuda:0')
episode: 371 training return: tensor(257.1179, device='cuda:0')
epoch: 93 test_true_pfm: 3516.1941515199646 sim_pfm: 363.4783157130102
episode: 372 training return: tensor(-265.5018, device='cuda:0')
episode: 373 training return: tensor(276.1838, device='cuda:0')
episode: 374 training return: tensor(181.3099, device='cuda:0')
episode: 375 training return: tensor(-136.1888, device='cuda:0')
epoch: 94 test_true_pfm: 3575.5364721871833 sim_pfm: 374.98042463323026
episode: 376 training return: tensor(303.1088, device='cuda:0')
episode: 377 training return: tensor(323.8041, device='cuda:0')
episode: 378 training return: tensor(227.6424, device='cuda:0')
episode: 379 training return: tensor(204.0150, device='cuda:0')
epoch: 95 test_true_pfm: 3492.94118103487 sim_pfm: 285.57651161590667
episode: 380 training return: tensor(238.1221, device='cuda:0')
episode: 381 training return: tensor(308.7975, device='cuda:0')
episode: 382 training return: tensor(230.5571, device='cuda:0')
episode: 383 training return: tensor(281.6498, device='cuda:0')
epoch: 96 test_true_pfm: 3543.4681190015067 sim_pfm: 370.7748603806831
episode: 384 training return: tensor(251.4550, device='cuda:0')
episode: 385 training return: tensor(342.9311, device='cuda:0')
episode: 386 training return: tensor(-292.6929, device='cuda:0')
episode: 387 training return: tensor(222.5539, device='cuda:0')
epoch: 97 test_true_pfm: 3481.2728101772373 sim_pfm: 332.60532858542865
episode: 388 training return: tensor(356.1146, device='cuda:0')
episode: 389 training return: tensor(314.8059, device='cuda:0')
episode: 390 training return: tensor(332.6044, device='cuda:0')
episode: 391 training return: tensor(190.0370, device='cuda:0')
epoch: 98 test_true_pfm: 3523.7784538766227 sim_pfm: 369.8697281525626
episode: 392 training return: tensor(343.9939, device='cuda:0')
episode: 393 training return: tensor(298.4494, device='cuda:0')
episode: 394 training return: tensor(307.3542, device='cuda:0')
episode: 395 training return: tensor(288.4205, device='cuda:0')
epoch: 99 test_true_pfm: 3483.877642736267 sim_pfm: 329.23922401697683
episode: 396 training return: tensor(362.7893, device='cuda:0')
episode: 397 training return: tensor(326.5291, device='cuda:0')
episode: 398 training return: tensor(314.0612, device='cuda:0')
episode: 399 training return: tensor(375.2322, device='cuda:0')
epoch: 100 test_true_pfm: 3582.2425929841866 sim_pfm: 406.1366320086333
episode: 400 training return: tensor(362.0088, device='cuda:0')
episode: 401 training return: tensor(221.3557, device='cuda:0')
episode: 402 training return: tensor(319.6495, device='cuda:0')
episode: 403 training return: tensor(301.8457, device='cuda:0')
epoch: 101 test_true_pfm: 3530.809242562829 sim_pfm: 338.1120349387638
episode: 404 training return: tensor(365.9397, device='cuda:0')
episode: 405 training return: tensor(343.3081, device='cuda:0')
episode: 406 training return: tensor(352.0829, device='cuda:0')
episode: 407 training return: tensor(306.5471, device='cuda:0')
epoch: 102 test_true_pfm: 3486.655782244994 sim_pfm: 327.45299097924726
episode: 408 training return: tensor(312.7728, device='cuda:0')
episode: 409 training return: tensor(238.8033, device='cuda:0')
episode: 410 training return: tensor(242.1606, device='cuda:0')
episode: 411 training return: tensor(360.3613, device='cuda:0')
epoch: 103 test_true_pfm: 3557.113620911556 sim_pfm: 337.172945820319
episode: 412 training return: tensor(288.5914, device='cuda:0')
episode: 413 training return: tensor(323.8899, device='cuda:0')
episode: 414 training return: tensor(303.1299, device='cuda:0')
episode: 415 training return: tensor(340.2514, device='cuda:0')
epoch: 104 test_true_pfm: 3561.124224339046 sim_pfm: 396.3137964640434
episode: 416 training return: tensor(313.9706, device='cuda:0')
episode: 417 training return: tensor(319.9350, device='cuda:0')
episode: 418 training return: tensor(307.8468, device='cuda:0')
episode: 419 training return: tensor(259.7078, device='cuda:0')
epoch: 105 test_true_pfm: 3520.433267436976 sim_pfm: 345.8921877905377
episode: 420 training return: tensor(285.7538, device='cuda:0')
episode: 421 training return: tensor(312.5023, device='cuda:0')
episode: 422 training return: tensor(338.9565, device='cuda:0')
episode: 423 training return: tensor(272.8673, device='cuda:0')
epoch: 106 test_true_pfm: 3565.9770671051424 sim_pfm: 397.73508226900594
episode: 424 training return: tensor(324.8564, device='cuda:0')
episode: 425 training return: tensor(352.2634, device='cuda:0')
episode: 426 training return: tensor(370.6302, device='cuda:0')
episode: 427 training return: tensor(285.7104, device='cuda:0')
epoch: 107 test_true_pfm: 3524.381483798539 sim_pfm: 304.5890513647464
episode: 428 training return: tensor(331.5750, device='cuda:0')
episode: 429 training return: tensor(357.2802, device='cuda:0')
episode: 430 training return: tensor(234.7165, device='cuda:0')
episode: 431 training return: tensor(316.0609, device='cuda:0')
epoch: 108 test_true_pfm: 3555.3997534072423 sim_pfm: 379.3327572446142
episode: 432 training return: tensor(283.7684, device='cuda:0')
episode: 433 training return: tensor(330.0016, device='cuda:0')
episode: 434 training return: tensor(249.7381, device='cuda:0')
episode: 435 training return: tensor(353.8181, device='cuda:0')
epoch: 109 test_true_pfm: 3580.3142454588365 sim_pfm: 412.9478263405229
episode: 436 training return: tensor(375.5992, device='cuda:0')
episode: 437 training return: tensor(327.3866, device='cuda:0')
episode: 438 training return: tensor(364.5114, device='cuda:0')
episode: 439 training return: tensor(273.0394, device='cuda:0')
epoch: 110 test_true_pfm: 3317.863136377518 sim_pfm: 393.87524637094856
episode: 440 training return: tensor(280.3307, device='cuda:0')
episode: 441 training return: tensor(386.6902, device='cuda:0')
episode: 442 training return: tensor(226.2794, device='cuda:0')
episode: 443 training return: tensor(356.1111, device='cuda:0')
epoch: 111 test_true_pfm: 3546.4829203359513 sim_pfm: 363.52193256547133
episode: 444 training return: tensor(285.9524, device='cuda:0')
episode: 445 training return: tensor(335.4639, device='cuda:0')
episode: 446 training return: tensor(378.3004, device='cuda:0')
episode: 447 training return: tensor(353.1905, device='cuda:0')
epoch: 112 test_true_pfm: 3264.3058724575153 sim_pfm: 351.13032401457895
episode: 448 training return: tensor(343.9812, device='cuda:0')
episode: 449 training return: tensor(308.9389, device='cuda:0')
episode: 450 training return: tensor(268.5598, device='cuda:0')
episode: 451 training return: tensor(373.6527, device='cuda:0')
epoch: 113 test_true_pfm: 3594.60125574119 sim_pfm: 385.27277211260906
episode: 452 training return: tensor(328.8164, device='cuda:0')
episode: 453 training return: tensor(403.8789, device='cuda:0')
episode: 454 training return: tensor(332.4543, device='cuda:0')
episode: 455 training return: tensor(283.4757, device='cuda:0')
epoch: 114 test_true_pfm: 3624.5202983592403 sim_pfm: 391.48909062613774
episode: 456 training return: tensor(303.8105, device='cuda:0')
episode: 457 training return: tensor(262.8695, device='cuda:0')
episode: 458 training return: tensor(344.6369, device='cuda:0')
episode: 459 training return: tensor(400.9111, device='cuda:0')
epoch: 115 test_true_pfm: 3560.3729814596522 sim_pfm: 400.6682056657931
episode: 460 training return: tensor(248.6975, device='cuda:0')
episode: 461 training return: tensor(219.1375, device='cuda:0')
episode: 462 training return: tensor(-18.4153, device='cuda:0')
episode: 463 training return: tensor(315.7610, device='cuda:0')
epoch: 116 test_true_pfm: 2936.7118750154623 sim_pfm: 348.451811380306
episode: 464 training return: tensor(365.8255, device='cuda:0')
episode: 465 training return: tensor(375.6776, device='cuda:0')
episode: 466 training return: tensor(315.3627, device='cuda:0')
episode: 467 training return: tensor(323.5871, device='cuda:0')
epoch: 117 test_true_pfm: 3530.0543349773293 sim_pfm: 385.47620816424023
episode: 468 training return: tensor(312.1048, device='cuda:0')
episode: 469 training return: tensor(377.8806, device='cuda:0')
episode: 470 training return: tensor(251.8373, device='cuda:0')
episode: 471 training return: tensor(221.1067, device='cuda:0')
epoch: 118 test_true_pfm: 3573.5354807090384 sim_pfm: 381.82501185446745
episode: 472 training return: tensor(333.9374, device='cuda:0')
episode: 473 training return: tensor(405.0680, device='cuda:0')
episode: 474 training return: tensor(390.6180, device='cuda:0')
episode: 475 training return: tensor(405.2491, device='cuda:0')
epoch: 119 test_true_pfm: 3582.2361791510484 sim_pfm: 383.3730961130932
episode: 476 training return: tensor(242.8147, device='cuda:0')
episode: 477 training return: tensor(297.5510, device='cuda:0')
episode: 478 training return: tensor(305.6690, device='cuda:0')
episode: 479 training return: tensor(269.2948, device='cuda:0')
epoch: 120 test_true_pfm: 3537.85846659527 sim_pfm: 366.4295870867597
episode: 480 training return: tensor(246.8145, device='cuda:0')
episode: 481 training return: tensor(330.0501, device='cuda:0')
episode: 482 training return: tensor(360.4195, device='cuda:0')
episode: 483 training return: tensor(216.6849, device='cuda:0')
epoch: 121 test_true_pfm: 3548.5981429173794 sim_pfm: 410.09879376492853
episode: 484 training return: tensor(374.0699, device='cuda:0')
episode: 485 training return: tensor(317.2557, device='cuda:0')
episode: 486 training return: tensor(268.7404, device='cuda:0')
episode: 487 training return: tensor(325.5137, device='cuda:0')
epoch: 122 test_true_pfm: 3562.7565367884467 sim_pfm: 385.59552763499477
episode: 488 training return: tensor(350.2237, device='cuda:0')
episode: 489 training return: tensor(244.0638, device='cuda:0')
episode: 490 training return: tensor(297.0940, device='cuda:0')
episode: 491 training return: tensor(321.6142, device='cuda:0')
epoch: 123 test_true_pfm: 3526.1995834425215 sim_pfm: 346.5377447427406
episode: 492 training return: tensor(311.0481, device='cuda:0')
episode: 493 training return: tensor(-20.0614, device='cuda:0')
episode: 494 training return: tensor(-152.3646, device='cuda:0')
episode: 495 training return: tensor(281.7816, device='cuda:0')
epoch: 124 test_true_pfm: 3479.5330808762233 sim_pfm: 229.51946640741275
episode: 496 training return: tensor(253.3052, device='cuda:0')
episode: 497 training return: tensor(282.5172, device='cuda:0')
episode: 498 training return: tensor(213.7335, device='cuda:0')
episode: 499 training return: tensor(253.3954, device='cuda:0')
epoch: 125 test_true_pfm: 3311.5131467092287 sim_pfm: 261.87662272475427
episode: 500 training return: tensor(368.6351, device='cuda:0')
episode: 501 training return: tensor(366.7481, device='cuda:0')
episode: 502 training return: tensor(263.5009, device='cuda:0')
episode: 503 training return: tensor(340.8439, device='cuda:0')
epoch: 126 test_true_pfm: 3572.361934777098 sim_pfm: 363.27471930409473
episode: 504 training return: tensor(323.6948, device='cuda:0')
episode: 505 training return: tensor(-252.4769, device='cuda:0')
episode: 506 training return: tensor(379.4700, device='cuda:0')
episode: 507 training return: tensor(337.0683, device='cuda:0')
epoch: 127 test_true_pfm: 3246.7417852856947 sim_pfm: 426.61610684983316
episode: 508 training return: tensor(354.6557, device='cuda:0')
episode: 509 training return: tensor(320.9828, device='cuda:0')
episode: 510 training return: tensor(-235.8723, device='cuda:0')
episode: 511 training return: tensor(351.9300, device='cuda:0')
epoch: 128 test_true_pfm: 3058.989364555782 sim_pfm: 354.6876508811838
episode: 512 training return: tensor(293.2054, device='cuda:0')
episode: 513 training return: tensor(327.1842, device='cuda:0')
episode: 514 training return: tensor(337.3601, device='cuda:0')
episode: 515 training return: tensor(331.7559, device='cuda:0')
epoch: 129 test_true_pfm: 3571.9898713184266 sim_pfm: 387.5776609783546
episode: 516 training return: tensor(283.7694, device='cuda:0')
episode: 517 training return: tensor(332.0743, device='cuda:0')
episode: 518 training return: tensor(332.1892, device='cuda:0')
episode: 519 training return: tensor(3.5264, device='cuda:0')
epoch: 130 test_true_pfm: 3584.7472087679257 sim_pfm: 403.0647918508815
episode: 520 training return: tensor(399.9802, device='cuda:0')
episode: 521 training return: tensor(339.6224, device='cuda:0')
episode: 522 training return: tensor(347.6752, device='cuda:0')
episode: 523 training return: tensor(309.4893, device='cuda:0')
epoch: 131 test_true_pfm: 3576.339412421272 sim_pfm: 440.7878183320087
episode: 524 training return: tensor(329.0197, device='cuda:0')
episode: 525 training return: tensor(322.2834, device='cuda:0')
episode: 526 training return: tensor(292.9857, device='cuda:0')
episode: 527 training return: tensor(346.0538, device='cuda:0')
epoch: 132 test_true_pfm: 3549.3096784811764 sim_pfm: 417.9685452287861
episode: 528 training return: tensor(359.8772, device='cuda:0')
episode: 529 training return: tensor(317.2251, device='cuda:0')
episode: 530 training return: tensor(337.5298, device='cuda:0')
episode: 531 training return: tensor(309.1651, device='cuda:0')
epoch: 133 test_true_pfm: 3526.1353636159706 sim_pfm: 365.7053490331552
episode: 532 training return: tensor(259.7784, device='cuda:0')
episode: 533 training return: tensor(335.6169, device='cuda:0')
episode: 534 training return: tensor(270.2335, device='cuda:0')
episode: 535 training return: tensor(320.9022, device='cuda:0')
epoch: 134 test_true_pfm: 3555.383647294659 sim_pfm: 424.28314494881005
episode: 536 training return: tensor(396.5732, device='cuda:0')
episode: 537 training return: tensor(229.7291, device='cuda:0')
episode: 538 training return: tensor(328.1954, device='cuda:0')
episode: 539 training return: tensor(310.8208, device='cuda:0')
epoch: 135 test_true_pfm: 3618.3674406393898 sim_pfm: 390.9183758550789
episode: 540 training return: tensor(266.2277, device='cuda:0')
episode: 541 training return: tensor(282.7445, device='cuda:0')
episode: 542 training return: tensor(281.9180, device='cuda:0')
episode: 543 training return: tensor(266.6772, device='cuda:0')
epoch: 136 test_true_pfm: 3568.69383093646 sim_pfm: 368.0836252019314
episode: 544 training return: tensor(323.2363, device='cuda:0')
episode: 545 training return: tensor(330.6147, device='cuda:0')
episode: 546 training return: tensor(259.1204, device='cuda:0')
episode: 547 training return: tensor(355.4234, device='cuda:0')
epoch: 137 test_true_pfm: 3577.573568671856 sim_pfm: 374.0726858732135
episode: 548 training return: tensor(223.4957, device='cuda:0')
episode: 549 training return: tensor(306.8715, device='cuda:0')
episode: 550 training return: tensor(329.4579, device='cuda:0')
episode: 551 training return: tensor(246.0928, device='cuda:0')
epoch: 138 test_true_pfm: 3487.176731521657 sim_pfm: 335.7710232503402
episode: 552 training return: tensor(346.2888, device='cuda:0')
episode: 553 training return: tensor(281.1032, device='cuda:0')
episode: 554 training return: tensor(332.9363, device='cuda:0')
episode: 555 training return: tensor(329.6583, device='cuda:0')
epoch: 139 test_true_pfm: 3569.2347582899297 sim_pfm: 401.21629445782554
episode: 556 training return: tensor(331.0533, device='cuda:0')
episode: 557 training return: tensor(315.0612, device='cuda:0')
episode: 558 training return: tensor(327.5536, device='cuda:0')
episode: 559 training return: tensor(367.6339, device='cuda:0')
epoch: 140 test_true_pfm: 3519.6288089862996 sim_pfm: 360.67883437967004
episode: 560 training return: tensor(315.7693, device='cuda:0')
episode: 561 training return: tensor(189.2549, device='cuda:0')
episode: 562 training return: tensor(335.6484, device='cuda:0')
episode: 563 training return: tensor(277.9621, device='cuda:0')
epoch: 141 test_true_pfm: 3619.9697866333895 sim_pfm: 418.17956091459683
episode: 564 training return: tensor(365.8426, device='cuda:0')
episode: 565 training return: tensor(200.2357, device='cuda:0')
episode: 566 training return: tensor(280.9360, device='cuda:0')
episode: 567 training return: tensor(376.2214, device='cuda:0')
epoch: 142 test_true_pfm: 3583.0322194993023 sim_pfm: 430.8140971818163
episode: 568 training return: tensor(331.3291, device='cuda:0')
episode: 569 training return: tensor(345.0856, device='cuda:0')
episode: 570 training return: tensor(223.0950, device='cuda:0')
episode: 571 training return: tensor(304.9709, device='cuda:0')
epoch: 143 test_true_pfm: 3522.6800089697094 sim_pfm: -1.3190290873268775
episode: 572 training return: tensor(324.2384, device='cuda:0')
episode: 573 training return: tensor(370.5877, device='cuda:0')
episode: 574 training return: tensor(377.7959, device='cuda:0')
episode: 575 training return: tensor(362.0271, device='cuda:0')
epoch: 144 test_true_pfm: 3545.272403055012 sim_pfm: 374.31705902635196
episode: 576 training return: tensor(320.2004, device='cuda:0')
episode: 577 training return: tensor(331.8988, device='cuda:0')
episode: 578 training return: tensor(297.4595, device='cuda:0')
episode: 579 training return: tensor(367.1012, device='cuda:0')
epoch: 145 test_true_pfm: 3546.8561913221056 sim_pfm: 381.57096939524246
episode: 580 training return: tensor(294.5693, device='cuda:0')
episode: 581 training return: tensor(323.0066, device='cuda:0')
episode: 582 training return: tensor(352.7231, device='cuda:0')
episode: 583 training return: tensor(351.5700, device='cuda:0')
epoch: 146 test_true_pfm: 3603.621290130582 sim_pfm: 386.51498155583005
episode: 584 training return: tensor(266.5389, device='cuda:0')
episode: 585 training return: tensor(319.5293, device='cuda:0')
episode: 586 training return: tensor(304.3033, device='cuda:0')
episode: 587 training return: tensor(370.4204, device='cuda:0')
epoch: 147 test_true_pfm: 3608.2139132269835 sim_pfm: 395.61826169207535
episode: 588 training return: tensor(322.1829, device='cuda:0')
episode: 589 training return: tensor(292.5514, device='cuda:0')
episode: 590 training return: tensor(408.1880, device='cuda:0')
episode: 591 training return: tensor(378.9672, device='cuda:0')
epoch: 148 test_true_pfm: 3602.6327164551312 sim_pfm: 439.4480503328862
episode: 592 training return: tensor(414.1289, device='cuda:0')
episode: 593 training return: tensor(375.6007, device='cuda:0')
episode: 594 training return: tensor(375.3217, device='cuda:0')
episode: 595 training return: tensor(243.3529, device='cuda:0')
epoch: 149 test_true_pfm: 3556.3781006269705 sim_pfm: 247.91023118025623
episode: 596 training return: tensor(301.0977, device='cuda:0')
episode: 597 training return: tensor(331.9705, device='cuda:0')
episode: 598 training return: tensor(303.8243, device='cuda:0')
episode: 599 training return: tensor(352.3013, device='cuda:0')
epoch: 150 test_true_pfm: 3550.2207206147955 sim_pfm: 387.66918111476116
