['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.1902776662260294 test_loss: 0.14959977865219115
epoch: 1 training_loss 0.14618932362645864 test_loss: 0.14479125738143922
epoch: 2 training_loss 0.12817602161318065 test_loss: 0.12030205726623536
epoch: 3 training_loss 0.11229173455387353 test_loss: 0.12603873014450073
epoch: 4 training_loss 0.1240069591253996 test_loss: 0.12714006900787353
epoch: 5 training_loss 0.11223360072821378 test_loss: 0.1248428463935852
epoch: 6 training_loss 0.11407488420605659 test_loss: 0.1163735032081604
epoch: 7 training_loss 0.11347927793860435 test_loss: 0.12362638711929322
epoch: 8 training_loss 0.1138405815511942 test_loss: 0.11406223773956299
epoch: 9 training_loss 0.11101825717836618 test_loss: 0.11378573179244995
epoch: 10 training_loss 0.10763826796784998 test_loss: 0.1192850112915039
epoch: 11 training_loss 0.1133417514152825 test_loss: 0.10320082902908326
epoch: 12 training_loss 0.10120493989437819 test_loss: 0.10020201206207276
epoch: 13 training_loss 0.10043343782424927 test_loss: 0.1149161696434021
epoch: 14 training_loss 0.10973577857017518 test_loss: 0.09866217970848083
epoch: 15 training_loss 0.10786097057163715 test_loss: 0.12035130262374878
epoch: 16 training_loss 0.10831438936293125 test_loss: 0.09926687479019165
epoch: 17 training_loss 0.09664052305743098 test_loss: 0.11607435941696168
epoch: 18 training_loss 0.10304423918947578 test_loss: 0.10157610177993774
epoch: 19 training_loss 0.1049344184435904 test_loss: 0.10091173648834229
epoch: 20 training_loss 0.11089325772598385 test_loss: 0.09331583380699157
epoch: 21 training_loss 0.10544922478497028 test_loss: 0.08820678591728211
epoch: 22 training_loss 0.11324204824864864 test_loss: 0.09511406421661377
epoch: 23 training_loss 0.10385506359860301 test_loss: 0.10455628633499145
epoch: 24 training_loss 0.10282239126041531 test_loss: 0.10193579196929932
epoch: 25 training_loss 0.10503776974976063 test_loss: 0.0969357430934906
epoch: 26 training_loss 0.1037413108535111 test_loss: 0.10416204929351806
epoch: 27 training_loss 0.10054434519261121 test_loss: 0.11453047990798951
epoch: 28 training_loss 0.10565657429397106 test_loss: 0.1010617733001709
epoch: 29 training_loss 0.09468550868332386 test_loss: 0.10730850696563721
epoch: 30 training_loss 0.10579523846507072 test_loss: 0.10384577512741089
epoch: 31 training_loss 0.10496222859248519 test_loss: 0.11118898391723633
epoch: 32 training_loss 0.10861626490950585 test_loss: 0.10550906658172607
epoch: 33 training_loss 0.10820575747638941 test_loss: 0.09356069564819336
epoch: 34 training_loss 0.10004832834005356 test_loss: 0.11265262365341186
epoch: 35 training_loss 0.1143851663544774 test_loss: 0.10849063396453858
epoch: 36 training_loss 0.09702422469854355 test_loss: 0.10567317008972169
epoch: 37 training_loss 0.101903072334826 test_loss: 0.08281850218772888
epoch: 38 training_loss 0.10325614683330059 test_loss: 0.0970835566520691
epoch: 39 training_loss 0.10684175867587328 test_loss: 0.10135881900787354
epoch: 40 training_loss 0.0983753932081163 test_loss: 0.10708552598953247
epoch: 41 training_loss 0.10745285481214523 test_loss: 0.10783673524856567
epoch: 42 training_loss 0.0985442558862269 test_loss: 0.09935015439987183
epoch: 43 training_loss 0.0971871468424797 test_loss: 0.11443443298339843
epoch: 44 training_loss 0.10510567147284747 test_loss: 0.09802672266960144
epoch: 45 training_loss 0.10631506722420454 test_loss: 0.10991350412368775
epoch: 46 training_loss 0.10071830788627267 test_loss: 0.10010945796966553
epoch: 47 training_loss 0.10264194868505001 test_loss: 0.10425065755844116
epoch: 48 training_loss 0.09568049846217036 test_loss: 0.11642624139785766
epoch: 49 training_loss 0.10532374134287238 test_loss: 0.1088176965713501
epoch: 50 training_loss 0.09810754038393497 test_loss: 0.09637307524681091
epoch: 51 training_loss 0.10690970739349723 test_loss: 0.10341072082519531
epoch: 52 training_loss 0.09447103399783373 test_loss: 0.09059767723083496
epoch: 53 training_loss 0.1014809550717473 test_loss: 0.10849854946136475
epoch: 54 training_loss 0.10111302640289069 test_loss: 0.10852987766265869
epoch: 55 training_loss 0.09402004962787032 test_loss: 0.10271748304367065
epoch: 56 training_loss 0.09660639960318804 test_loss: 0.12026245594024658
epoch: 57 training_loss 0.10428037654608488 test_loss: 0.10375411510467529
epoch: 58 training_loss 0.1011408744379878 test_loss: 0.0919937551021576
epoch: 59 training_loss 0.10431364700198173 test_loss: 0.10233937501907349
epoch: 60 training_loss 0.10177655011415482 test_loss: 0.09420962929725647
epoch: 61 training_loss 0.09822739433497191 test_loss: 0.10523045063018799
epoch: 62 training_loss 0.0970465367846191 test_loss: 0.10659186840057373
epoch: 63 training_loss 0.10129159968346357 test_loss: 0.10612982511520386
epoch: 64 training_loss 0.10105345187708735 test_loss: 0.11541212797164917
epoch: 65 training_loss 0.09979655189439654 test_loss: 0.10879312753677368
epoch: 66 training_loss 0.09700232516042888 test_loss: 0.09634363055229186
epoch: 67 training_loss 0.0974428516253829 test_loss: 0.11558064222335815
epoch: 68 training_loss 0.09683777848258615 test_loss: 0.1127134084701538
epoch: 69 training_loss 0.10165062734857201 test_loss: 0.1121563196182251
epoch: 70 training_loss 0.10476823212578892 test_loss: 0.09855456948280335
epoch: 71 training_loss 0.09717482773587108 test_loss: 0.10119075775146484
epoch: 72 training_loss 0.09862606953829527 test_loss: 0.10411596298217773
epoch: 73 training_loss 0.09950708517804742 test_loss: 0.1076129674911499
epoch: 74 training_loss 0.10184662807732821 test_loss: 0.11385911703109741
epoch: 75 training_loss 0.09783800175413489 test_loss: 0.0951259434223175
epoch: 76 training_loss 0.10149325145408511 test_loss: 0.1234464168548584
epoch: 77 training_loss 0.09862263768911361 test_loss: 0.1130675196647644
epoch: 78 training_loss 0.0981885578110814 test_loss: 0.0942049503326416
epoch: 79 training_loss 0.09415674481540919 test_loss: 0.10960519313812256
epoch: 80 training_loss 0.10132727976888418 test_loss: 0.10522650480270386
epoch: 81 training_loss 0.09839771224185824 test_loss: 0.10256484746932984
epoch: 82 training_loss 0.09976761162281036 test_loss: 0.09220387935638427
epoch: 83 training_loss 0.0907880780287087 test_loss: 0.10250225067138671
epoch: 84 training_loss 0.0948226054571569 test_loss: 0.10339449644088745
epoch: 85 training_loss 0.10346003711223602 test_loss: 0.09664743542671203
epoch: 86 training_loss 0.09935600200667977 test_loss: 0.09300298094749451
epoch: 87 training_loss 0.09711819814518094 test_loss: 0.11032969951629638
epoch: 88 training_loss 0.09355456322431564 test_loss: 0.09080697298049926
epoch: 89 training_loss 0.09562517484650015 test_loss: 0.11473530530929565
epoch: 90 training_loss 0.09228000752627849 test_loss: 0.09528685808181762
epoch: 91 training_loss 0.09481173574924469 test_loss: 0.10255814790725708
epoch: 92 training_loss 0.09394223980605602 test_loss: 0.10481046438217163
epoch: 93 training_loss 0.1061676198989153 test_loss: 0.09039285778999329
epoch: 94 training_loss 0.09739745447412133 test_loss: 0.09041602015495301
epoch: 95 training_loss 0.0926716941408813 test_loss: 0.09089620709419251
epoch: 96 training_loss 0.10333749320358038 test_loss: 0.09441452622413635
epoch: 97 training_loss 0.10000050388276577 test_loss: 0.0903782844543457
epoch: 98 training_loss 0.09774687934666872 test_loss: 0.10656225681304932
epoch: 99 training_loss 0.09586901739239692 test_loss: 0.09828102588653564
epoch: 100 training_loss 0.09730899503454565 test_loss: 0.09189006090164184
epoch: 101 training_loss 0.0955927986651659 test_loss: 0.09279809594154358
epoch: 102 training_loss 0.09325616016983986 test_loss: 0.09353989362716675
epoch: 103 training_loss 0.10526938993483782 test_loss: 0.10047816038131714
epoch: 104 training_loss 0.10132319949567319 test_loss: 0.10546720027923584
epoch: 105 training_loss 0.09129394201561808 test_loss: 0.09853798747062684
epoch: 106 training_loss 0.09582316506654025 test_loss: 0.1009868025779724
epoch: 107 training_loss 0.09565847229212522 test_loss: 0.0992802083492279
epoch: 108 training_loss 0.09178910408169032 test_loss: 0.10949108600616456
epoch: 109 training_loss 0.09850415538996458 test_loss: 0.09446812272071839
epoch: 110 training_loss 0.10259278658777475 test_loss: 0.09434080719947815
epoch: 111 training_loss 0.0954288474470377 test_loss: 0.093099045753479
epoch: 112 training_loss 0.09585190195590258 test_loss: 0.09367777109146118
epoch: 113 training_loss 0.10039279129356146 test_loss: 0.10561398267745972
epoch: 114 training_loss 0.09760155960917473 test_loss: 0.09565971493721008
epoch: 115 training_loss 0.09966913320124149 test_loss: 0.09558426737785339
epoch: 116 training_loss 0.09595597103238106 test_loss: 0.08907488584518433
epoch: 117 training_loss 0.10110625188797712 test_loss: 0.09576927423477173
epoch: 118 training_loss 0.09660751741379499 test_loss: 0.09063289761543274
epoch: 119 training_loss 0.1019010615721345 test_loss: 0.09450928568840027
epoch: 120 training_loss 0.0961389023438096 test_loss: 0.08716158270835876
epoch: 121 training_loss 0.09402037117630244 test_loss: 0.09980233907699584
epoch: 122 training_loss 0.09978721870109439 test_loss: 0.09866049289703369
epoch: 123 training_loss 0.100075226649642 test_loss: 0.10379399061203003
epoch: 124 training_loss 0.10056511394679546 test_loss: 0.09843753576278687
epoch: 125 training_loss 0.09511342994868756 test_loss: 0.09089376926422119
epoch: 126 training_loss 0.09631321314722299 test_loss: 0.08572425246238709
epoch: 127 training_loss 0.09559445966035128 test_loss: 0.09985422492027282
epoch: 128 training_loss 0.10016586091369391 test_loss: 0.09871765375137329
epoch: 129 training_loss 0.097326635196805 test_loss: 0.08102907538414002
epoch: 130 training_loss 0.0928258709423244 test_loss: 0.11038600206375122
epoch: 131 training_loss 0.10063819020986557 test_loss: 0.1017562747001648
epoch: 132 training_loss 0.0928933496400714 test_loss: 0.10233975648880005
epoch: 133 training_loss 0.0929777790978551 test_loss: 0.09940099716186523
epoch: 134 training_loss 0.09877686638385058 test_loss: 0.0935746967792511
epoch: 135 training_loss 0.09591467063874007 test_loss: 0.10135496854782104
epoch: 136 training_loss 0.09364101724699139 test_loss: 0.08437495231628418
epoch: 137 training_loss 0.09666979417204857 test_loss: 0.10195302963256836
epoch: 138 training_loss 0.09728230111300945 test_loss: 0.1003099799156189
epoch: 139 training_loss 0.09143596792593599 test_loss: 0.10076509714126587
epoch: 140 training_loss 0.09491016432642936 test_loss: 0.08584065437316894
epoch: 141 training_loss 0.09601798346266151 test_loss: 0.09735079407691956
epoch: 142 training_loss 0.10337038487195968 test_loss: 0.10431973934173584
epoch: 143 training_loss 0.09690778836607933 test_loss: 0.09847506284713745
epoch: 144 training_loss 0.10090079536661506 test_loss: 0.09860340356826783
epoch: 145 training_loss 0.09020196191966534 test_loss: 0.10018327236175537
epoch: 146 training_loss 0.09614652799442411 test_loss: 0.10830138921737671
epoch: 147 training_loss 0.10077930675819516 test_loss: 0.10466119050979614
epoch: 148 training_loss 0.09944953441619873 test_loss: 0.0801258146762848
epoch: 149 training_loss 0.09534065544605255 test_loss: 0.09389405846595764
epoch: 0 training_loss 8.9497420835495 test_loss: 6.163583374023437
epoch: 1 training_loss 5.112812705039978 test_loss: 4.4236701965332035
epoch: 2 training_loss 3.993695151805878 test_loss: 3.620725631713867
epoch: 3 training_loss 3.328506438732147 test_loss: 3.2856693267822266
epoch: 4 training_loss 2.9509975504875183 test_loss: 2.7847877502441407
epoch: 5 training_loss 2.611220347881317 test_loss: 2.4963327407836915
epoch: 6 training_loss 2.428659328222275 test_loss: 2.364771270751953
epoch: 7 training_loss 2.2665304279327394 test_loss: 2.162890625
epoch: 8 training_loss 2.0789657711982725 test_loss: 1.998763084411621
epoch: 9 training_loss 1.9813563907146454 test_loss: 1.996547317504883
epoch: 10 training_loss 1.8902341794967652 test_loss: 1.811223602294922
epoch: 11 training_loss 1.8253173565864562 test_loss: 1.8218212127685547
epoch: 12 training_loss 1.7691892576217652 test_loss: 1.7181182861328126
epoch: 13 training_loss 1.7194546818733216 test_loss: 1.6822046279907226
epoch: 14 training_loss 1.6744136381149293 test_loss: 1.6032295227050781
epoch: 15 training_loss 1.5727402532100678 test_loss: 1.5567103385925294
epoch: 16 training_loss 1.5447690451145173 test_loss: 1.563641357421875
epoch: 17 training_loss 1.5131634736061097 test_loss: 1.5288840293884278
epoch: 18 training_loss 1.49597971200943 test_loss: 1.4780662536621094
epoch: 19 training_loss 1.4383541464805603 test_loss: 1.466563892364502
epoch: 20 training_loss 1.4190329611301422 test_loss: 1.4497297286987305
epoch: 21 training_loss 1.4048710882663726 test_loss: 1.3839821815490723
epoch: 22 training_loss 1.3544928348064422 test_loss: 1.3361424446105956
epoch: 23 training_loss 1.3392671251296997 test_loss: 1.2584020614624023
epoch: 24 training_loss 1.3108540511131286 test_loss: 1.3351193428039552
epoch: 25 training_loss 1.319807449579239 test_loss: 1.304800796508789
epoch: 26 training_loss 1.239272426366806 test_loss: 1.2418956756591797
epoch: 27 training_loss 1.2497345793247223 test_loss: 1.2634687423706055
epoch: 28 training_loss 1.2419956815242768 test_loss: 1.2835688591003418
epoch: 29 training_loss 1.189707141518593 test_loss: 1.1814007759094238
epoch: 30 training_loss 1.2196941256523133 test_loss: 1.232374095916748
epoch: 31 training_loss 1.1818833673000335 test_loss: 1.2058773994445802
epoch: 32 training_loss 1.192596579194069 test_loss: 1.1746492385864258
epoch: 33 training_loss 1.1492331647872924 test_loss: 1.1486572265625
epoch: 34 training_loss 1.1435457706451415 test_loss: 1.1159658432006836
epoch: 35 training_loss 1.1389429712295531 test_loss: 1.1277431488037108
epoch: 36 training_loss 1.13390388071537 test_loss: 1.1857896804809571
epoch: 37 training_loss 1.109818572998047 test_loss: 1.0756733894348145
epoch: 38 training_loss 1.0937829852104186 test_loss: 1.135393238067627
epoch: 39 training_loss 1.0852882260084151 test_loss: 1.039498996734619
epoch: 40 training_loss 1.0802550381422042 test_loss: 1.091524600982666
epoch: 41 training_loss 1.0571237653493881 test_loss: 1.0553553581237793
epoch: 42 training_loss 1.0491912335157394 test_loss: 1.0639012336730957
epoch: 43 training_loss 1.067068302631378 test_loss: 1.064196491241455
epoch: 44 training_loss 1.0510904002189636 test_loss: 1.071031093597412
epoch: 45 training_loss 1.051466400027275 test_loss: 1.0691160202026366
epoch: 46 training_loss 1.0279323476552964 test_loss: 1.036532211303711
epoch: 47 training_loss 1.0321207177639007 test_loss: 1.0170454025268554
epoch: 48 training_loss 1.0196788656711577 test_loss: 1.0377183914184571
epoch: 49 training_loss 1.006336408853531 test_loss: 1.018986701965332
epoch: 50 training_loss 1.0205091136693953 test_loss: 0.9777819633483886
epoch: 51 training_loss 1.0195041280984878 test_loss: 1.0640643119812012
epoch: 52 training_loss 0.9913829731941223 test_loss: 1.0161801338195802
epoch: 53 training_loss 0.9872199928760529 test_loss: 0.9337100982666016
epoch: 54 training_loss 0.984453821182251 test_loss: 0.9728321075439453
epoch: 55 training_loss 0.9851071619987488 test_loss: 0.9518587112426757
epoch: 56 training_loss 0.9893289029598236 test_loss: 0.9958728790283203
epoch: 57 training_loss 0.9670333224534988 test_loss: 0.9674213409423829
epoch: 58 training_loss 0.9899677300453186 test_loss: 1.0194156646728516
epoch: 59 training_loss 0.9635221046209336 test_loss: 1.0127860069274903
epoch: 60 training_loss 0.9581628423929215 test_loss: 0.9374850273132325
epoch: 61 training_loss 0.9421818059682846 test_loss: 0.9659890174865723
epoch: 62 training_loss 0.9230853241682052 test_loss: 0.9329035758972168
epoch: 63 training_loss 0.9611372870206832 test_loss: 0.9515561103820801
epoch: 64 training_loss 0.9250424003601074 test_loss: 0.9777321815490723
epoch: 65 training_loss 0.9355599176883698 test_loss: 0.9263496398925781
epoch: 66 training_loss 0.9513716411590576 test_loss: 0.913311767578125
epoch: 67 training_loss 0.9286143606901169 test_loss: 0.9439995765686036
epoch: 68 training_loss 0.9196042042970657 test_loss: 0.9458235740661621
epoch: 69 training_loss 0.9151238077878951 test_loss: 0.8921029090881347
epoch: 70 training_loss 0.9098150837421417 test_loss: 0.9235827445983886
epoch: 71 training_loss 0.8903935807943344 test_loss: 0.9056659698486328
epoch: 72 training_loss 0.8964467984437943 test_loss: 0.922819995880127
epoch: 73 training_loss 0.8998784720897675 test_loss: 0.9148550987243652
epoch: 74 training_loss 0.8982724434137345 test_loss: 0.9173213958740234
epoch: 75 training_loss 0.8879729545116425 test_loss: 0.9289315223693848
epoch: 76 training_loss 0.8843599659204483 test_loss: 0.9081226348876953
epoch: 77 training_loss 0.8907678699493409 test_loss: 0.8770095825195312
epoch: 78 training_loss 0.8799346166849137 test_loss: 0.8880279541015625
epoch: 79 training_loss 0.8981083101034164 test_loss: 0.8953393936157227
epoch: 80 training_loss 0.865177463889122 test_loss: 0.885892105102539
epoch: 81 training_loss 0.8605358141660691 test_loss: 0.894096851348877
epoch: 82 training_loss 0.877207270860672 test_loss: 0.8833118438720703
epoch: 83 training_loss 0.8834985530376435 test_loss: 0.9067037582397461
epoch: 84 training_loss 0.8604775238037109 test_loss: 0.8877944946289062
epoch: 85 training_loss 0.8661108767986297 test_loss: 0.8736063957214355
epoch: 86 training_loss 0.87477363884449 test_loss: 0.8463308334350585
epoch: 87 training_loss 0.862276571393013 test_loss: 0.8909093856811523
epoch: 88 training_loss 0.8306713181734086 test_loss: 0.8566659927368164
epoch: 89 training_loss 0.8301254779100418 test_loss: 0.8789397239685058
epoch: 90 training_loss 0.8525348484516144 test_loss: 0.8988944053649902
epoch: 91 training_loss 0.8586683869361877 test_loss: 0.8277815818786621
epoch: 92 training_loss 0.8359181654453277 test_loss: 0.8449379920959472
epoch: 93 training_loss 0.8498952561616897 test_loss: 0.8546955108642578
epoch: 94 training_loss 0.8365866333246231 test_loss: 0.8169175148010254
epoch: 95 training_loss 0.8448359543085098 test_loss: 0.8603020668029785
epoch: 96 training_loss 0.8397544407844544 test_loss: 0.8254396438598632
epoch: 97 training_loss 0.8187421154975891 test_loss: 0.8079573631286621
epoch: 98 training_loss 0.8460495889186859 test_loss: 0.8714524269104004
epoch: 99 training_loss 0.844485958814621 test_loss: 0.8413135528564453
epoch: 100 training_loss 0.8328511929512024 test_loss: 0.83908052444458
epoch: 101 training_loss 0.8146369230747222 test_loss: 0.8041948318481446
epoch: 102 training_loss 0.8119719117879868 test_loss: 0.8080342292785645
epoch: 103 training_loss 0.8159719014167786 test_loss: 0.8007209777832032
epoch: 104 training_loss 0.81099858045578 test_loss: 0.8030417442321778
epoch: 105 training_loss 0.8215082097053528 test_loss: 0.8234346389770508
epoch: 106 training_loss 0.8102151423692703 test_loss: 0.8181144714355468
epoch: 107 training_loss 0.809909273982048 test_loss: 0.8055026054382324
epoch: 108 training_loss 0.8142479008436203 test_loss: 0.775997543334961
epoch: 109 training_loss 0.8047404271364212 test_loss: 0.8075839042663574
epoch: 110 training_loss 0.8048542284965515 test_loss: 0.8106441497802734
epoch: 111 training_loss 0.8060569435358047 test_loss: 0.825584888458252
epoch: 112 training_loss 0.8007824444770812 test_loss: 0.8204515457153321
epoch: 113 training_loss 0.8156598937511444 test_loss: 0.8344805717468262
epoch: 114 training_loss 0.803840019106865 test_loss: 0.8093555450439454
epoch: 115 training_loss 0.7892020452022552 test_loss: 0.7932955265045166
epoch: 116 training_loss 0.7889127415418625 test_loss: 0.7726126670837402
epoch: 117 training_loss 0.7873377126455307 test_loss: 0.8218430519104004
epoch: 118 training_loss 0.7907285803556442 test_loss: 0.8210993766784668
epoch: 119 training_loss 0.7810753554105758 test_loss: 0.7826584339141845
epoch: 120 training_loss 0.7861216825246811 test_loss: 0.781441593170166
epoch: 121 training_loss 0.7735401946306228 test_loss: 0.8440873146057128
epoch: 122 training_loss 0.7935376638174056 test_loss: 0.7881547451019287
epoch: 123 training_loss 0.7772174561023713 test_loss: 0.7759177207946777
epoch: 124 training_loss 0.7758234697580337 test_loss: 0.790238618850708
epoch: 125 training_loss 0.7961585909128189 test_loss: 0.7902243614196778
epoch: 126 training_loss 0.7767469900846481 test_loss: 0.7699624061584472
epoch: 127 training_loss 0.7858687198162079 test_loss: 0.7806971073150635
epoch: 128 training_loss 0.7767917996644974 test_loss: 0.7556349754333496
epoch: 129 training_loss 0.7619125592708588 test_loss: 0.7890296936035156
epoch: 130 training_loss 0.776283724308014 test_loss: 0.7614731788635254
epoch: 131 training_loss 0.7691607612371445 test_loss: 0.7729336738586425
epoch: 132 training_loss 0.7664081919193267 test_loss: 0.7434287548065186
epoch: 133 training_loss 0.7584411388635636 test_loss: 0.7508009910583496
epoch: 134 training_loss 0.7606529915332794 test_loss: 0.7548237800598144
epoch: 135 training_loss 0.7792160367965698 test_loss: 0.7542017936706543
epoch: 136 training_loss 0.7543681472539902 test_loss: 0.7789437294006347
epoch: 137 training_loss 0.7475287985801696 test_loss: 0.737784481048584
epoch: 138 training_loss 0.7733309090137481 test_loss: 0.7913340568542481
epoch: 139 training_loss 0.7526271879673004 test_loss: 0.7233104705810547
epoch: 140 training_loss 0.7578396940231323 test_loss: 0.7266481399536133
epoch: 141 training_loss 0.7385300600528717 test_loss: 0.7346914291381836
epoch: 142 training_loss 0.7567550879716873 test_loss: 0.7285645961761474
epoch: 143 training_loss 0.7618464374542236 test_loss: 0.7229090213775635
epoch: 144 training_loss 0.7446314656734466 test_loss: 0.7583539485931396
epoch: 145 training_loss 0.7511485183238983 test_loss: 0.7281126976013184
epoch: 146 training_loss 0.7532283163070679 test_loss: 0.742180585861206
epoch: 147 training_loss 0.7594439464807511 test_loss: 0.7506319999694824
epoch: 148 training_loss 0.7603793120384217 test_loss: 0.7043859481811523
epoch: 149 training_loss 0.7446960151195526 test_loss: 0.7144605159759522
3100.553858047103
episode: 0 training return: tensor(-57.6461, device='cuda:0')
episode: 1 training return: tensor(128.6057, device='cuda:0')
episode: 2 training return: tensor(127.5137, device='cuda:0')
episode: 3 training return: tensor(59.6860, device='cuda:0')
epoch: 1 test_true_pfm: 2918.719707307116 sim_pfm: 101.49530062167712
episode: 4 training return: tensor(127.1766, device='cuda:0')
episode: 5 training return: tensor(74.7837, device='cuda:0')
episode: 6 training return: tensor(100.6698, device='cuda:0')
episode: 7 training return: tensor(71.7446, device='cuda:0')
epoch: 2 test_true_pfm: 2689.0080759048546 sim_pfm: 77.11883762850387
episode: 8 training return: tensor(69.2260, device='cuda:0')
episode: 9 training return: tensor(-155.1650, device='cuda:0')
episode: 10 training return: tensor(-79.9345, device='cuda:0')
episode: 11 training return: tensor(-229.8582, device='cuda:0')
epoch: 3 test_true_pfm: 2202.5475198153295 sim_pfm: -304.5933482678083
episode: 12 training return: tensor(99.9316, device='cuda:0')
episode: 13 training return: tensor(192.9043, device='cuda:0')
episode: 14 training return: tensor(-338.8939, device='cuda:0')
episode: 15 training return: tensor(-402.1600, device='cuda:0')
epoch: 4 test_true_pfm: 3182.4018569109817 sim_pfm: -97.80427676618758
episode: 16 training return: tensor(91.3863, device='cuda:0')
episode: 17 training return: tensor(-253.2644, device='cuda:0')
episode: 18 training return: tensor(-286.9724, device='cuda:0')
episode: 19 training return: tensor(134.6778, device='cuda:0')
epoch: 5 test_true_pfm: 3169.7954169409873 sim_pfm: 67.26895761908963
episode: 20 training return: tensor(-168.2106, device='cuda:0')
episode: 21 training return: tensor(99.9860, device='cuda:0')
episode: 22 training return: tensor(92.8551, device='cuda:0')
episode: 23 training return: tensor(88.0918, device='cuda:0')
epoch: 6 test_true_pfm: 3029.73839448945 sim_pfm: 14.6921571281855
episode: 24 training return: tensor(-247.7988, device='cuda:0')
episode: 25 training return: tensor(-53.3307, device='cuda:0')
episode: 26 training return: tensor(-238.1036, device='cuda:0')
episode: 27 training return: tensor(124.1953, device='cuda:0')
epoch: 7 test_true_pfm: 2194.780340597819 sim_pfm: -133.74271666810577
episode: 28 training return: tensor(-197.5927, device='cuda:0')
episode: 29 training return: tensor(191.2040, device='cuda:0')
episode: 30 training return: tensor(-367.6633, device='cuda:0')
episode: 31 training return: tensor(-59.6572, device='cuda:0')
epoch: 8 test_true_pfm: 3165.6884618223303 sim_pfm: 105.95365267788293
episode: 32 training return: tensor(84.7683, device='cuda:0')
episode: 33 training return: tensor(-182.1439, device='cuda:0')
episode: 34 training return: tensor(-274.5168, device='cuda:0')
episode: 35 training return: tensor(198.0542, device='cuda:0')
epoch: 9 test_true_pfm: 3210.716163968504 sim_pfm: 111.74162335321307
episode: 36 training return: tensor(98.1716, device='cuda:0')
episode: 37 training return: tensor(-224.7944, device='cuda:0')
episode: 38 training return: tensor(185.3556, device='cuda:0')
episode: 39 training return: tensor(86.6205, device='cuda:0')
epoch: 10 test_true_pfm: 3211.694792951723 sim_pfm: 86.13236402568873
episode: 40 training return: tensor(-160.5201, device='cuda:0')
episode: 41 training return: tensor(106.6559, device='cuda:0')
episode: 42 training return: tensor(-262.1837, device='cuda:0')
episode: 43 training return: tensor(-14.1318, device='cuda:0')
epoch: 11 test_true_pfm: 2804.7608763572166 sim_pfm: 18.327469067104783
episode: 44 training return: tensor(-294.0250, device='cuda:0')
episode: 45 training return: tensor(74.5142, device='cuda:0')
episode: 46 training return: tensor(195.6477, device='cuda:0')
episode: 47 training return: tensor(-346.0533, device='cuda:0')
epoch: 12 test_true_pfm: 2713.623942589377 sim_pfm: -139.12386082441662
episode: 48 training return: tensor(100.7919, device='cuda:0')
episode: 49 training return: tensor(165.3519, device='cuda:0')
episode: 50 training return: tensor(-224.4433, device='cuda:0')
episode: 51 training return: tensor(-143.7477, device='cuda:0')
epoch: 13 test_true_pfm: 3189.634461640488 sim_pfm: 12.598971921921475
episode: 52 training return: tensor(62.7911, device='cuda:0')
episode: 53 training return: tensor(-76.8028, device='cuda:0')
episode: 54 training return: tensor(173.5462, device='cuda:0')
episode: 55 training return: tensor(59.9236, device='cuda:0')
epoch: 14 test_true_pfm: 2967.7565845823187 sim_pfm: 79.55895264034432
episode: 56 training return: tensor(-235.8181, device='cuda:0')
episode: 57 training return: tensor(-162.9104, device='cuda:0')
episode: 58 training return: tensor(113.3582, device='cuda:0')
episode: 59 training return: tensor(112.8543, device='cuda:0')
epoch: 15 test_true_pfm: 3170.51274868625 sim_pfm: 63.73513857578897
episode: 60 training return: tensor(-198.6884, device='cuda:0')
episode: 61 training return: tensor(86.6692, device='cuda:0')
episode: 62 training return: tensor(63.7541, device='cuda:0')
episode: 63 training return: tensor(204.5319, device='cuda:0')
epoch: 16 test_true_pfm: 3126.3039249332573 sim_pfm: 79.92896809667582
episode: 64 training return: tensor(-266.3521, device='cuda:0')
episode: 65 training return: tensor(174.7638, device='cuda:0')
episode: 66 training return: tensor(157.9207, device='cuda:0')
episode: 67 training return: tensor(-54.0190, device='cuda:0')
epoch: 17 test_true_pfm: 3195.646953534994 sim_pfm: 99.2147828215772
episode: 68 training return: tensor(153.9427, device='cuda:0')
episode: 69 training return: tensor(-249.0825, device='cuda:0')
episode: 70 training return: tensor(-156.4692, device='cuda:0')
episode: 71 training return: tensor(153.6784, device='cuda:0')
epoch: 18 test_true_pfm: 3181.564754355692 sim_pfm: 68.22347777758841
episode: 72 training return: tensor(-282.6546, device='cuda:0')
episode: 73 training return: tensor(-94.0698, device='cuda:0')
episode: 74 training return: tensor(147.2213, device='cuda:0')
episode: 75 training return: tensor(-92.9922, device='cuda:0')
epoch: 19 test_true_pfm: 3138.914292070442 sim_pfm: 81.25416214362485
episode: 76 training return: tensor(70.4497, device='cuda:0')
episode: 77 training return: tensor(142.7613, device='cuda:0')
episode: 78 training return: tensor(115.2898, device='cuda:0')
episode: 79 training return: tensor(30.0199, device='cuda:0')
epoch: 20 test_true_pfm: 3148.937415245799 sim_pfm: 101.20721040997887
episode: 80 training return: tensor(105.9831, device='cuda:0')
episode: 81 training return: tensor(-138.7880, device='cuda:0')
episode: 82 training return: tensor(164.1273, device='cuda:0')
episode: 83 training return: tensor(-181.6125, device='cuda:0')
epoch: 21 test_true_pfm: 2578.6182184724225 sim_pfm: 53.37179679940649
episode: 84 training return: tensor(-64.3192, device='cuda:0')
episode: 85 training return: tensor(88.8125, device='cuda:0')
episode: 86 training return: tensor(-251.2088, device='cuda:0')
episode: 87 training return: tensor(-165.8791, device='cuda:0')
epoch: 22 test_true_pfm: 2782.275523709141 sim_pfm: 91.69466865817473
episode: 88 training return: tensor(-160.3762, device='cuda:0')
episode: 89 training return: tensor(97.9912, device='cuda:0')
episode: 90 training return: tensor(-311.9420, device='cuda:0')
episode: 91 training return: tensor(78.1240, device='cuda:0')
epoch: 23 test_true_pfm: 3180.054723059357 sim_pfm: 28.267404660883283
episode: 92 training return: tensor(-322.6410, device='cuda:0')
episode: 93 training return: tensor(116.8258, device='cuda:0')
episode: 94 training return: tensor(10.6552, device='cuda:0')
episode: 95 training return: tensor(84.3054, device='cuda:0')
epoch: 24 test_true_pfm: 3122.942194715968 sim_pfm: 81.99739804204243
episode: 96 training return: tensor(27.2610, device='cuda:0')
episode: 97 training return: tensor(184.9314, device='cuda:0')
episode: 98 training return: tensor(-225.8806, device='cuda:0')
episode: 99 training return: tensor(-199.1942, device='cuda:0')
epoch: 25 test_true_pfm: 3004.340783859923 sim_pfm: 45.534546807447136
episode: 100 training return: tensor(152.2081, device='cuda:0')
episode: 101 training return: tensor(85.5579, device='cuda:0')
episode: 102 training return: tensor(126.6816, device='cuda:0')
episode: 103 training return: tensor(-248.0964, device='cuda:0')
epoch: 26 test_true_pfm: 2207.7140073000674 sim_pfm: -163.17771673959214
episode: 104 training return: tensor(-400.4294, device='cuda:0')
episode: 105 training return: tensor(-290.2934, device='cuda:0')
episode: 106 training return: tensor(186.9572, device='cuda:0')
episode: 107 training return: tensor(-44.7772, device='cuda:0')
epoch: 27 test_true_pfm: 1856.623920436482 sim_pfm: -277.0304134856754
episode: 108 training return: tensor(100.2027, device='cuda:0')
episode: 109 training return: tensor(-162.2587, device='cuda:0')
episode: 110 training return: tensor(-344.7821, device='cuda:0')
episode: 111 training return: tensor(-259.4046, device='cuda:0')
epoch: 28 test_true_pfm: 2930.1914899554777 sim_pfm: 84.0050893976392
episode: 112 training return: tensor(-68.0301, device='cuda:0')
episode: 113 training return: tensor(133.3554, device='cuda:0')
episode: 114 training return: tensor(-320.3441, device='cuda:0')
episode: 115 training return: tensor(178.1700, device='cuda:0')
epoch: 29 test_true_pfm: 3055.832162753013 sim_pfm: 66.6984764590646
episode: 116 training return: tensor(172.4795, device='cuda:0')
episode: 117 training return: tensor(-164.7248, device='cuda:0')
episode: 118 training return: tensor(-178.4546, device='cuda:0')
episode: 119 training return: tensor(125.3065, device='cuda:0')
epoch: 30 test_true_pfm: 3247.1429114357747 sim_pfm: 25.653735579622055
episode: 120 training return: tensor(121.9046, device='cuda:0')
episode: 121 training return: tensor(114.0303, device='cuda:0')
episode: 122 training return: tensor(116.1957, device='cuda:0')
episode: 123 training return: tensor(123.6322, device='cuda:0')
epoch: 31 test_true_pfm: 2818.4203711936475 sim_pfm: -53.82777252822416
episode: 124 training return: tensor(-248.0816, device='cuda:0')
episode: 125 training return: tensor(-169.1035, device='cuda:0')
episode: 126 training return: tensor(-284.1805, device='cuda:0')
episode: 127 training return: tensor(-404.4987, device='cuda:0')
epoch: 32 test_true_pfm: 2792.814923035346 sim_pfm: 103.74773272621678
episode: 128 training return: tensor(-234.7776, device='cuda:0')
episode: 129 training return: tensor(89.3171, device='cuda:0')
episode: 130 training return: tensor(-127.3436, device='cuda:0')
episode: 131 training return: tensor(96.4623, device='cuda:0')
epoch: 33 test_true_pfm: 1997.1047818855823 sim_pfm: -196.45687330740233
episode: 132 training return: tensor(104.7279, device='cuda:0')
episode: 133 training return: tensor(79.7607, device='cuda:0')
episode: 134 training return: tensor(105.8464, device='cuda:0')
episode: 135 training return: tensor(25.3352, device='cuda:0')
epoch: 34 test_true_pfm: 2197.312594809291 sim_pfm: -118.66686019081196
episode: 136 training return: tensor(-285.2746, device='cuda:0')
episode: 137 training return: tensor(-53.6800, device='cuda:0')
episode: 138 training return: tensor(113.2914, device='cuda:0')
episode: 139 training return: tensor(-43.9483, device='cuda:0')
epoch: 35 test_true_pfm: 2781.313042256572 sim_pfm: -65.4710076516106
episode: 140 training return: tensor(-325.8179, device='cuda:0')
episode: 141 training return: tensor(-169.1349, device='cuda:0')
episode: 142 training return: tensor(126.1260, device='cuda:0')
episode: 143 training return: tensor(-340.5556, device='cuda:0')
epoch: 36 test_true_pfm: 2543.4524530080507 sim_pfm: -53.227728271662876
episode: 144 training return: tensor(-353.6241, device='cuda:0')
episode: 145 training return: tensor(90.7998, device='cuda:0')
episode: 146 training return: tensor(-141.2012, device='cuda:0')
episode: 147 training return: tensor(-113.7529, device='cuda:0')
epoch: 37 test_true_pfm: 2463.572725187225 sim_pfm: 6.975965621308812
episode: 148 training return: tensor(-317.3083, device='cuda:0')
episode: 149 training return: tensor(-312.5898, device='cuda:0')
episode: 150 training return: tensor(-258.7823, device='cuda:0')
episode: 151 training return: tensor(-399.8451, device='cuda:0')
epoch: 38 test_true_pfm: 1971.518736832498 sim_pfm: -256.82142430666136
episode: 152 training return: tensor(197.5898, device='cuda:0')
episode: 153 training return: tensor(-166.9265, device='cuda:0')
episode: 154 training return: tensor(17.8542, device='cuda:0')
episode: 155 training return: tensor(-320.8735, device='cuda:0')
epoch: 39 test_true_pfm: 2124.0902809829827 sim_pfm: -322.2408322719857
episode: 156 training return: tensor(43.2075, device='cuda:0')
episode: 157 training return: tensor(-328.2921, device='cuda:0')
episode: 158 training return: tensor(-101.2333, device='cuda:0')
episode: 159 training return: tensor(-357.7853, device='cuda:0')
epoch: 40 test_true_pfm: 1675.2601966103894 sim_pfm: -260.4956081469912
episode: 160 training return: tensor(161.4487, device='cuda:0')
episode: 161 training return: tensor(-167.3355, device='cuda:0')
episode: 162 training return: tensor(-35.5369, device='cuda:0')
episode: 163 training return: tensor(-241.1799, device='cuda:0')
epoch: 41 test_true_pfm: 2302.4629825014895 sim_pfm: -17.18173032483901
episode: 164 training return: tensor(-194.8824, device='cuda:0')
episode: 165 training return: tensor(-309.7559, device='cuda:0')
episode: 166 training return: tensor(-294.2831, device='cuda:0')
episode: 167 training return: tensor(167.3154, device='cuda:0')
epoch: 42 test_true_pfm: 2594.913565522816 sim_pfm: -281.07899006200023
episode: 168 training return: tensor(-296.1584, device='cuda:0')
episode: 169 training return: tensor(-335.0290, device='cuda:0')
episode: 170 training return: tensor(-235.0856, device='cuda:0')
episode: 171 training return: tensor(-251.3431, device='cuda:0')
epoch: 43 test_true_pfm: 1997.4781800210947 sim_pfm: -154.10084678963176
episode: 172 training return: tensor(-328.7709, device='cuda:0')
episode: 173 training return: tensor(-413.3346, device='cuda:0')
episode: 174 training return: tensor(-294.3161, device='cuda:0')
episode: 175 training return: tensor(-144.2756, device='cuda:0')
epoch: 44 test_true_pfm: 2074.51332152004 sim_pfm: -214.70915948797483
episode: 176 training return: tensor(193.4149, device='cuda:0')
episode: 177 training return: tensor(-214.8273, device='cuda:0')
episode: 178 training return: tensor(-282.5032, device='cuda:0')
episode: 179 training return: tensor(182.1407, device='cuda:0')
epoch: 45 test_true_pfm: 1743.0773092224133 sim_pfm: -279.99250328513637
episode: 180 training return: tensor(104.7351, device='cuda:0')
episode: 181 training return: tensor(159.8523, device='cuda:0')
episode: 182 training return: tensor(-191.2925, device='cuda:0')
episode: 183 training return: tensor(-328.6844, device='cuda:0')
epoch: 46 test_true_pfm: 2046.4771184105894 sim_pfm: -286.44388710140873
episode: 184 training return: tensor(-316.5169, device='cuda:0')
episode: 185 training return: tensor(133.5598, device='cuda:0')
episode: 186 training return: tensor(-235.5697, device='cuda:0')
episode: 187 training return: tensor(-228.1938, device='cuda:0')
epoch: 47 test_true_pfm: 2269.6767336113444 sim_pfm: -293.96218389854766
episode: 188 training return: tensor(-308.8271, device='cuda:0')
episode: 189 training return: tensor(-461.6953, device='cuda:0')
episode: 190 training return: tensor(-211.8996, device='cuda:0')
episode: 191 training return: tensor(-282.1478, device='cuda:0')
epoch: 48 test_true_pfm: 2737.676468439009 sim_pfm: -131.44043592076437
episode: 192 training return: tensor(172.3553, device='cuda:0')
episode: 193 training return: tensor(-429.9322, device='cuda:0')
episode: 194 training return: tensor(-371.5468, device='cuda:0')
episode: 195 training return: tensor(170.8545, device='cuda:0')
epoch: 49 test_true_pfm: 1827.1624650589813 sim_pfm: -307.51709779594484
episode: 196 training return: tensor(-401.5157, device='cuda:0')
episode: 197 training return: tensor(-305.6542, device='cuda:0')
episode: 198 training return: tensor(-319.3120, device='cuda:0')
episode: 199 training return: tensor(-183.4029, device='cuda:0')
epoch: 50 test_true_pfm: 1930.1176285728486 sim_pfm: -324.50673989146406
episode: 200 training return: tensor(-308.5446, device='cuda:0')
episode: 201 training return: tensor(-50.5997, device='cuda:0')
episode: 202 training return: tensor(-260.6705, device='cuda:0')
episode: 203 training return: tensor(-120.7006, device='cuda:0')
epoch: 51 test_true_pfm: 1937.441752087436 sim_pfm: -299.32684029363253
episode: 204 training return: tensor(117.5149, device='cuda:0')
episode: 205 training return: tensor(-123.8457, device='cuda:0')
episode: 206 training return: tensor(-118.1610, device='cuda:0')
episode: 207 training return: tensor(-25.0012, device='cuda:0')
epoch: 52 test_true_pfm: 2138.176400698658 sim_pfm: -218.73938191628744
episode: 208 training return: tensor(88.1222, device='cuda:0')
episode: 209 training return: tensor(-240.3384, device='cuda:0')
episode: 210 training return: tensor(-111.2113, device='cuda:0')
episode: 211 training return: tensor(-345.9160, device='cuda:0')
epoch: 53 test_true_pfm: 1834.6534176926923 sim_pfm: -290.83532745185465
episode: 212 training return: tensor(-295.6522, device='cuda:0')
episode: 213 training return: tensor(-315.7650, device='cuda:0')
episode: 214 training return: tensor(58.9793, device='cuda:0')
episode: 215 training return: tensor(-158.8347, device='cuda:0')
epoch: 54 test_true_pfm: 2603.1267858814176 sim_pfm: 47.58700609657293
episode: 216 training return: tensor(-284.3617, device='cuda:0')
episode: 217 training return: tensor(-244.7428, device='cuda:0')
episode: 218 training return: tensor(54.3514, device='cuda:0')
episode: 219 training return: tensor(-162.6930, device='cuda:0')
epoch: 55 test_true_pfm: 1876.202174896426 sim_pfm: -212.94860183785204
episode: 220 training return: tensor(37.0772, device='cuda:0')
episode: 221 training return: tensor(-260.0804, device='cuda:0')
episode: 222 training return: tensor(-201.2952, device='cuda:0')
episode: 223 training return: tensor(97.6856, device='cuda:0')
epoch: 56 test_true_pfm: 2955.2610128256783 sim_pfm: -14.966564314905554
episode: 224 training return: tensor(-336.6131, device='cuda:0')
episode: 225 training return: tensor(-349.3711, device='cuda:0')
episode: 226 training return: tensor(180.1274, device='cuda:0')
episode: 227 training return: tensor(-271.5749, device='cuda:0')
epoch: 57 test_true_pfm: 2037.443107966464 sim_pfm: -268.6589703973926
episode: 228 training return: tensor(-333.2375, device='cuda:0')
episode: 229 training return: tensor(-129.4490, device='cuda:0')
episode: 230 training return: tensor(-282.4950, device='cuda:0')
episode: 231 training return: tensor(-253.1769, device='cuda:0')
epoch: 58 test_true_pfm: 2211.653388319051 sim_pfm: 21.62033614421186
episode: 232 training return: tensor(-240.1307, device='cuda:0')
episode: 233 training return: tensor(113.4634, device='cuda:0')
episode: 234 training return: tensor(-356.6447, device='cuda:0')
episode: 235 training return: tensor(-344.4094, device='cuda:0')
epoch: 59 test_true_pfm: 2313.0032823671195 sim_pfm: -129.8891483270078
episode: 236 training return: tensor(-206.9468, device='cuda:0')
episode: 237 training return: tensor(-15.3093, device='cuda:0')
episode: 238 training return: tensor(-252.9710, device='cuda:0')
episode: 239 training return: tensor(-347.5246, device='cuda:0')
epoch: 60 test_true_pfm: 1955.2556034949964 sim_pfm: -163.30176048943153
episode: 240 training return: tensor(-3.4482, device='cuda:0')
episode: 241 training return: tensor(-152.8314, device='cuda:0')
episode: 242 training return: tensor(-155.0172, device='cuda:0')
episode: 243 training return: tensor(-376.8457, device='cuda:0')
epoch: 61 test_true_pfm: 1890.5815389987508 sim_pfm: -271.70104176313424
episode: 244 training return: tensor(-85.4538, device='cuda:0')
episode: 245 training return: tensor(-13.8036, device='cuda:0')
episode: 246 training return: tensor(-214.8050, device='cuda:0')
episode: 247 training return: tensor(-302.9512, device='cuda:0')
epoch: 62 test_true_pfm: 2137.5201353991747 sim_pfm: -184.8424001908667
episode: 248 training return: tensor(83.0619, device='cuda:0')
episode: 249 training return: tensor(-209.3618, device='cuda:0')
episode: 250 training return: tensor(-148.1150, device='cuda:0')
episode: 251 training return: tensor(46.5715, device='cuda:0')
epoch: 63 test_true_pfm: 3068.589634685746 sim_pfm: 46.8215362522072
episode: 252 training return: tensor(-272.0132, device='cuda:0')
episode: 253 training return: tensor(115.5479, device='cuda:0')
episode: 254 training return: tensor(90.4189, device='cuda:0')
episode: 255 training return: tensor(149.7986, device='cuda:0')
epoch: 64 test_true_pfm: 2105.56577174298 sim_pfm: -231.86958408530336
episode: 256 training return: tensor(-289.4340, device='cuda:0')
episode: 257 training return: tensor(115.8390, device='cuda:0')
episode: 258 training return: tensor(121.0270, device='cuda:0')
episode: 259 training return: tensor(-265.5733, device='cuda:0')
epoch: 65 test_true_pfm: 2090.0667549432105 sim_pfm: -215.98086026885235
episode: 260 training return: tensor(-405.4727, device='cuda:0')
episode: 261 training return: tensor(-17.0087, device='cuda:0')
episode: 262 training return: tensor(-263.9777, device='cuda:0')
episode: 263 training return: tensor(126.0515, device='cuda:0')
epoch: 66 test_true_pfm: 1853.2858808880246 sim_pfm: -235.39537441520952
episode: 264 training return: tensor(127.4726, device='cuda:0')
episode: 265 training return: tensor(96.4450, device='cuda:0')
episode: 266 training return: tensor(-192.0221, device='cuda:0')
episode: 267 training return: tensor(-286.3915, device='cuda:0')
epoch: 67 test_true_pfm: 2430.7979150287415 sim_pfm: -98.80562686378835
episode: 268 training return: tensor(-353.0927, device='cuda:0')
episode: 269 training return: tensor(-2.3733, device='cuda:0')
episode: 270 training return: tensor(-296.7661, device='cuda:0')
episode: 271 training return: tensor(-269.2701, device='cuda:0')
epoch: 68 test_true_pfm: 1945.419935693084 sim_pfm: -340.27420909142046
episode: 272 training return: tensor(-448.1515, device='cuda:0')
episode: 273 training return: tensor(113.0929, device='cuda:0')
episode: 274 training return: tensor(62.4960, device='cuda:0')
episode: 275 training return: tensor(-309.8076, device='cuda:0')
epoch: 69 test_true_pfm: 2804.834962218733 sim_pfm: -34.14705045918041
episode: 276 training return: tensor(-333.5673, device='cuda:0')
episode: 277 training return: tensor(-328.9488, device='cuda:0')
episode: 278 training return: tensor(-102.6027, device='cuda:0')
episode: 279 training return: tensor(-151.0572, device='cuda:0')
epoch: 70 test_true_pfm: 2147.0055387929183 sim_pfm: -286.9296797114948
episode: 280 training return: tensor(-339.5070, device='cuda:0')
episode: 281 training return: tensor(-118.2093, device='cuda:0')
episode: 282 training return: tensor(-145.1300, device='cuda:0')
episode: 283 training return: tensor(-213.1023, device='cuda:0')
epoch: 71 test_true_pfm: 2305.4254126609612 sim_pfm: -203.48618843193012
episode: 284 training return: tensor(-39.8448, device='cuda:0')
episode: 285 training return: tensor(-244.6799, device='cuda:0')
episode: 286 training return: tensor(-328.0406, device='cuda:0')
episode: 287 training return: tensor(-332.1914, device='cuda:0')
epoch: 72 test_true_pfm: 1915.6512972446017 sim_pfm: -244.95531064274837
episode: 288 training return: tensor(-140.9824, device='cuda:0')
episode: 289 training return: tensor(-154.4926, device='cuda:0')
episode: 290 training return: tensor(-220.1412, device='cuda:0')
episode: 291 training return: tensor(-198.8719, device='cuda:0')
epoch: 73 test_true_pfm: 1753.4485511782689 sim_pfm: -273.6087894559314
episode: 292 training return: tensor(188.4845, device='cuda:0')
episode: 293 training return: tensor(157.7621, device='cuda:0')
episode: 294 training return: tensor(194.8666, device='cuda:0')
episode: 295 training return: tensor(-138.8596, device='cuda:0')
epoch: 74 test_true_pfm: 2142.27222019419 sim_pfm: -105.64954896495328
episode: 296 training return: tensor(167.9909, device='cuda:0')
episode: 297 training return: tensor(-252.2145, device='cuda:0')
episode: 298 training return: tensor(-62.5425, device='cuda:0')
episode: 299 training return: tensor(-327.5760, device='cuda:0')
epoch: 75 test_true_pfm: 2466.585112503746 sim_pfm: -248.61376457317965
episode: 300 training return: tensor(-369.2867, device='cuda:0')
episode: 301 training return: tensor(-139.2637, device='cuda:0')
episode: 302 training return: tensor(-228.7251, device='cuda:0')
episode: 303 training return: tensor(-298.5778, device='cuda:0')
epoch: 76 test_true_pfm: 2022.0883798111129 sim_pfm: -250.3234748192675
episode: 304 training return: tensor(-289.5273, device='cuda:0')
episode: 305 training return: tensor(-298.6038, device='cuda:0')
episode: 306 training return: tensor(88.8190, device='cuda:0')
episode: 307 training return: tensor(-341.6435, device='cuda:0')
epoch: 77 test_true_pfm: 1752.5885609891877 sim_pfm: -277.1651347401591
episode: 308 training return: tensor(-206.7956, device='cuda:0')
episode: 309 training return: tensor(-204.2241, device='cuda:0')
episode: 310 training return: tensor(-208.0515, device='cuda:0')
episode: 311 training return: tensor(-131.9878, device='cuda:0')
epoch: 78 test_true_pfm: 1820.3652174224108 sim_pfm: -236.21846675085058
episode: 312 training return: tensor(-407.7435, device='cuda:0')
episode: 313 training return: tensor(-136.6250, device='cuda:0')
episode: 314 training return: tensor(131.9465, device='cuda:0')
episode: 315 training return: tensor(27.9148, device='cuda:0')
epoch: 79 test_true_pfm: 1943.8670246058093 sim_pfm: -277.5561657420476
episode: 316 training return: tensor(-308.6682, device='cuda:0')
episode: 317 training return: tensor(-234.9486, device='cuda:0')
episode: 318 training return: tensor(-285.8698, device='cuda:0')
episode: 319 training return: tensor(-259.3371, device='cuda:0')
epoch: 80 test_true_pfm: 2323.374956362389 sim_pfm: -169.83013418971677
episode: 320 training return: tensor(-111.4329, device='cuda:0')
episode: 321 training return: tensor(-328.4960, device='cuda:0')
episode: 322 training return: tensor(8.0183, device='cuda:0')
episode: 323 training return: tensor(-322.5662, device='cuda:0')
epoch: 81 test_true_pfm: 2423.134997635669 sim_pfm: -64.73451140993468
episode: 324 training return: tensor(112.7283, device='cuda:0')
episode: 325 training return: tensor(-208.2779, device='cuda:0')
episode: 326 training return: tensor(-69.2709, device='cuda:0')
episode: 327 training return: tensor(-66.6114, device='cuda:0')
epoch: 82 test_true_pfm: 1855.6883298885175 sim_pfm: -280.57928884510574
episode: 328 training return: tensor(-61.9641, device='cuda:0')
episode: 329 training return: tensor(40.4893, device='cuda:0')
episode: 330 training return: tensor(-166.5006, device='cuda:0')
episode: 331 training return: tensor(34.1234, device='cuda:0')
epoch: 83 test_true_pfm: 2352.794424563101 sim_pfm: -141.12489243166056
episode: 332 training return: tensor(-183.8451, device='cuda:0')
episode: 333 training return: tensor(132.0757, device='cuda:0')
episode: 334 training return: tensor(-118.9606, device='cuda:0')
episode: 335 training return: tensor(-405.2169, device='cuda:0')
epoch: 84 test_true_pfm: 3213.1125353436023 sim_pfm: 69.851780805145
episode: 336 training return: tensor(-303.3965, device='cuda:0')
episode: 337 training return: tensor(-297.3083, device='cuda:0')
episode: 338 training return: tensor(104.0535, device='cuda:0')
episode: 339 training return: tensor(125.7899, device='cuda:0')
epoch: 85 test_true_pfm: 2492.8792834761716 sim_pfm: -181.93563202110818
episode: 340 training return: tensor(-108.7543, device='cuda:0')
episode: 341 training return: tensor(-316.2508, device='cuda:0')
episode: 342 training return: tensor(116.7245, device='cuda:0')
episode: 343 training return: tensor(-246.5866, device='cuda:0')
epoch: 86 test_true_pfm: 2210.158582242793 sim_pfm: -149.8954497623878
episode: 344 training return: tensor(-22.7256, device='cuda:0')
episode: 345 training return: tensor(-296.1919, device='cuda:0')
episode: 346 training return: tensor(-271.8530, device='cuda:0')
episode: 347 training return: tensor(-58.9322, device='cuda:0')
epoch: 87 test_true_pfm: 2247.622578674323 sim_pfm: -212.55497816265174
episode: 348 training return: tensor(-126.3106, device='cuda:0')
episode: 349 training return: tensor(158.0928, device='cuda:0')
episode: 350 training return: tensor(-120.2945, device='cuda:0')
episode: 351 training return: tensor(117.8986, device='cuda:0')
epoch: 88 test_true_pfm: 1862.2479213129106 sim_pfm: -297.90881169830874
episode: 352 training return: tensor(-190.8594, device='cuda:0')
episode: 353 training return: tensor(-281.4680, device='cuda:0')
episode: 354 training return: tensor(-134.6342, device='cuda:0')
episode: 355 training return: tensor(-272.6023, device='cuda:0')
epoch: 89 test_true_pfm: 2231.948779256571 sim_pfm: -204.78948361139433
episode: 356 training return: tensor(-442.4048, device='cuda:0')
episode: 357 training return: tensor(-311.9149, device='cuda:0')
episode: 358 training return: tensor(-428.4193, device='cuda:0')
episode: 359 training return: tensor(-285.8075, device='cuda:0')
epoch: 90 test_true_pfm: 1866.637553377598 sim_pfm: -134.28629743509615
episode: 360 training return: tensor(-254.8809, device='cuda:0')
episode: 361 training return: tensor(20.6036, device='cuda:0')
episode: 362 training return: tensor(-226.4112, device='cuda:0')
episode: 363 training return: tensor(-96.8531, device='cuda:0')
epoch: 91 test_true_pfm: 1820.4248273735495 sim_pfm: -213.82908963479954
episode: 364 training return: tensor(-397.6268, device='cuda:0')
episode: 365 training return: tensor(-29.5273, device='cuda:0')
episode: 366 training return: tensor(-154.2413, device='cuda:0')
episode: 367 training return: tensor(19.2430, device='cuda:0')
epoch: 92 test_true_pfm: 2011.1578254272881 sim_pfm: -186.90478467506668
episode: 368 training return: tensor(-142.8899, device='cuda:0')
episode: 369 training return: tensor(143.6894, device='cuda:0')
episode: 370 training return: tensor(-222.1925, device='cuda:0')
episode: 371 training return: tensor(-259.6708, device='cuda:0')
epoch: 93 test_true_pfm: 2503.6747597471517 sim_pfm: -194.7910424832953
episode: 372 training return: tensor(195.3295, device='cuda:0')
episode: 373 training return: tensor(-160.8059, device='cuda:0')
episode: 374 training return: tensor(-116.1351, device='cuda:0')
episode: 375 training return: tensor(-331.6405, device='cuda:0')
epoch: 94 test_true_pfm: 2130.5653577448506 sim_pfm: -266.46550111166044
episode: 376 training return: tensor(-242.5266, device='cuda:0')
episode: 377 training return: tensor(-219.6971, device='cuda:0')
episode: 378 training return: tensor(-235.7512, device='cuda:0')
episode: 379 training return: tensor(-282.1757, device='cuda:0')
epoch: 95 test_true_pfm: 2576.68240498791 sim_pfm: -55.67418583695932
episode: 380 training return: tensor(-343.9550, device='cuda:0')
episode: 381 training return: tensor(-304.0863, device='cuda:0')
episode: 382 training return: tensor(-382.1115, device='cuda:0')
episode: 383 training return: tensor(-155.7339, device='cuda:0')
epoch: 96 test_true_pfm: 2130.3180296585774 sim_pfm: -269.28347129196237
episode: 384 training return: tensor(-120.2381, device='cuda:0')
episode: 385 training return: tensor(-116.4556, device='cuda:0')
episode: 386 training return: tensor(47.7710, device='cuda:0')
episode: 387 training return: tensor(115.1605, device='cuda:0')
epoch: 97 test_true_pfm: 2507.3032270971694 sim_pfm: 27.733552464982495
episode: 388 training return: tensor(-322.2699, device='cuda:0')
episode: 389 training return: tensor(79.7652, device='cuda:0')
episode: 390 training return: tensor(183.9509, device='cuda:0')
episode: 391 training return: tensor(-320.6599, device='cuda:0')
epoch: 98 test_true_pfm: 1839.275270301351 sim_pfm: -202.8698415590528
episode: 392 training return: tensor(-179.1796, device='cuda:0')
episode: 393 training return: tensor(19.2488, device='cuda:0')
episode: 394 training return: tensor(183.0883, device='cuda:0')
episode: 395 training return: tensor(-240.3871, device='cuda:0')
epoch: 99 test_true_pfm: 2201.246877374158 sim_pfm: -265.7020127255819
episode: 396 training return: tensor(-5.0241, device='cuda:0')
episode: 397 training return: tensor(-187.4693, device='cuda:0')
episode: 398 training return: tensor(113.6665, device='cuda:0')
episode: 399 training return: tensor(-45.3580, device='cuda:0')
epoch: 100 test_true_pfm: 2115.890092758095 sim_pfm: -274.7185963038161
episode: 400 training return: tensor(-289.5095, device='cuda:0')
episode: 401 training return: tensor(198.0369, device='cuda:0')
episode: 402 training return: tensor(-269.7366, device='cuda:0')
episode: 403 training return: tensor(-80.3992, device='cuda:0')
epoch: 101 test_true_pfm: 2379.7027578649922 sim_pfm: -201.09536876758406
episode: 404 training return: tensor(-274.9706, device='cuda:0')
episode: 405 training return: tensor(-338.1466, device='cuda:0')
episode: 406 training return: tensor(-96.8636, device='cuda:0')
episode: 407 training return: tensor(146.1719, device='cuda:0')
epoch: 102 test_true_pfm: 2820.95491334186 sim_pfm: -219.82895273107957
episode: 408 training return: tensor(-312.6366, device='cuda:0')
episode: 409 training return: tensor(-138.0517, device='cuda:0')
episode: 410 training return: tensor(78.0680, device='cuda:0')
episode: 411 training return: tensor(-0.6136, device='cuda:0')
epoch: 103 test_true_pfm: 2006.6829708501893 sim_pfm: -207.40808368501408
episode: 412 training return: tensor(-332.6036, device='cuda:0')
episode: 413 training return: tensor(189.3569, device='cuda:0')
episode: 414 training return: tensor(-347.8809, device='cuda:0')
episode: 415 training return: tensor(167.7585, device='cuda:0')
epoch: 104 test_true_pfm: 1948.4946179813915 sim_pfm: -220.03943921147342
episode: 416 training return: tensor(-319.5233, device='cuda:0')
episode: 417 training return: tensor(-151.2965, device='cuda:0')
episode: 418 training return: tensor(-259.2116, device='cuda:0')
episode: 419 training return: tensor(-223.3178, device='cuda:0')
epoch: 105 test_true_pfm: 2091.360420263863 sim_pfm: 0.12916306071565486
episode: 420 training return: tensor(-471.9535, device='cuda:0')
episode: 421 training return: tensor(-302.3649, device='cuda:0')
episode: 422 training return: tensor(-298.2365, device='cuda:0')
episode: 423 training return: tensor(-330.3787, device='cuda:0')
epoch: 106 test_true_pfm: 2265.785937001869 sim_pfm: -177.00220257793748
episode: 424 training return: tensor(179.3963, device='cuda:0')
episode: 425 training return: tensor(-231.7567, device='cuda:0')
episode: 426 training return: tensor(54.0634, device='cuda:0')
episode: 427 training return: tensor(-218.3616, device='cuda:0')
epoch: 107 test_true_pfm: 1916.6165306578375 sim_pfm: -102.61027287433778
episode: 428 training return: tensor(151.2310, device='cuda:0')
episode: 429 training return: tensor(145.2671, device='cuda:0')
episode: 430 training return: tensor(-222.7532, device='cuda:0')
episode: 431 training return: tensor(85.5870, device='cuda:0')
epoch: 108 test_true_pfm: 2263.994395203263 sim_pfm: -115.12833710443617
episode: 432 training return: tensor(-23.5633, device='cuda:0')
episode: 433 training return: tensor(-320.4942, device='cuda:0')
episode: 434 training return: tensor(-320.1062, device='cuda:0')
episode: 435 training return: tensor(-130.9135, device='cuda:0')
epoch: 109 test_true_pfm: 2081.4034263931812 sim_pfm: -136.03429406910436
episode: 436 training return: tensor(-319.4569, device='cuda:0')
episode: 437 training return: tensor(61.1729, device='cuda:0')
episode: 438 training return: tensor(-291.2908, device='cuda:0')
episode: 439 training return: tensor(-247.8945, device='cuda:0')
epoch: 110 test_true_pfm: 2066.0829377558143 sim_pfm: -287.56669290336623
episode: 440 training return: tensor(0.4924, device='cuda:0')
episode: 441 training return: tensor(-179.8603, device='cuda:0')
episode: 442 training return: tensor(-151.1174, device='cuda:0')
episode: 443 training return: tensor(-230.4496, device='cuda:0')
epoch: 111 test_true_pfm: 1920.472394454725 sim_pfm: -136.7770462684954
episode: 444 training return: tensor(-328.7287, device='cuda:0')
episode: 445 training return: tensor(-131.3413, device='cuda:0')
episode: 446 training return: tensor(-67.1898, device='cuda:0')
episode: 447 training return: tensor(25.2002, device='cuda:0')
epoch: 112 test_true_pfm: 2080.42085375551 sim_pfm: -189.11382380410214
episode: 448 training return: tensor(-335.5617, device='cuda:0')
episode: 449 training return: tensor(-131.3744, device='cuda:0')
episode: 450 training return: tensor(-78.5533, device='cuda:0')
episode: 451 training return: tensor(-46.8597, device='cuda:0')
epoch: 113 test_true_pfm: 2142.6534437751743 sim_pfm: -192.45389176912917
episode: 452 training return: tensor(-281.5922, device='cuda:0')
episode: 453 training return: tensor(-464.6101, device='cuda:0')
episode: 454 training return: tensor(-306.0258, device='cuda:0')
episode: 455 training return: tensor(-247.7017, device='cuda:0')
epoch: 114 test_true_pfm: 2063.4391139026898 sim_pfm: -227.93046874397746
episode: 456 training return: tensor(-255.7251, device='cuda:0')
episode: 457 training return: tensor(24.1216, device='cuda:0')
episode: 458 training return: tensor(-302.9099, device='cuda:0')
episode: 459 training return: tensor(-199.8081, device='cuda:0')
epoch: 115 test_true_pfm: 2473.3376525312283 sim_pfm: -261.57093112089206
episode: 460 training return: tensor(-392.5444, device='cuda:0')
episode: 461 training return: tensor(-264.6422, device='cuda:0')
episode: 462 training return: tensor(-421.8639, device='cuda:0')
episode: 463 training return: tensor(-335.9711, device='cuda:0')
epoch: 116 test_true_pfm: 2116.875997974443 sim_pfm: -239.28327158584338
episode: 464 training return: tensor(-261.1030, device='cuda:0')
episode: 465 training return: tensor(-250.2463, device='cuda:0')
episode: 466 training return: tensor(-274.4423, device='cuda:0')
episode: 467 training return: tensor(-239.8493, device='cuda:0')
epoch: 117 test_true_pfm: 2137.957104044411 sim_pfm: -242.73098011823217
episode: 468 training return: tensor(-302.4060, device='cuda:0')
episode: 469 training return: tensor(-190.4524, device='cuda:0')
episode: 470 training return: tensor(-211.8498, device='cuda:0')
episode: 471 training return: tensor(-325.6096, device='cuda:0')
epoch: 118 test_true_pfm: 2540.1849672947124 sim_pfm: -173.95726006662395
episode: 472 training return: tensor(-66.4290, device='cuda:0')
episode: 473 training return: tensor(-267.2772, device='cuda:0')
episode: 474 training return: tensor(-314.9149, device='cuda:0')
episode: 475 training return: tensor(-287.6903, device='cuda:0')
epoch: 119 test_true_pfm: 2132.7403343488827 sim_pfm: -209.62435104342876
episode: 476 training return: tensor(-450.2624, device='cuda:0')
episode: 477 training return: tensor(-157.7142, device='cuda:0')
episode: 478 training return: tensor(-287.1390, device='cuda:0')
episode: 479 training return: tensor(-218.5947, device='cuda:0')
epoch: 120 test_true_pfm: 2040.098031102532 sim_pfm: -265.7176468447821
episode: 480 training return: tensor(-169.2062, device='cuda:0')
episode: 481 training return: tensor(-322.8988, device='cuda:0')
episode: 482 training return: tensor(-310.1024, device='cuda:0')
episode: 483 training return: tensor(-275.2057, device='cuda:0')
epoch: 121 test_true_pfm: 2441.550837728457 sim_pfm: -183.9923643686149
episode: 484 training return: tensor(-149.0927, device='cuda:0')
episode: 485 training return: tensor(-327.7592, device='cuda:0')
episode: 486 training return: tensor(-234.0525, device='cuda:0')
episode: 487 training return: tensor(-113.3937, device='cuda:0')
epoch: 122 test_true_pfm: 2848.550062064863 sim_pfm: -43.813470140487574
episode: 488 training return: tensor(-290.5847, device='cuda:0')
episode: 489 training return: tensor(136.0923, device='cuda:0')
episode: 490 training return: tensor(-17.8177, device='cuda:0')
episode: 491 training return: tensor(-96.8609, device='cuda:0')
epoch: 123 test_true_pfm: 2353.802222343853 sim_pfm: -83.8755577521127
episode: 492 training return: tensor(-232.2458, device='cuda:0')
episode: 493 training return: tensor(-209.8981, device='cuda:0')
episode: 494 training return: tensor(145.3503, device='cuda:0')
episode: 495 training return: tensor(-157.0773, device='cuda:0')
epoch: 124 test_true_pfm: 1889.8232655391782 sim_pfm: -305.07302930097404
episode: 496 training return: tensor(-264.0621, device='cuda:0')
episode: 497 training return: tensor(-236.2237, device='cuda:0')
episode: 498 training return: tensor(-269.8182, device='cuda:0')
episode: 499 training return: tensor(-185.4046, device='cuda:0')
epoch: 125 test_true_pfm: 1999.8920119688191 sim_pfm: -267.95822427330614
episode: 500 training return: tensor(-438.6199, device='cuda:0')
episode: 501 training return: tensor(-286.6175, device='cuda:0')
episode: 502 training return: tensor(-336.1248, device='cuda:0')
episode: 503 training return: tensor(-319.2991, device='cuda:0')
epoch: 126 test_true_pfm: 2121.7425618086095 sim_pfm: -134.5069508041294
episode: 504 training return: tensor(-42.9782, device='cuda:0')
episode: 505 training return: tensor(-351.6291, device='cuda:0')
episode: 506 training return: tensor(-335.5789, device='cuda:0')
episode: 507 training return: tensor(-158.3686, device='cuda:0')
epoch: 127 test_true_pfm: 2044.8105107161382 sim_pfm: -267.68326811119914
episode: 508 training return: tensor(-45.6788, device='cuda:0')
episode: 509 training return: tensor(116.6819, device='cuda:0')
episode: 510 training return: tensor(11.1562, device='cuda:0')
episode: 511 training return: tensor(93.6706, device='cuda:0')
epoch: 128 test_true_pfm: 2082.99855605663 sim_pfm: -181.187602230435
episode: 512 training return: tensor(-131.0800, device='cuda:0')
episode: 513 training return: tensor(-334.7357, device='cuda:0')
episode: 514 training return: tensor(104.5941, device='cuda:0')
episode: 515 training return: tensor(-154.4178, device='cuda:0')
epoch: 129 test_true_pfm: 2424.5845855541083 sim_pfm: -313.18036215377896
episode: 516 training return: tensor(-375.9397, device='cuda:0')
episode: 517 training return: tensor(-226.4784, device='cuda:0')
episode: 518 training return: tensor(-301.1088, device='cuda:0')
episode: 519 training return: tensor(-401.7158, device='cuda:0')
epoch: 130 test_true_pfm: 1926.1361670831511 sim_pfm: -262.926280376482
episode: 520 training return: tensor(-288.3154, device='cuda:0')
episode: 521 training return: tensor(-194.9109, device='cuda:0')
episode: 522 training return: tensor(131.3586, device='cuda:0')
episode: 523 training return: tensor(-184.3516, device='cuda:0')
epoch: 131 test_true_pfm: 2160.4978296841473 sim_pfm: -57.504241844202625
episode: 524 training return: tensor(-246.8128, device='cuda:0')
episode: 525 training return: tensor(-90.5439, device='cuda:0')
episode: 526 training return: tensor(-230.7815, device='cuda:0')
episode: 527 training return: tensor(148.2666, device='cuda:0')
epoch: 132 test_true_pfm: 2065.9225157538744 sim_pfm: -176.50493841215697
episode: 528 training return: tensor(-152.2201, device='cuda:0')
episode: 529 training return: tensor(201.1203, device='cuda:0')
episode: 530 training return: tensor(32.8138, device='cuda:0')
episode: 531 training return: tensor(-293.8453, device='cuda:0')
epoch: 133 test_true_pfm: 2029.2471659635569 sim_pfm: -108.87212847567086
episode: 532 training return: tensor(-11.2679, device='cuda:0')
episode: 533 training return: tensor(-292.9299, device='cuda:0')
episode: 534 training return: tensor(-226.3610, device='cuda:0')
episode: 535 training return: tensor(130.4536, device='cuda:0')
epoch: 134 test_true_pfm: 2151.0633542325086 sim_pfm: -142.83722184807993
episode: 536 training return: tensor(-420.2974, device='cuda:0')
episode: 537 training return: tensor(-137.9993, device='cuda:0')
episode: 538 training return: tensor(-14.3449, device='cuda:0')
episode: 539 training return: tensor(-257.1407, device='cuda:0')
epoch: 135 test_true_pfm: 2452.1767283772792 sim_pfm: -232.00353291188367
episode: 540 training return: tensor(-161.8436, device='cuda:0')
episode: 541 training return: tensor(-311.3116, device='cuda:0')
episode: 542 training return: tensor(-117.5358, device='cuda:0')
episode: 543 training return: tensor(-240.7830, device='cuda:0')
epoch: 136 test_true_pfm: 2127.3082178355767 sim_pfm: -179.10779115900127
episode: 544 training return: tensor(-194.3279, device='cuda:0')
episode: 545 training return: tensor(-212.7135, device='cuda:0')
episode: 546 training return: tensor(-407.9387, device='cuda:0')
episode: 547 training return: tensor(-136.9954, device='cuda:0')
epoch: 137 test_true_pfm: 2193.2351401778647 sim_pfm: -254.7248303759358
episode: 548 training return: tensor(-223.4432, device='cuda:0')
episode: 549 training return: tensor(-273.5048, device='cuda:0')
episode: 550 training return: tensor(-234.2314, device='cuda:0')
episode: 551 training return: tensor(-271.8429, device='cuda:0')
epoch: 138 test_true_pfm: 2007.2512312195674 sim_pfm: -256.5130740641034
episode: 552 training return: tensor(-60.4312, device='cuda:0')
episode: 553 training return: tensor(-330.9003, device='cuda:0')
episode: 554 training return: tensor(-37.7093, device='cuda:0')
episode: 555 training return: tensor(55.3832, device='cuda:0')
epoch: 139 test_true_pfm: 1990.9567902138467 sim_pfm: -175.11477685781816
episode: 556 training return: tensor(-282.3039, device='cuda:0')
episode: 557 training return: tensor(-125.9141, device='cuda:0')
episode: 558 training return: tensor(189.1968, device='cuda:0')
episode: 559 training return: tensor(-146.6028, device='cuda:0')
epoch: 140 test_true_pfm: 2269.3555888201718 sim_pfm: -124.01830634449531
episode: 560 training return: tensor(-102.4450, device='cuda:0')
episode: 561 training return: tensor(-293.1807, device='cuda:0')
episode: 562 training return: tensor(92.2075, device='cuda:0')
episode: 563 training return: tensor(-367.0692, device='cuda:0')
epoch: 141 test_true_pfm: 1899.033599052279 sim_pfm: -192.5680629532629
episode: 564 training return: tensor(-341.3516, device='cuda:0')
episode: 565 training return: tensor(-60.0046, device='cuda:0')
episode: 566 training return: tensor(-163.8357, device='cuda:0')
episode: 567 training return: tensor(-121.4544, device='cuda:0')
epoch: 142 test_true_pfm: 2425.4149335194816 sim_pfm: -248.42496303499988
episode: 568 training return: tensor(-387.4874, device='cuda:0')
episode: 569 training return: tensor(-224.5658, device='cuda:0')
episode: 570 training return: tensor(203.6832, device='cuda:0')
episode: 571 training return: tensor(-318.4392, device='cuda:0')
epoch: 143 test_true_pfm: 2580.902285224906 sim_pfm: -42.823267195470784
episode: 572 training return: tensor(-228.1889, device='cuda:0')
episode: 573 training return: tensor(16.9325, device='cuda:0')
episode: 574 training return: tensor(-262.7173, device='cuda:0')
episode: 575 training return: tensor(-405.2088, device='cuda:0')
epoch: 144 test_true_pfm: 2178.5826496597388 sim_pfm: -176.488717307162
episode: 576 training return: tensor(108.9914, device='cuda:0')
episode: 577 training return: tensor(-65.3745, device='cuda:0')
episode: 578 training return: tensor(-471.1871, device='cuda:0')
episode: 579 training return: tensor(-17.8953, device='cuda:0')
epoch: 145 test_true_pfm: 2124.573739111092 sim_pfm: -99.87334579717329
episode: 580 training return: tensor(-311.1537, device='cuda:0')
episode: 581 training return: tensor(-233.3266, device='cuda:0')
episode: 582 training return: tensor(-464.8808, device='cuda:0')
episode: 583 training return: tensor(-99.9139, device='cuda:0')
epoch: 146 test_true_pfm: 2018.9320785210582 sim_pfm: -231.40907357658338
episode: 584 training return: tensor(-182.8158, device='cuda:0')
episode: 585 training return: tensor(-128.8438, device='cuda:0')
episode: 586 training return: tensor(175.7958, device='cuda:0')
episode: 587 training return: tensor(-315.0220, device='cuda:0')
epoch: 147 test_true_pfm: 2069.6253139959786 sim_pfm: -212.80116778163938
episode: 588 training return: tensor(-203.9523, device='cuda:0')
episode: 589 training return: tensor(147.0351, device='cuda:0')
episode: 590 training return: tensor(-312.2034, device='cuda:0')
episode: 591 training return: tensor(-301.6479, device='cuda:0')
epoch: 148 test_true_pfm: 2029.885822196681 sim_pfm: -261.3899129504959
episode: 592 training return: tensor(-136.5607, device='cuda:0')
episode: 593 training return: tensor(-229.9083, device='cuda:0')
episode: 594 training return: tensor(-181.0873, device='cuda:0')
episode: 595 training return: tensor(-443.9630, device='cuda:0')
epoch: 149 test_true_pfm: 2485.2532849992144 sim_pfm: -83.16499586512025
episode: 596 training return: tensor(-403.2911, device='cuda:0')
episode: 597 training return: tensor(-283.4146, device='cuda:0')
episode: 598 training return: tensor(-375.7660, device='cuda:0')
episode: 599 training return: tensor(-273.2062, device='cuda:0')
epoch: 150 test_true_pfm: 1997.715923784371 sim_pfm: -187.65431163727771
