['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '4']
epoch: 0 training_loss 0.21025391213595868 test_loss: 0.14598642587661742
epoch: 1 training_loss 0.1469907845184207 test_loss: 0.13455383777618407
epoch: 2 training_loss 0.12573883153498172 test_loss: 0.12632639408111573
epoch: 3 training_loss 0.13307998076081276 test_loss: 0.12259955406188965
epoch: 4 training_loss 0.1337940878048539 test_loss: 0.1195497989654541
epoch: 5 training_loss 0.12459160458296538 test_loss: 0.12082418203353881
epoch: 6 training_loss 0.12183508981019259 test_loss: 0.13083238601684571
epoch: 7 training_loss 0.11918762285262346 test_loss: 0.13158972263336183
epoch: 8 training_loss 0.12870640087872742 test_loss: 0.11222270727157593
epoch: 9 training_loss 0.11956869684159756 test_loss: 0.1179302453994751
epoch: 10 training_loss 0.1120060109347105 test_loss: 0.11241923570632935
epoch: 11 training_loss 0.11341999020427465 test_loss: 0.11686793565750123
epoch: 12 training_loss 0.11847781177610159 test_loss: 0.10167288780212402
epoch: 13 training_loss 0.11488376192748546 test_loss: 0.11676698923110962
epoch: 14 training_loss 0.1128434468060732 test_loss: 0.12451728582382202
epoch: 15 training_loss 0.11409715339541435 test_loss: 0.11086816787719726
epoch: 16 training_loss 0.11841652382165194 test_loss: 0.12902019023895264
epoch: 17 training_loss 0.11764810927212238 test_loss: 0.1074475884437561
epoch: 18 training_loss 0.11945717703551054 test_loss: 0.11604950428009034
epoch: 19 training_loss 0.1211064901202917 test_loss: 0.13560690879821777
epoch: 20 training_loss 0.11905144084244966 test_loss: 0.12932703495025635
epoch: 21 training_loss 0.11909840136766434 test_loss: 0.1310714840888977
epoch: 22 training_loss 0.1187978795543313 test_loss: 0.1255829453468323
epoch: 23 training_loss 0.11676586247980594 test_loss: 0.11500234603881836
epoch: 24 training_loss 0.11936466734856367 test_loss: 0.1334598183631897
epoch: 25 training_loss 0.11496393706649542 test_loss: 0.13141144514083863
epoch: 26 training_loss 0.11909455172717572 test_loss: 0.1086876630783081
epoch: 27 training_loss 0.1118112663179636 test_loss: 0.11708813905715942
epoch: 28 training_loss 0.12009018398821354 test_loss: 0.11728695631027222
epoch: 29 training_loss 0.12147922229021788 test_loss: 0.13599778413772584
epoch: 30 training_loss 0.11369001638144255 test_loss: 0.13403787612915039
epoch: 31 training_loss 0.12007179267704488 test_loss: 0.10919783115386963
epoch: 32 training_loss 0.11203044228255749 test_loss: 0.1293657660484314
epoch: 33 training_loss 0.11193477816879749 test_loss: 0.125834321975708
epoch: 34 training_loss 0.11762779209762812 test_loss: 0.10498524904251098
epoch: 35 training_loss 0.11189695663750171 test_loss: 0.12154573202133179
epoch: 36 training_loss 0.10947727017104626 test_loss: 0.10827031135559081
epoch: 37 training_loss 0.12032809771597386 test_loss: 0.1115613579750061
epoch: 38 training_loss 0.10785048101097346 test_loss: 0.1056973695755005
epoch: 39 training_loss 0.11506983913481235 test_loss: 0.12830876111984252
epoch: 40 training_loss 0.11291594475507737 test_loss: 0.10770090818405151
epoch: 41 training_loss 0.10990567229688168 test_loss: 0.12409229278564453
epoch: 42 training_loss 0.11061948362737895 test_loss: 0.10288418531417846
epoch: 43 training_loss 0.11515478229150176 test_loss: 0.11542695760726929
epoch: 44 training_loss 0.1182605704665184 test_loss: 0.11517640352249145
epoch: 45 training_loss 0.12467384018003941 test_loss: 0.12103461027145386
epoch: 46 training_loss 0.11313364606350661 test_loss: 0.10555491447448731
epoch: 47 training_loss 0.11401511043310165 test_loss: 0.1318489670753479
epoch: 48 training_loss 0.11747738093137741 test_loss: 0.12542397975921632
epoch: 49 training_loss 0.11109629936516285 test_loss: 0.12634958028793336
epoch: 50 training_loss 0.12014802590012551 test_loss: 0.1002391219139099
epoch: 51 training_loss 0.1187330761551857 test_loss: 0.11247742176055908
epoch: 52 training_loss 0.11840662941336631 test_loss: 0.10972702503204346
epoch: 53 training_loss 0.11162217777222395 test_loss: 0.11578940153121949
epoch: 54 training_loss 0.11375835176557303 test_loss: 0.12734214067459107
epoch: 55 training_loss 0.11355638880282641 test_loss: 0.11632708311080933
epoch: 56 training_loss 0.11301648087799548 test_loss: 0.12355332374572754
epoch: 57 training_loss 0.12258680004626513 test_loss: 0.12339462041854858
epoch: 58 training_loss 0.10935658168047667 test_loss: 0.10670911073684693
epoch: 59 training_loss 0.11771047309041023 test_loss: 0.11977612972259521
epoch: 60 training_loss 0.1137290327809751 test_loss: 0.126098370552063
epoch: 61 training_loss 0.11456649146974086 test_loss: 0.11497045755386352
epoch: 62 training_loss 0.11807587992399932 test_loss: 0.1105383276939392
epoch: 63 training_loss 0.11686209701001644 test_loss: 0.11786872148513794
epoch: 64 training_loss 0.11170569896697997 test_loss: 0.1143733024597168
epoch: 65 training_loss 0.11114362165331841 test_loss: 0.1273908019065857
epoch: 66 training_loss 0.1098078652843833 test_loss: 0.11121258735656739
epoch: 67 training_loss 0.11936286609619856 test_loss: 0.1219243049621582
epoch: 68 training_loss 0.10995833493769169 test_loss: 0.12235435247421264
epoch: 69 training_loss 0.1127638253197074 test_loss: 0.11210458278656006
epoch: 70 training_loss 0.10863861102610826 test_loss: 0.11712498664855957
epoch: 71 training_loss 0.11745501048862934 test_loss: 0.11771612167358399
epoch: 72 training_loss 0.11032999504357577 test_loss: 0.11850156784057617
epoch: 73 training_loss 0.12148948814719915 test_loss: 0.10948694944381714
epoch: 74 training_loss 0.11394766811281443 test_loss: 0.1214900016784668
epoch: 75 training_loss 0.11577486239373684 test_loss: 0.12263107299804688
epoch: 76 training_loss 0.11274791784584522 test_loss: 0.12143319845199585
epoch: 77 training_loss 0.11241484716534615 test_loss: 0.12527916431427003
epoch: 78 training_loss 0.1079591677337885 test_loss: 0.11514980792999267
epoch: 79 training_loss 0.11228011019527911 test_loss: 0.10960272550582886
epoch: 80 training_loss 0.1092123145982623 test_loss: 0.10386288166046143
epoch: 81 training_loss 0.11382034339010716 test_loss: 0.10391891002655029
epoch: 82 training_loss 0.11602951966226101 test_loss: 0.11147292852401733
epoch: 83 training_loss 0.1111483396217227 test_loss: 0.14121900796890258
epoch: 84 training_loss 0.11078338842839003 test_loss: 0.11350295543670655
epoch: 85 training_loss 0.11770750951021909 test_loss: 0.11572873592376709
epoch: 86 training_loss 0.11109946768730879 test_loss: 0.11058090925216675
epoch: 87 training_loss 0.1120644685998559 test_loss: 0.12212870121002198
epoch: 88 training_loss 0.1118017154186964 test_loss: 0.1348761200904846
epoch: 89 training_loss 0.11573453485965729 test_loss: 0.10907275676727295
epoch: 90 training_loss 0.11268471047282219 test_loss: 0.11069496870040893
epoch: 91 training_loss 0.10790434826165438 test_loss: 0.10388450622558594
epoch: 92 training_loss 0.11222225598990918 test_loss: 0.11846647262573243
epoch: 93 training_loss 0.11511560022830963 test_loss: 0.11671760082244872
epoch: 94 training_loss 0.11510810140520335 test_loss: 0.11879146099090576
epoch: 95 training_loss 0.11335767131298781 test_loss: 0.11916581392288209
epoch: 96 training_loss 0.11058094434440136 test_loss: 0.1097291111946106
epoch: 97 training_loss 0.11686544083058834 test_loss: 0.11228832006454467
epoch: 98 training_loss 0.10997706830501557 test_loss: 0.1136515498161316
epoch: 99 training_loss 0.11335749734193086 test_loss: 0.11376537084579467
epoch: 100 training_loss 0.11529777437448502 test_loss: 0.1125449538230896
epoch: 101 training_loss 0.10993539955466985 test_loss: 0.10317866802215576
epoch: 102 training_loss 0.1128088077902794 test_loss: 0.1215134620666504
epoch: 103 training_loss 0.10958676785230637 test_loss: 0.10901211500167847
epoch: 104 training_loss 0.11001721180975438 test_loss: 0.11024115085601807
epoch: 105 training_loss 0.11902332119643688 test_loss: 0.12043830156326293
epoch: 106 training_loss 0.11318824354559183 test_loss: 0.11892993450164795
epoch: 107 training_loss 0.10889953050762415 test_loss: 0.11777974367141723
epoch: 108 training_loss 0.1043447607010603 test_loss: 0.1170839548110962
epoch: 109 training_loss 0.11610846102237701 test_loss: 0.1123158097267151
epoch: 110 training_loss 0.10828911390155554 test_loss: 0.11830904483795165
epoch: 111 training_loss 0.11033878773450852 test_loss: 0.11504663228988647
epoch: 112 training_loss 0.11358382791280747 test_loss: 0.11249399185180664
epoch: 113 training_loss 0.11246928751468659 test_loss: 0.11146630048751831
epoch: 114 training_loss 0.10731036998331547 test_loss: 0.11018187999725342
epoch: 115 training_loss 0.10938299268484115 test_loss: 0.10358372926712037
epoch: 116 training_loss 0.10721348475664855 test_loss: 0.10664045810699463
epoch: 117 training_loss 0.11260663319379091 test_loss: 0.10791445970535278
epoch: 118 training_loss 0.10994214083999396 test_loss: 0.11437420845031739
epoch: 119 training_loss 0.10561110667884349 test_loss: 0.10457018613815308
epoch: 120 training_loss 0.11144757974892855 test_loss: 0.10539710521697998
epoch: 121 training_loss 0.10825892500579357 test_loss: 0.10993883609771729
epoch: 122 training_loss 0.11224055532366037 test_loss: 0.10508753061294555
epoch: 123 training_loss 0.11865465261042119 test_loss: 0.11088818311691284
epoch: 124 training_loss 0.11667695507407189 test_loss: 0.12960076332092285
epoch: 125 training_loss 0.1087979343533516 test_loss: 0.12464158535003662
epoch: 126 training_loss 0.10180239547044038 test_loss: 0.13244476318359374
epoch: 127 training_loss 0.11325032956898212 test_loss: 0.13743505477905274
epoch: 128 training_loss 0.10850308064371347 test_loss: 0.11674667596817016
epoch: 129 training_loss 0.11246369097381831 test_loss: 0.1072089672088623
epoch: 130 training_loss 0.1117599044367671 test_loss: 0.11769603490829468
epoch: 131 training_loss 0.10994051847606898 test_loss: 0.10125484466552734
epoch: 132 training_loss 0.11067175723612309 test_loss: 0.1199786901473999
epoch: 133 training_loss 0.11336121223866939 test_loss: 0.10267553329467774
epoch: 134 training_loss 0.10734236009418964 test_loss: 0.10553348064422607
epoch: 135 training_loss 0.10713805865496397 test_loss: 0.11625040769577026
epoch: 136 training_loss 0.10788189936429263 test_loss: 0.11146986484527588
epoch: 137 training_loss 0.10926467649638653 test_loss: 0.12463898658752441
epoch: 138 training_loss 0.11081726856529713 test_loss: 0.11067661046981811
epoch: 139 training_loss 0.10815613374114036 test_loss: 0.11801198720932007
epoch: 140 training_loss 0.11033329840749502 test_loss: 0.10524551868438721
epoch: 141 training_loss 0.11394111398607493 test_loss: 0.10823270082473754
epoch: 142 training_loss 0.10808317437767982 test_loss: 0.1266266107559204
epoch: 143 training_loss 0.11067244291305542 test_loss: 0.1262912631034851
epoch: 144 training_loss 0.10862577598541975 test_loss: 0.11170827150344849
epoch: 145 training_loss 0.11234006222337484 test_loss: 0.11177225112915039
epoch: 146 training_loss 0.11711624473333358 test_loss: 0.11512553691864014
epoch: 147 training_loss 0.10642283376306295 test_loss: 0.12218844890594482
epoch: 148 training_loss 0.11025971427559852 test_loss: 0.11676770448684692
epoch: 149 training_loss 0.10739202596247197 test_loss: 0.11583104133605956
epoch: 0 training_loss 21.536893396377565 test_loss: 15.18310546875
epoch: 1 training_loss 11.338400239944457 test_loss: 9.169181823730469
epoch: 2 training_loss 8.28950303554535 test_loss: 7.546222686767578
epoch: 3 training_loss 7.15207004070282 test_loss: 6.961155700683594
epoch: 4 training_loss 6.560623693466186 test_loss: 6.297464752197266
epoch: 5 training_loss 5.975431852340698 test_loss: 5.870291900634766
epoch: 6 training_loss 5.499773335456848 test_loss: 5.435136032104492
epoch: 7 training_loss 5.058044703006744 test_loss: 5.335818099975586
epoch: 8 training_loss 4.84640221118927 test_loss: 4.777799987792969
epoch: 9 training_loss 4.564011192321777 test_loss: 4.305529022216797
epoch: 10 training_loss 4.372887718677521 test_loss: 4.055001831054687
epoch: 11 training_loss 4.228498613834381 test_loss: 4.373043441772461
epoch: 12 training_loss 4.026573143005371 test_loss: 4.046341323852539
epoch: 13 training_loss 3.9272728514671327 test_loss: 3.94815673828125
epoch: 14 training_loss 3.9077099895477296 test_loss: 4.016013717651367
epoch: 15 training_loss 3.605446140766144 test_loss: 3.5481910705566406
epoch: 16 training_loss 3.634375898838043 test_loss: 3.6868289947509765
epoch: 17 training_loss 3.525596878528595 test_loss: 3.409514617919922
epoch: 18 training_loss 3.3819987416267394 test_loss: 3.2634918212890627
epoch: 19 training_loss 3.2796490836143493 test_loss: 3.4025306701660156
epoch: 20 training_loss 3.3245763301849367 test_loss: 3.440536117553711
epoch: 21 training_loss 3.2505855131149293 test_loss: 3.1248119354248045
epoch: 22 training_loss 3.1725728607177732 test_loss: 3.3005935668945314
epoch: 23 training_loss 3.233566710948944 test_loss: 3.293084716796875
epoch: 24 training_loss 3.1222040033340455 test_loss: 3.2970199584960938
epoch: 25 training_loss 3.0610489678382873 test_loss: 3.263539505004883
epoch: 26 training_loss 3.0707371616363526 test_loss: 2.895465850830078
epoch: 27 training_loss 3.0307873392105105 test_loss: 3.0045097351074217
epoch: 28 training_loss 2.9997666692733764 test_loss: 2.918085479736328
epoch: 29 training_loss 2.9770596575737 test_loss: 3.0334775924682615
epoch: 30 training_loss 2.9766847562789915 test_loss: 2.8276575088500975
epoch: 31 training_loss 2.898484933376312 test_loss: 2.8337238311767576
epoch: 32 training_loss 2.818941295146942 test_loss: 3.0194829940795898
epoch: 33 training_loss 2.868184497356415 test_loss: 2.7646921157836912
epoch: 34 training_loss 2.772392966747284 test_loss: 2.617302131652832
epoch: 35 training_loss 2.7117089354991912 test_loss: 2.767938232421875
epoch: 36 training_loss 2.7341284561157226 test_loss: 2.6234249114990233
epoch: 37 training_loss 2.6796826839447023 test_loss: 2.7278570175170898
epoch: 38 training_loss 2.7084258008003235 test_loss: 2.6847118377685546
epoch: 39 training_loss 2.5838254737854003 test_loss: 2.345850944519043
epoch: 40 training_loss 2.4956351387500764 test_loss: 2.35091609954834
epoch: 41 training_loss 2.429700816869736 test_loss: 2.4391931533813476
epoch: 42 training_loss 2.4719269990921022 test_loss: 2.436421775817871
epoch: 43 training_loss 2.3651182150840757 test_loss: 2.541681098937988
epoch: 44 training_loss 2.3176969623565675 test_loss: 2.278183364868164
epoch: 45 training_loss 2.348091014623642 test_loss: 2.38863525390625
epoch: 46 training_loss 2.33720455288887 test_loss: 2.2437822341918947
epoch: 47 training_loss 2.241282000541687 test_loss: 2.4073246002197264
epoch: 48 training_loss 2.3307229816913604 test_loss: 2.313230514526367
epoch: 49 training_loss 2.3004268193244934 test_loss: 2.347274971008301
epoch: 50 training_loss 2.2551191103458406 test_loss: 2.3487007141113283
epoch: 51 training_loss 2.240672105550766 test_loss: 2.194132423400879
epoch: 52 training_loss 2.2248267769813537 test_loss: 2.413343811035156
epoch: 53 training_loss 2.2365091574192046 test_loss: 2.3306978225708006
epoch: 54 training_loss 2.2706349909305574 test_loss: 2.150628089904785
epoch: 55 training_loss 2.222298765182495 test_loss: 2.174547004699707
epoch: 56 training_loss 2.2205697906017305 test_loss: 2.3530548095703123
epoch: 57 training_loss 2.1780760538578035 test_loss: 2.256100082397461
epoch: 58 training_loss 2.134355216026306 test_loss: 2.1908912658691406
epoch: 59 training_loss 2.0910346448421477 test_loss: 2.3206079483032225
epoch: 60 training_loss 2.1932604444026946 test_loss: 2.185892868041992
epoch: 61 training_loss 2.1415916645526885 test_loss: 2.195695686340332
epoch: 62 training_loss 2.153502330780029 test_loss: 2.1815282821655275
epoch: 63 training_loss 2.128398369550705 test_loss: 1.9830791473388671
epoch: 64 training_loss 2.1073860704898832 test_loss: 2.1613384246826173
epoch: 65 training_loss 2.1465830492973326 test_loss: 2.06784610748291
epoch: 66 training_loss 2.07110680103302 test_loss: 2.1329015731811523
epoch: 67 training_loss 2.0397795820236206 test_loss: 2.201201820373535
epoch: 68 training_loss 2.0791023421287536 test_loss: 2.298928451538086
epoch: 69 training_loss 2.1011309957504274 test_loss: 2.276595687866211
epoch: 70 training_loss 2.1069337916374207 test_loss: 1.9893510818481446
epoch: 71 training_loss 2.0689817905426025 test_loss: 2.0503890991210936
epoch: 72 training_loss 2.0517151308059693 test_loss: 1.9937938690185546
epoch: 73 training_loss 2.0431858563423155 test_loss: 2.0072967529296877
epoch: 74 training_loss 1.949321870803833 test_loss: 2.107016944885254
epoch: 75 training_loss 2.0759198439121245 test_loss: 2.1030675888061525
epoch: 76 training_loss 1.9994691550731658 test_loss: 2.2452606201171874
epoch: 77 training_loss 2.0135731077194214 test_loss: 2.0974803924560548
epoch: 78 training_loss 2.049077332019806 test_loss: 2.0494260787963867
epoch: 79 training_loss 1.9170727622509003 test_loss: 1.972205352783203
epoch: 80 training_loss 1.990594643354416 test_loss: 2.0041955947875976
epoch: 81 training_loss 1.9842050397396087 test_loss: 1.976100540161133
epoch: 82 training_loss 1.9281899237632751 test_loss: 1.941307258605957
epoch: 83 training_loss 1.9554172718524934 test_loss: 2.009115982055664
epoch: 84 training_loss 2.0066693782806397 test_loss: 2.0590003967285155
epoch: 85 training_loss 1.937412565946579 test_loss: 1.9504119873046875
epoch: 86 training_loss 1.9320939671993256 test_loss: 1.9658447265625
epoch: 87 training_loss 1.9276505053043365 test_loss: 1.986631202697754
epoch: 88 training_loss 1.9881226575374604 test_loss: 1.9406513214111327
epoch: 89 training_loss 1.901673002243042 test_loss: 2.099472999572754
epoch: 90 training_loss 1.903023499250412 test_loss: 1.7855905532836913
epoch: 91 training_loss 1.9052721798419952 test_loss: 2.000519943237305
epoch: 92 training_loss 1.9137757885456086 test_loss: 1.9969448089599608
epoch: 93 training_loss 1.887741310596466 test_loss: 1.868951416015625
epoch: 94 training_loss 1.875639613866806 test_loss: 1.8467031478881837
epoch: 95 training_loss 1.8982591497898103 test_loss: 1.8391063690185547
epoch: 96 training_loss 1.9525440049171447 test_loss: 1.8525262832641602
epoch: 97 training_loss 1.9046206152439118 test_loss: 1.991221046447754
epoch: 98 training_loss 1.7781582796573638 test_loss: 1.9336780548095702
epoch: 99 training_loss 1.8907108092308045 test_loss: 2.053702735900879
epoch: 100 training_loss 1.8789483642578124 test_loss: 1.7209983825683595
epoch: 101 training_loss 1.911718921661377 test_loss: 1.787766456604004
epoch: 102 training_loss 1.8040174406766891 test_loss: 1.9304845809936524
epoch: 103 training_loss 1.8407032907009124 test_loss: 1.9875001907348633
epoch: 104 training_loss 1.84702103972435 test_loss: 1.9181400299072267
epoch: 105 training_loss 1.885253336429596 test_loss: 2.0341941833496096
epoch: 106 training_loss 1.8615442430973053 test_loss: 1.9385034561157226
epoch: 107 training_loss 1.817556163072586 test_loss: 1.9015609741210937
epoch: 108 training_loss 1.8356198239326478 test_loss: 1.8044754028320313
epoch: 109 training_loss 1.8293851435184478 test_loss: 2.258967399597168
epoch: 110 training_loss 1.8288991558551788 test_loss: 1.7930004119873046
epoch: 111 training_loss 1.8494742918014526 test_loss: 1.9996654510498046
epoch: 112 training_loss 1.817984846830368 test_loss: 1.7140489578247071
epoch: 113 training_loss 1.8017622232437134 test_loss: 1.891299057006836
epoch: 114 training_loss 1.7619806611537934 test_loss: 1.7872394561767577
epoch: 115 training_loss 1.8212701284885406 test_loss: 1.7997739791870118
epoch: 116 training_loss 1.7854904198646546 test_loss: 1.8656389236450195
epoch: 117 training_loss 1.8040802717208861 test_loss: 1.8898435592651368
epoch: 118 training_loss 1.7728272032737733 test_loss: 1.7875120162963867
epoch: 119 training_loss 1.8006814396381379 test_loss: 1.7786052703857422
epoch: 120 training_loss 1.7638840436935426 test_loss: 1.7534561157226562
epoch: 121 training_loss 1.7436921870708466 test_loss: 1.6662870407104493
epoch: 122 training_loss 1.7357784223556518 test_loss: 1.7722883224487305
epoch: 123 training_loss 1.7776819384098053 test_loss: 1.8372207641601563
epoch: 124 training_loss 1.7537548458576202 test_loss: 1.664278793334961
epoch: 125 training_loss 1.7515551710128785 test_loss: 1.8051000595092774
epoch: 126 training_loss 1.8134428679943084 test_loss: 1.9476631164550782
epoch: 127 training_loss 1.775556434392929 test_loss: 1.8299348831176758
epoch: 128 training_loss 1.7102743911743163 test_loss: 1.8461492538452149
epoch: 129 training_loss 1.7790751123428346 test_loss: 1.817262840270996
epoch: 130 training_loss 1.7601815283298492 test_loss: 1.6957988739013672
epoch: 131 training_loss 1.6919480812549592 test_loss: 1.7908849716186523
epoch: 132 training_loss 1.7471688055992127 test_loss: 1.7234687805175781
epoch: 133 training_loss 1.7405451953411102 test_loss: 1.8273733139038086
epoch: 134 training_loss 1.7356066799163818 test_loss: 1.790985679626465
epoch: 135 training_loss 1.6821526896953582 test_loss: 1.700845718383789
epoch: 136 training_loss 1.6954566478729247 test_loss: 1.7504537582397461
epoch: 137 training_loss 1.7249733185768128 test_loss: 1.626682662963867
epoch: 138 training_loss 1.715837550163269 test_loss: 1.7268402099609375
epoch: 139 training_loss 1.7079650938510895 test_loss: 1.702438735961914
epoch: 140 training_loss 1.8022732818126679 test_loss: 1.9767929077148438
epoch: 141 training_loss 1.6292304635047912 test_loss: 1.672462272644043
epoch: 142 training_loss 1.7644706416130065 test_loss: 1.8185905456542968
epoch: 143 training_loss 1.7338144397735595 test_loss: 1.94061336517334
epoch: 144 training_loss 1.7385657846927642 test_loss: 1.633643341064453
epoch: 145 training_loss 1.692253566980362 test_loss: 1.7163824081420898
epoch: 146 training_loss 1.7601027047634126 test_loss: 1.775670051574707
epoch: 147 training_loss 1.7108532476425171 test_loss: 1.8519279479980468
epoch: 148 training_loss 1.6296334034204483 test_loss: 1.6186288833618163
epoch: 149 training_loss 1.6365198588371277 test_loss: 1.749776268005371
134.99013137495496
episode: 0 training return: tensor(211.9807, device='cuda:0')
episode: 1 training return: tensor(220.5624, device='cuda:0')
episode: 2 training return: tensor(218.5346, device='cuda:0')
episode: 3 training return: tensor(225.2957, device='cuda:0')
epoch: 1 test_true_pfm: 132.26758530944412 sim_pfm: 205.8315360793844
episode: 4 training return: tensor(226.6758, device='cuda:0')
episode: 5 training return: tensor(213.3903, device='cuda:0')
episode: 6 training return: tensor(227.8186, device='cuda:0')
episode: 7 training return: tensor(210.1872, device='cuda:0')
epoch: 2 test_true_pfm: 126.2671073988787 sim_pfm: 211.37274325522594
episode: 8 training return: tensor(222.9861, device='cuda:0')
episode: 9 training return: tensor(213.8144, device='cuda:0')
episode: 10 training return: tensor(211.4539, device='cuda:0')
episode: 11 training return: tensor(222.4595, device='cuda:0')
epoch: 3 test_true_pfm: 127.44729444158028 sim_pfm: 206.5677526072017
episode: 12 training return: tensor(224.0540, device='cuda:0')
episode: 13 training return: tensor(212.0116, device='cuda:0')
episode: 14 training return: tensor(217.7196, device='cuda:0')
episode: 15 training return: tensor(215.7038, device='cuda:0')
epoch: 4 test_true_pfm: 123.27711675711862 sim_pfm: 205.70099299965077
episode: 16 training return: tensor(203.8829, device='cuda:0')
episode: 17 training return: tensor(212.2064, device='cuda:0')
episode: 18 training return: tensor(209.4391, device='cuda:0')
episode: 19 training return: tensor(211.5530, device='cuda:0')
epoch: 5 test_true_pfm: 129.93967089315922 sim_pfm: 227.76641676006838
episode: 20 training return: tensor(221.8544, device='cuda:0')
episode: 21 training return: tensor(238.4816, device='cuda:0')
episode: 22 training return: tensor(219.0956, device='cuda:0')
episode: 23 training return: tensor(223.5160, device='cuda:0')
epoch: 6 test_true_pfm: 129.632451829862 sim_pfm: 226.44410587681924
episode: 24 training return: tensor(226.4965, device='cuda:0')
episode: 25 training return: tensor(219.8584, device='cuda:0')
episode: 26 training return: tensor(230.6671, device='cuda:0')
episode: 27 training return: tensor(226.7892, device='cuda:0')
epoch: 7 test_true_pfm: 130.7613769572885 sim_pfm: 218.91009193841018
episode: 28 training return: tensor(220.1993, device='cuda:0')
episode: 29 training return: tensor(221.2791, device='cuda:0')
episode: 30 training return: tensor(227.5409, device='cuda:0')
episode: 31 training return: tensor(218.4509, device='cuda:0')
epoch: 8 test_true_pfm: 132.91048584128663 sim_pfm: 228.2683391355851
episode: 32 training return: tensor(224.2326, device='cuda:0')
episode: 33 training return: tensor(219.8359, device='cuda:0')
episode: 34 training return: tensor(227.5605, device='cuda:0')
episode: 35 training return: tensor(226.8593, device='cuda:0')
epoch: 9 test_true_pfm: 130.6340771283125 sim_pfm: 224.64843925907627
episode: 36 training return: tensor(222.4905, device='cuda:0')
episode: 37 training return: tensor(224.3019, device='cuda:0')
episode: 38 training return: tensor(213.9078, device='cuda:0')
episode: 39 training return: tensor(217.7387, device='cuda:0')
epoch: 10 test_true_pfm: 131.4960692931881 sim_pfm: 229.82557116218376
episode: 40 training return: tensor(220.9725, device='cuda:0')
episode: 41 training return: tensor(234.8874, device='cuda:0')
episode: 42 training return: tensor(233.2951, device='cuda:0')
episode: 43 training return: tensor(222.6300, device='cuda:0')
epoch: 11 test_true_pfm: 132.10028083890808 sim_pfm: 216.86799229048773
episode: 44 training return: tensor(212.2713, device='cuda:0')
episode: 45 training return: tensor(225.3094, device='cuda:0')
episode: 46 training return: tensor(216.7135, device='cuda:0')
episode: 47 training return: tensor(222.1141, device='cuda:0')
epoch: 12 test_true_pfm: 130.1588045842356 sim_pfm: 224.9155808333773
episode: 48 training return: tensor(216.6415, device='cuda:0')
episode: 49 training return: tensor(229.5056, device='cuda:0')
episode: 50 training return: tensor(230.0546, device='cuda:0')
episode: 51 training return: tensor(227.7618, device='cuda:0')
epoch: 13 test_true_pfm: 131.44352957571823 sim_pfm: 221.39320524432696
episode: 52 training return: tensor(231.1021, device='cuda:0')
episode: 53 training return: tensor(234.6258, device='cuda:0')
episode: 54 training return: tensor(222.6273, device='cuda:0')
episode: 55 training return: tensor(233.1835, device='cuda:0')
epoch: 14 test_true_pfm: 129.05422535519693 sim_pfm: 228.5352008924412
episode: 56 training return: tensor(230.4938, device='cuda:0')
episode: 57 training return: tensor(225.0575, device='cuda:0')
episode: 58 training return: tensor(225.3978, device='cuda:0')
episode: 59 training return: tensor(217.7387, device='cuda:0')
epoch: 15 test_true_pfm: 129.9503251112987 sim_pfm: 232.0241169346147
episode: 60 training return: tensor(219.5288, device='cuda:0')
episode: 61 training return: tensor(234.0972, device='cuda:0')
episode: 62 training return: tensor(218.8577, device='cuda:0')
episode: 63 training return: tensor(221.4200, device='cuda:0')
epoch: 16 test_true_pfm: 130.93974295362983 sim_pfm: 225.417116201669
episode: 64 training return: tensor(222.7451, device='cuda:0')
episode: 65 training return: tensor(228.3701, device='cuda:0')
episode: 66 training return: tensor(223.6893, device='cuda:0')
episode: 67 training return: tensor(229.7520, device='cuda:0')
epoch: 17 test_true_pfm: 131.3899959988362 sim_pfm: 224.20448708063924
episode: 68 training return: tensor(222.1350, device='cuda:0')
episode: 69 training return: tensor(218.5872, device='cuda:0')
episode: 70 training return: tensor(224.1352, device='cuda:0')
episode: 71 training return: tensor(228.3551, device='cuda:0')
epoch: 18 test_true_pfm: 129.88244032751444 sim_pfm: 225.1207081096596
episode: 72 training return: tensor(224.0707, device='cuda:0')
episode: 73 training return: tensor(224.2860, device='cuda:0')
episode: 74 training return: tensor(232.2518, device='cuda:0')
episode: 75 training return: tensor(244.2653, device='cuda:0')
epoch: 19 test_true_pfm: 130.14166612088064 sim_pfm: 221.84292942831524
episode: 76 training return: tensor(233.1481, device='cuda:0')
episode: 77 training return: tensor(224.6905, device='cuda:0')
episode: 78 training return: tensor(215.6445, device='cuda:0')
episode: 79 training return: tensor(241.0480, device='cuda:0')
epoch: 20 test_true_pfm: 127.16257515351617 sim_pfm: 224.77422638643185
episode: 80 training return: tensor(228.0975, device='cuda:0')
episode: 81 training return: tensor(235.5049, device='cuda:0')
episode: 82 training return: tensor(221.9361, device='cuda:0')
episode: 83 training return: tensor(227.7535, device='cuda:0')
epoch: 21 test_true_pfm: 128.05179445481275 sim_pfm: 224.41220475716983
episode: 84 training return: tensor(228.0355, device='cuda:0')
episode: 85 training return: tensor(233.6213, device='cuda:0')
episode: 86 training return: tensor(244.3483, device='cuda:0')
episode: 87 training return: tensor(226.2659, device='cuda:0')
epoch: 22 test_true_pfm: 130.09640705888287 sim_pfm: 228.7476884257165
episode: 88 training return: tensor(221.7389, device='cuda:0')
episode: 89 training return: tensor(219.2519, device='cuda:0')
episode: 90 training return: tensor(242.9380, device='cuda:0')
episode: 91 training return: tensor(236.9415, device='cuda:0')
epoch: 23 test_true_pfm: 129.82600784029333 sim_pfm: 232.30809043438057
episode: 92 training return: tensor(233.3976, device='cuda:0')
episode: 93 training return: tensor(230.5614, device='cuda:0')
episode: 94 training return: tensor(229.7519, device='cuda:0')
episode: 95 training return: tensor(219.3775, device='cuda:0')
epoch: 24 test_true_pfm: 125.98616252803768 sim_pfm: 228.25857137261193
episode: 96 training return: tensor(225.2077, device='cuda:0')
episode: 97 training return: tensor(228.5289, device='cuda:0')
episode: 98 training return: tensor(218.8167, device='cuda:0')
episode: 99 training return: tensor(237.3552, device='cuda:0')
epoch: 25 test_true_pfm: 129.2472640437944 sim_pfm: 233.7469994136307
episode: 100 training return: tensor(230.1752, device='cuda:0')
episode: 101 training return: tensor(206.4230, device='cuda:0')
episode: 102 training return: tensor(253.5095, device='cuda:0')
episode: 103 training return: tensor(236.3275, device='cuda:0')
epoch: 26 test_true_pfm: 127.52227442097679 sim_pfm: 230.71186825227923
episode: 104 training return: tensor(221.6783, device='cuda:0')
episode: 105 training return: tensor(230.6750, device='cuda:0')
episode: 106 training return: tensor(228.5113, device='cuda:0')
episode: 107 training return: tensor(226.4513, device='cuda:0')
epoch: 27 test_true_pfm: 125.70185406685746 sim_pfm: 235.10179321938193
episode: 108 training return: tensor(226.5555, device='cuda:0')
episode: 109 training return: tensor(240.5877, device='cuda:0')
episode: 110 training return: tensor(220.5599, device='cuda:0')
episode: 111 training return: tensor(245.0934, device='cuda:0')
epoch: 28 test_true_pfm: 128.8052967924738 sim_pfm: 226.31224406340624
episode: 112 training return: tensor(239.7898, device='cuda:0')
episode: 113 training return: tensor(226.6622, device='cuda:0')
episode: 114 training return: tensor(232.4618, device='cuda:0')
episode: 115 training return: tensor(239.0771, device='cuda:0')
epoch: 29 test_true_pfm: 126.57716254305883 sim_pfm: 228.82900059055538
episode: 116 training return: tensor(242.6877, device='cuda:0')
episode: 117 training return: tensor(248.0218, device='cuda:0')
episode: 118 training return: tensor(226.6059, device='cuda:0')
episode: 119 training return: tensor(234.5642, device='cuda:0')
epoch: 30 test_true_pfm: 128.02856262895986 sim_pfm: 226.35839877153632
episode: 120 training return: tensor(226.0304, device='cuda:0')
episode: 121 training return: tensor(222.1324, device='cuda:0')
episode: 122 training return: tensor(231.7086, device='cuda:0')
episode: 123 training return: tensor(238.3859, device='cuda:0')
epoch: 31 test_true_pfm: 128.16074650250525 sim_pfm: 225.24565450557276
episode: 124 training return: tensor(236.2718, device='cuda:0')
episode: 125 training return: tensor(250.7615, device='cuda:0')
episode: 126 training return: tensor(223.7810, device='cuda:0')
episode: 127 training return: tensor(222.2993, device='cuda:0')
epoch: 32 test_true_pfm: 128.82687678182157 sim_pfm: 232.0169107644877
episode: 128 training return: tensor(213.9280, device='cuda:0')
episode: 129 training return: tensor(254.7717, device='cuda:0')
episode: 130 training return: tensor(244.8757, device='cuda:0')
episode: 131 training return: tensor(230.2983, device='cuda:0')
epoch: 33 test_true_pfm: 126.25398901615264 sim_pfm: 231.44853364942827
episode: 132 training return: tensor(230.1942, device='cuda:0')
episode: 133 training return: tensor(226.3371, device='cuda:0')
episode: 134 training return: tensor(229.3536, device='cuda:0')
episode: 135 training return: tensor(232.5441, device='cuda:0')
epoch: 34 test_true_pfm: 125.70147526654785 sim_pfm: 231.74861819564248
episode: 136 training return: tensor(237.2544, device='cuda:0')
episode: 137 training return: tensor(231.7262, device='cuda:0')
episode: 138 training return: tensor(233.2138, device='cuda:0')
episode: 139 training return: tensor(238.7489, device='cuda:0')
epoch: 35 test_true_pfm: 125.7622309852291 sim_pfm: 235.4015032497351
episode: 140 training return: tensor(227.9030, device='cuda:0')
episode: 141 training return: tensor(240.4503, device='cuda:0')
episode: 142 training return: tensor(241.7838, device='cuda:0')
episode: 143 training return: tensor(227.5130, device='cuda:0')
epoch: 36 test_true_pfm: 128.55095339522384 sim_pfm: 238.12020298118006
episode: 144 training return: tensor(231.9224, device='cuda:0')
episode: 145 training return: tensor(237.3794, device='cuda:0')
episode: 146 training return: tensor(233.7916, device='cuda:0')
episode: 147 training return: tensor(225.2205, device='cuda:0')
epoch: 37 test_true_pfm: 129.4031552933948 sim_pfm: 236.10844424045644
episode: 148 training return: tensor(238.3163, device='cuda:0')
episode: 149 training return: tensor(223.7206, device='cuda:0')
episode: 150 training return: tensor(235.8242, device='cuda:0')
episode: 151 training return: tensor(236.2886, device='cuda:0')
epoch: 38 test_true_pfm: 127.78439869232366 sim_pfm: 235.24364890200087
episode: 152 training return: tensor(220.0194, device='cuda:0')
episode: 153 training return: tensor(221.1593, device='cuda:0')
episode: 154 training return: tensor(233.6837, device='cuda:0')
episode: 155 training return: tensor(229.9939, device='cuda:0')
epoch: 39 test_true_pfm: 124.12512580264179 sim_pfm: 226.70495343153016
episode: 156 training return: tensor(225.9343, device='cuda:0')
episode: 157 training return: tensor(237.1691, device='cuda:0')
episode: 158 training return: tensor(230.8944, device='cuda:0')
episode: 159 training return: tensor(249.9058, device='cuda:0')
epoch: 40 test_true_pfm: 124.86614855181384 sim_pfm: 232.45310769607894
episode: 160 training return: tensor(219.6592, device='cuda:0')
episode: 161 training return: tensor(226.7307, device='cuda:0')
episode: 162 training return: tensor(229.1313, device='cuda:0')
episode: 163 training return: tensor(225.6349, device='cuda:0')
epoch: 41 test_true_pfm: 124.54964659818836 sim_pfm: 229.75656111977295
episode: 164 training return: tensor(246.4098, device='cuda:0')
episode: 165 training return: tensor(234.5139, device='cuda:0')
episode: 166 training return: tensor(226.3439, device='cuda:0')
episode: 167 training return: tensor(233.0084, device='cuda:0')
epoch: 42 test_true_pfm: 125.23669245766973 sim_pfm: 234.78932447237895
episode: 168 training return: tensor(230.4502, device='cuda:0')
episode: 169 training return: tensor(230.7141, device='cuda:0')
episode: 170 training return: tensor(234.4046, device='cuda:0')
episode: 171 training return: tensor(242.9971, device='cuda:0')
epoch: 43 test_true_pfm: 126.92840125404341 sim_pfm: 233.8132120557828
episode: 172 training return: tensor(224.6732, device='cuda:0')
episode: 173 training return: tensor(242.5438, device='cuda:0')
episode: 174 training return: tensor(220.4398, device='cuda:0')
episode: 175 training return: tensor(218.8250, device='cuda:0')
epoch: 44 test_true_pfm: 125.1586780663176 sim_pfm: 231.3207537566428
episode: 176 training return: tensor(228.5774, device='cuda:0')
episode: 177 training return: tensor(229.3002, device='cuda:0')
episode: 178 training return: tensor(229.8279, device='cuda:0')
episode: 179 training return: tensor(238.0261, device='cuda:0')
epoch: 45 test_true_pfm: 126.3984638503445 sim_pfm: 233.41466017067432
episode: 180 training return: tensor(234.5055, device='cuda:0')
episode: 181 training return: tensor(225.8522, device='cuda:0')
episode: 182 training return: tensor(226.9114, device='cuda:0')
episode: 183 training return: tensor(232.6106, device='cuda:0')
epoch: 46 test_true_pfm: 128.987496471315 sim_pfm: 227.44515959706624
episode: 184 training return: tensor(233.7851, device='cuda:0')
episode: 185 training return: tensor(232.9587, device='cuda:0')
episode: 186 training return: tensor(238.8905, device='cuda:0')
episode: 187 training return: tensor(231.3372, device='cuda:0')
epoch: 47 test_true_pfm: 127.07009328011804 sim_pfm: 230.18581513561077
episode: 188 training return: tensor(227.3737, device='cuda:0')
episode: 189 training return: tensor(229.4749, device='cuda:0')
episode: 190 training return: tensor(227.3447, device='cuda:0')
episode: 191 training return: tensor(226.2791, device='cuda:0')
epoch: 48 test_true_pfm: 127.74954582052582 sim_pfm: 234.24245162283768
episode: 192 training return: tensor(230.9659, device='cuda:0')
episode: 193 training return: tensor(245.8057, device='cuda:0')
episode: 194 training return: tensor(242.7779, device='cuda:0')
episode: 195 training return: tensor(232.2159, device='cuda:0')
epoch: 49 test_true_pfm: 127.9452743810812 sim_pfm: 233.71778638281393
episode: 196 training return: tensor(223.2284, device='cuda:0')
episode: 197 training return: tensor(231.4264, device='cuda:0')
episode: 198 training return: tensor(225.6525, device='cuda:0')
episode: 199 training return: tensor(222.8738, device='cuda:0')
epoch: 50 test_true_pfm: 129.54343081719293 sim_pfm: 233.1144386317581
episode: 200 training return: tensor(232.8566, device='cuda:0')
episode: 201 training return: tensor(227.2320, device='cuda:0')
episode: 202 training return: tensor(235.4398, device='cuda:0')
episode: 203 training return: tensor(229.4837, device='cuda:0')
epoch: 51 test_true_pfm: 124.71488000012812 sim_pfm: 234.42898504496551
episode: 204 training return: tensor(233.1606, device='cuda:0')
episode: 205 training return: tensor(238.0058, device='cuda:0')
episode: 206 training return: tensor(250.2576, device='cuda:0')
episode: 207 training return: tensor(234.0567, device='cuda:0')
epoch: 52 test_true_pfm: 124.47236198274656 sim_pfm: 233.91684090541676
episode: 208 training return: tensor(235.3546, device='cuda:0')
episode: 209 training return: tensor(234.8381, device='cuda:0')
episode: 210 training return: tensor(246.4888, device='cuda:0')
episode: 211 training return: tensor(251.1116, device='cuda:0')
epoch: 53 test_true_pfm: 126.01292829443557 sim_pfm: 235.28701173516455
episode: 212 training return: tensor(241.7183, device='cuda:0')
episode: 213 training return: tensor(238.1588, device='cuda:0')
episode: 214 training return: tensor(240.8050, device='cuda:0')
episode: 215 training return: tensor(234.8641, device='cuda:0')
epoch: 54 test_true_pfm: 133.59085220935162 sim_pfm: 233.415520781558
episode: 216 training return: tensor(244.8622, device='cuda:0')
episode: 217 training return: tensor(234.3435, device='cuda:0')
episode: 218 training return: tensor(228.4871, device='cuda:0')
episode: 219 training return: tensor(223.4063, device='cuda:0')
epoch: 55 test_true_pfm: 128.0247565460126 sim_pfm: 238.54915376034333
episode: 220 training return: tensor(240.9694, device='cuda:0')
episode: 221 training return: tensor(233.4681, device='cuda:0')
episode: 222 training return: tensor(233.1181, device='cuda:0')
episode: 223 training return: tensor(240.4221, device='cuda:0')
epoch: 56 test_true_pfm: 127.79977200217654 sim_pfm: 239.12250920332735
episode: 224 training return: tensor(244.9549, device='cuda:0')
episode: 225 training return: tensor(241.1108, device='cuda:0')
episode: 226 training return: tensor(227.3114, device='cuda:0')
episode: 227 training return: tensor(221.5291, device='cuda:0')
epoch: 57 test_true_pfm: 128.69188689459494 sim_pfm: 231.26232367517076
episode: 228 training return: tensor(239.4370, device='cuda:0')
episode: 229 training return: tensor(235.0236, device='cuda:0')
episode: 230 training return: tensor(248.4859, device='cuda:0')
episode: 231 training return: tensor(232.7192, device='cuda:0')
epoch: 58 test_true_pfm: 131.38714099511873 sim_pfm: 239.3262996768346
episode: 232 training return: tensor(228.9010, device='cuda:0')
episode: 233 training return: tensor(233.5642, device='cuda:0')
episode: 234 training return: tensor(234.8899, device='cuda:0')
episode: 235 training return: tensor(228.9060, device='cuda:0')
epoch: 59 test_true_pfm: 129.56845302033733 sim_pfm: 242.19420027787564
episode: 236 training return: tensor(235.0380, device='cuda:0')
episode: 237 training return: tensor(241.1026, device='cuda:0')
episode: 238 training return: tensor(236.2029, device='cuda:0')
episode: 239 training return: tensor(239.8205, device='cuda:0')
epoch: 60 test_true_pfm: 129.1362546464017 sim_pfm: 244.8076225323195
episode: 240 training return: tensor(231.6326, device='cuda:0')
episode: 241 training return: tensor(258.0231, device='cuda:0')
episode: 242 training return: tensor(238.9882, device='cuda:0')
episode: 243 training return: tensor(236.6610, device='cuda:0')
epoch: 61 test_true_pfm: 128.306723866246 sim_pfm: 245.16995009328238
episode: 244 training return: tensor(247.9024, device='cuda:0')
episode: 245 training return: tensor(234.8871, device='cuda:0')
episode: 246 training return: tensor(234.9346, device='cuda:0')
episode: 247 training return: tensor(242.5746, device='cuda:0')
epoch: 62 test_true_pfm: 127.33139757225767 sim_pfm: 241.980361830513
episode: 248 training return: tensor(241.8062, device='cuda:0')
episode: 249 training return: tensor(247.7141, device='cuda:0')
episode: 250 training return: tensor(244.1116, device='cuda:0')
episode: 251 training return: tensor(236.6910, device='cuda:0')
epoch: 63 test_true_pfm: 129.6482876383173 sim_pfm: 241.65449101772393
episode: 252 training return: tensor(239.8437, device='cuda:0')
episode: 253 training return: tensor(237.3912, device='cuda:0')
episode: 254 training return: tensor(224.5216, device='cuda:0')
episode: 255 training return: tensor(230.8105, device='cuda:0')
epoch: 64 test_true_pfm: 127.40573725984359 sim_pfm: 249.6990125918528
episode: 256 training return: tensor(232.1962, device='cuda:0')
episode: 257 training return: tensor(235.7281, device='cuda:0')
episode: 258 training return: tensor(234.3592, device='cuda:0')
episode: 259 training return: tensor(253.7437, device='cuda:0')
epoch: 65 test_true_pfm: 127.08184778486756 sim_pfm: 228.11154140697326
episode: 260 training return: tensor(233.1981, device='cuda:0')
episode: 261 training return: tensor(235.8857, device='cuda:0')
episode: 262 training return: tensor(246.0060, device='cuda:0')
episode: 263 training return: tensor(226.2355, device='cuda:0')
epoch: 66 test_true_pfm: 128.13602819606726 sim_pfm: 242.85247484602732
episode: 264 training return: tensor(234.9160, device='cuda:0')
episode: 265 training return: tensor(235.6143, device='cuda:0')
episode: 266 training return: tensor(234.8612, device='cuda:0')
episode: 267 training return: tensor(227.6917, device='cuda:0')
epoch: 67 test_true_pfm: 127.63038220608216 sim_pfm: 237.80258212651825
episode: 268 training return: tensor(235.8340, device='cuda:0')
episode: 269 training return: tensor(254.6203, device='cuda:0')
episode: 270 training return: tensor(235.3319, device='cuda:0')
episode: 271 training return: tensor(229.3818, device='cuda:0')
epoch: 68 test_true_pfm: 125.96510725877582 sim_pfm: 240.46803043431137
episode: 272 training return: tensor(227.2929, device='cuda:0')
episode: 273 training return: tensor(231.9800, device='cuda:0')
episode: 274 training return: tensor(225.7393, device='cuda:0')
episode: 275 training return: tensor(235.4472, device='cuda:0')
epoch: 69 test_true_pfm: 127.21477893130898 sim_pfm: 237.21032037490514
episode: 276 training return: tensor(250.7224, device='cuda:0')
episode: 277 training return: tensor(234.6603, device='cuda:0')
episode: 278 training return: tensor(228.6649, device='cuda:0')
episode: 279 training return: tensor(249.3608, device='cuda:0')
epoch: 70 test_true_pfm: 130.85755862019363 sim_pfm: 238.11169755981535
episode: 280 training return: tensor(246.4078, device='cuda:0')
episode: 281 training return: tensor(234.6169, device='cuda:0')
episode: 282 training return: tensor(234.6435, device='cuda:0')
episode: 283 training return: tensor(249.9434, device='cuda:0')
epoch: 71 test_true_pfm: 124.29058106899356 sim_pfm: 240.8265019630315
episode: 284 training return: tensor(227.2055, device='cuda:0')
episode: 285 training return: tensor(232.7370, device='cuda:0')
episode: 286 training return: tensor(243.5434, device='cuda:0')
episode: 287 training return: tensor(236.1019, device='cuda:0')
epoch: 72 test_true_pfm: 127.23515856104741 sim_pfm: 237.77848073818024
episode: 288 training return: tensor(233.2740, device='cuda:0')
episode: 289 training return: tensor(234.0086, device='cuda:0')
episode: 290 training return: tensor(227.2835, device='cuda:0')
episode: 291 training return: tensor(244.2133, device='cuda:0')
epoch: 73 test_true_pfm: 129.9991814694647 sim_pfm: 242.7310988878715
episode: 292 training return: tensor(249.5658, device='cuda:0')
episode: 293 training return: tensor(225.9167, device='cuda:0')
episode: 294 training return: tensor(222.9018, device='cuda:0')
episode: 295 training return: tensor(241.6341, device='cuda:0')
epoch: 74 test_true_pfm: 131.22543343103692 sim_pfm: 236.33907385884086
episode: 296 training return: tensor(244.1624, device='cuda:0')
episode: 297 training return: tensor(266.5142, device='cuda:0')
episode: 298 training return: tensor(242.0535, device='cuda:0')
episode: 299 training return: tensor(237.5429, device='cuda:0')
epoch: 75 test_true_pfm: 125.02732982661863 sim_pfm: 243.357306850655
episode: 300 training return: tensor(248.2713, device='cuda:0')
episode: 301 training return: tensor(239.8282, device='cuda:0')
episode: 302 training return: tensor(237.8467, device='cuda:0')
episode: 303 training return: tensor(235.5867, device='cuda:0')
epoch: 76 test_true_pfm: 130.58230718022588 sim_pfm: 241.01131261893315
episode: 304 training return: tensor(225.1152, device='cuda:0')
episode: 305 training return: tensor(236.2407, device='cuda:0')
episode: 306 training return: tensor(260.6773, device='cuda:0')
episode: 307 training return: tensor(243.0703, device='cuda:0')
epoch: 77 test_true_pfm: 131.03821028607726 sim_pfm: 250.66430718426128
episode: 308 training return: tensor(227.4587, device='cuda:0')
episode: 309 training return: tensor(243.1441, device='cuda:0')
episode: 310 training return: tensor(237.7970, device='cuda:0')
episode: 311 training return: tensor(249.7889, device='cuda:0')
epoch: 78 test_true_pfm: 132.73156316264064 sim_pfm: 245.18789408307057
episode: 312 training return: tensor(238.0356, device='cuda:0')
episode: 313 training return: tensor(255.7346, device='cuda:0')
episode: 314 training return: tensor(254.7355, device='cuda:0')
episode: 315 training return: tensor(253.2153, device='cuda:0')
epoch: 79 test_true_pfm: 130.58335437301216 sim_pfm: 248.57191843427717
episode: 316 training return: tensor(228.1444, device='cuda:0')
episode: 317 training return: tensor(251.6743, device='cuda:0')
episode: 318 training return: tensor(252.7921, device='cuda:0')
episode: 319 training return: tensor(257.2164, device='cuda:0')
epoch: 80 test_true_pfm: 126.35954750179812 sim_pfm: 239.8108850846591
episode: 320 training return: tensor(245.2745, device='cuda:0')
episode: 321 training return: tensor(245.7359, device='cuda:0')
episode: 322 training return: tensor(237.3404, device='cuda:0')
episode: 323 training return: tensor(239.3755, device='cuda:0')
epoch: 81 test_true_pfm: 130.646392694634 sim_pfm: 237.4555650643073
episode: 324 training return: tensor(233.5180, device='cuda:0')
episode: 325 training return: tensor(248.5195, device='cuda:0')
episode: 326 training return: tensor(237.6577, device='cuda:0')
episode: 327 training return: tensor(244.4315, device='cuda:0')
epoch: 82 test_true_pfm: 133.4602295552542 sim_pfm: 254.71519900993445
episode: 328 training return: tensor(245.7607, device='cuda:0')
episode: 329 training return: tensor(253.7130, device='cuda:0')
episode: 330 training return: tensor(232.8582, device='cuda:0')
episode: 331 training return: tensor(228.1529, device='cuda:0')
epoch: 83 test_true_pfm: 129.90289756032817 sim_pfm: 244.9994890641421
episode: 332 training return: tensor(254.5482, device='cuda:0')
episode: 333 training return: tensor(232.2544, device='cuda:0')
episode: 334 training return: tensor(258.7843, device='cuda:0')
episode: 335 training return: tensor(249.2517, device='cuda:0')
epoch: 84 test_true_pfm: 131.257591490404 sim_pfm: 245.71774318846875
episode: 336 training return: tensor(245.7694, device='cuda:0')
episode: 337 training return: tensor(250.2868, device='cuda:0')
episode: 338 training return: tensor(247.8196, device='cuda:0')
episode: 339 training return: tensor(221.7827, device='cuda:0')
epoch: 85 test_true_pfm: 130.31806062198834 sim_pfm: 242.37906284410855
episode: 340 training return: tensor(226.5821, device='cuda:0')
episode: 341 training return: tensor(228.4405, device='cuda:0')
episode: 342 training return: tensor(236.2070, device='cuda:0')
episode: 343 training return: tensor(249.3695, device='cuda:0')
epoch: 86 test_true_pfm: 130.81395355670114 sim_pfm: 248.00964064687724
episode: 344 training return: tensor(256.4043, device='cuda:0')
episode: 345 training return: tensor(235.2314, device='cuda:0')
episode: 346 training return: tensor(262.3564, device='cuda:0')
episode: 347 training return: tensor(245.9427, device='cuda:0')
epoch: 87 test_true_pfm: 129.63682586888422 sim_pfm: 255.2017781346105
episode: 348 training return: tensor(240.3227, device='cuda:0')
episode: 349 training return: tensor(244.4300, device='cuda:0')
episode: 350 training return: tensor(253.4071, device='cuda:0')
episode: 351 training return: tensor(260.8102, device='cuda:0')
epoch: 88 test_true_pfm: 127.12669282176921 sim_pfm: 248.15013578265206
episode: 352 training return: tensor(253.9431, device='cuda:0')
episode: 353 training return: tensor(227.5227, device='cuda:0')
episode: 354 training return: tensor(258.2662, device='cuda:0')
episode: 355 training return: tensor(238.9257, device='cuda:0')
epoch: 89 test_true_pfm: 131.95215152092769 sim_pfm: 250.48907895281445
episode: 356 training return: tensor(256.6673, device='cuda:0')
episode: 357 training return: tensor(246.1355, device='cuda:0')
episode: 358 training return: tensor(237.8790, device='cuda:0')
episode: 359 training return: tensor(225.1070, device='cuda:0')
epoch: 90 test_true_pfm: 124.92623669469569 sim_pfm: 236.59741452911402
episode: 360 training return: tensor(224.9465, device='cuda:0')
episode: 361 training return: tensor(248.9323, device='cuda:0')
episode: 362 training return: tensor(232.5395, device='cuda:0')
episode: 363 training return: tensor(238.7756, device='cuda:0')
epoch: 91 test_true_pfm: 128.37941854636455 sim_pfm: 241.01596514142003
episode: 364 training return: tensor(239.3173, device='cuda:0')
episode: 365 training return: tensor(233.5351, device='cuda:0')
episode: 366 training return: tensor(242.0115, device='cuda:0')
episode: 367 training return: tensor(255.5877, device='cuda:0')
epoch: 92 test_true_pfm: 130.96931130440154 sim_pfm: 240.64583830782212
episode: 368 training return: tensor(247.4980, device='cuda:0')
episode: 369 training return: tensor(224.7712, device='cuda:0')
episode: 370 training return: tensor(231.1615, device='cuda:0')
episode: 371 training return: tensor(267.2835, device='cuda:0')
epoch: 93 test_true_pfm: 131.52844324062934 sim_pfm: 246.94829572311136
episode: 372 training return: tensor(239.8562, device='cuda:0')
episode: 373 training return: tensor(235.7407, device='cuda:0')
episode: 374 training return: tensor(244.2755, device='cuda:0')
episode: 375 training return: tensor(255.5094, device='cuda:0')
epoch: 94 test_true_pfm: 131.21048586443942 sim_pfm: 249.2507493749028
episode: 376 training return: tensor(233.8672, device='cuda:0')
episode: 377 training return: tensor(243.8009, device='cuda:0')
episode: 378 training return: tensor(255.0166, device='cuda:0')
episode: 379 training return: tensor(236.6962, device='cuda:0')
epoch: 95 test_true_pfm: 129.59856502619238 sim_pfm: 248.5317608612764
episode: 380 training return: tensor(240.2756, device='cuda:0')
episode: 381 training return: tensor(249.7598, device='cuda:0')
episode: 382 training return: tensor(235.3400, device='cuda:0')
episode: 383 training return: tensor(257.4663, device='cuda:0')
epoch: 96 test_true_pfm: 128.34401847355193 sim_pfm: 253.87495877038455
episode: 384 training return: tensor(251.2485, device='cuda:0')
episode: 385 training return: tensor(254.8727, device='cuda:0')
episode: 386 training return: tensor(230.0093, device='cuda:0')
episode: 387 training return: tensor(238.3533, device='cuda:0')
epoch: 97 test_true_pfm: 131.6799487740372 sim_pfm: 259.4734312665358
episode: 388 training return: tensor(255.8130, device='cuda:0')
episode: 389 training return: tensor(241.5949, device='cuda:0')
episode: 390 training return: tensor(240.7079, device='cuda:0')
episode: 391 training return: tensor(249.6933, device='cuda:0')
epoch: 98 test_true_pfm: 128.1468467300803 sim_pfm: 242.8997829090804
episode: 392 training return: tensor(221.1558, device='cuda:0')
episode: 393 training return: tensor(251.2393, device='cuda:0')
episode: 394 training return: tensor(259.3680, device='cuda:0')
episode: 395 training return: tensor(241.0717, device='cuda:0')
epoch: 99 test_true_pfm: 131.3944075519959 sim_pfm: 244.52910781491082
episode: 396 training return: tensor(235.2005, device='cuda:0')
episode: 397 training return: tensor(252.3678, device='cuda:0')
episode: 398 training return: tensor(227.7004, device='cuda:0')
episode: 399 training return: tensor(246.5933, device='cuda:0')
epoch: 100 test_true_pfm: 128.7255948152486 sim_pfm: 247.19492082163924
episode: 400 training return: tensor(243.9336, device='cuda:0')
episode: 401 training return: tensor(248.5785, device='cuda:0')
episode: 402 training return: tensor(253.9272, device='cuda:0')
episode: 403 training return: tensor(251.8538, device='cuda:0')
epoch: 101 test_true_pfm: 131.75252235648418 sim_pfm: 244.63542360332795
episode: 404 training return: tensor(228.2928, device='cuda:0')
episode: 405 training return: tensor(246.7664, device='cuda:0')
episode: 406 training return: tensor(226.5710, device='cuda:0')
episode: 407 training return: tensor(254.8413, device='cuda:0')
epoch: 102 test_true_pfm: 130.18486817287157 sim_pfm: 246.60342912881168
episode: 408 training return: tensor(239.0193, device='cuda:0')
episode: 409 training return: tensor(247.1864, device='cuda:0')
episode: 410 training return: tensor(254.5229, device='cuda:0')
episode: 411 training return: tensor(234.3538, device='cuda:0')
epoch: 103 test_true_pfm: 129.78525373487815 sim_pfm: 245.6847457561642
episode: 412 training return: tensor(251.1643, device='cuda:0')
episode: 413 training return: tensor(241.2564, device='cuda:0')
episode: 414 training return: tensor(257.6447, device='cuda:0')
episode: 415 training return: tensor(263.9851, device='cuda:0')
epoch: 104 test_true_pfm: 125.32540140181638 sim_pfm: 251.8791444040835
episode: 416 training return: tensor(244.2178, device='cuda:0')
episode: 417 training return: tensor(253.9024, device='cuda:0')
episode: 418 training return: tensor(259.2324, device='cuda:0')
episode: 419 training return: tensor(250.7456, device='cuda:0')
epoch: 105 test_true_pfm: 129.0770222119363 sim_pfm: 242.83443595402642
episode: 420 training return: tensor(240.1806, device='cuda:0')
episode: 421 training return: tensor(239.0572, device='cuda:0')
episode: 422 training return: tensor(262.6675, device='cuda:0')
episode: 423 training return: tensor(246.6721, device='cuda:0')
epoch: 106 test_true_pfm: 132.33745428304178 sim_pfm: 239.50934129308445
episode: 424 training return: tensor(229.4961, device='cuda:0')
episode: 425 training return: tensor(240.3357, device='cuda:0')
episode: 426 training return: tensor(237.5619, device='cuda:0')
episode: 427 training return: tensor(248.2638, device='cuda:0')
epoch: 107 test_true_pfm: 131.58449237640525 sim_pfm: 248.54304178923485
episode: 428 training return: tensor(254.4081, device='cuda:0')
episode: 429 training return: tensor(236.6024, device='cuda:0')
episode: 430 training return: tensor(255.2393, device='cuda:0')
episode: 431 training return: tensor(252.6108, device='cuda:0')
epoch: 108 test_true_pfm: 127.64997973641593 sim_pfm: 247.6965525705833
episode: 432 training return: tensor(238.0429, device='cuda:0')
episode: 433 training return: tensor(254.7960, device='cuda:0')
episode: 434 training return: tensor(260.6427, device='cuda:0')
episode: 435 training return: tensor(244.0043, device='cuda:0')
epoch: 109 test_true_pfm: 131.12861365760293 sim_pfm: 244.25340851254296
episode: 436 training return: tensor(258.7094, device='cuda:0')
episode: 437 training return: tensor(255.4559, device='cuda:0')
episode: 438 training return: tensor(257.8284, device='cuda:0')
episode: 439 training return: tensor(238.1385, device='cuda:0')
epoch: 110 test_true_pfm: 129.56622155121667 sim_pfm: 241.8663864401169
episode: 440 training return: tensor(241.1555, device='cuda:0')
episode: 441 training return: tensor(262.1457, device='cuda:0')
episode: 442 training return: tensor(232.3055, device='cuda:0')
episode: 443 training return: tensor(241.3075, device='cuda:0')
epoch: 111 test_true_pfm: 131.46157131477645 sim_pfm: 257.70971606071106
episode: 444 training return: tensor(236.0644, device='cuda:0')
episode: 445 training return: tensor(258.1106, device='cuda:0')
episode: 446 training return: tensor(260.6584, device='cuda:0')
episode: 447 training return: tensor(239.5790, device='cuda:0')
epoch: 112 test_true_pfm: 131.1437214164343 sim_pfm: 243.8151594284922
episode: 448 training return: tensor(249.8500, device='cuda:0')
episode: 449 training return: tensor(252.4341, device='cuda:0')
episode: 450 training return: tensor(253.6983, device='cuda:0')
episode: 451 training return: tensor(258.0986, device='cuda:0')
epoch: 113 test_true_pfm: 131.71058939601298 sim_pfm: 247.0332922280184
episode: 452 training return: tensor(246.2680, device='cuda:0')
episode: 453 training return: tensor(235.0731, device='cuda:0')
episode: 454 training return: tensor(239.9637, device='cuda:0')
episode: 455 training return: tensor(235.5592, device='cuda:0')
epoch: 114 test_true_pfm: 130.14920316846005 sim_pfm: 244.66728856731788
episode: 456 training return: tensor(246.4546, device='cuda:0')
episode: 457 training return: tensor(237.0186, device='cuda:0')
episode: 458 training return: tensor(238.3422, device='cuda:0')
episode: 459 training return: tensor(245.1813, device='cuda:0')
epoch: 115 test_true_pfm: 131.94712251955892 sim_pfm: 236.97407910741168
episode: 460 training return: tensor(260.6353, device='cuda:0')
episode: 461 training return: tensor(246.7049, device='cuda:0')
episode: 462 training return: tensor(235.6393, device='cuda:0')
episode: 463 training return: tensor(267.6772, device='cuda:0')
epoch: 116 test_true_pfm: 131.99165986598388 sim_pfm: 245.79576978581608
episode: 464 training return: tensor(253.3245, device='cuda:0')
episode: 465 training return: tensor(247.4132, device='cuda:0')
episode: 466 training return: tensor(252.7601, device='cuda:0')
episode: 467 training return: tensor(233.9860, device='cuda:0')
epoch: 117 test_true_pfm: 128.05505550676807 sim_pfm: 253.19755233059405
episode: 468 training return: tensor(244.2328, device='cuda:0')
episode: 469 training return: tensor(235.4545, device='cuda:0')
episode: 470 training return: tensor(233.7207, device='cuda:0')
episode: 471 training return: tensor(240.7259, device='cuda:0')
epoch: 118 test_true_pfm: 127.3883498349335 sim_pfm: 248.02644831172657
episode: 472 training return: tensor(230.2309, device='cuda:0')
episode: 473 training return: tensor(247.8394, device='cuda:0')
episode: 474 training return: tensor(244.9715, device='cuda:0')
episode: 475 training return: tensor(241.5807, device='cuda:0')
epoch: 119 test_true_pfm: 128.85591904549918 sim_pfm: 252.3331920639379
episode: 476 training return: tensor(252.2912, device='cuda:0')
episode: 477 training return: tensor(251.2675, device='cuda:0')
episode: 478 training return: tensor(255.2777, device='cuda:0')
episode: 479 training return: tensor(234.6391, device='cuda:0')
epoch: 120 test_true_pfm: 130.1815620977449 sim_pfm: 251.4643918757327
episode: 480 training return: tensor(241.0292, device='cuda:0')
episode: 481 training return: tensor(238.8264, device='cuda:0')
episode: 482 training return: tensor(261.4062, device='cuda:0')
episode: 483 training return: tensor(262.8388, device='cuda:0')
epoch: 121 test_true_pfm: 129.68311338546394 sim_pfm: 239.2649671095889
episode: 484 training return: tensor(246.7578, device='cuda:0')
episode: 485 training return: tensor(234.1183, device='cuda:0')
episode: 486 training return: tensor(244.2907, device='cuda:0')
episode: 487 training return: tensor(249.4425, device='cuda:0')
epoch: 122 test_true_pfm: 126.14711639323589 sim_pfm: 247.4874755886849
episode: 488 training return: tensor(253.7106, device='cuda:0')
episode: 489 training return: tensor(246.5585, device='cuda:0')
episode: 490 training return: tensor(239.1390, device='cuda:0')
episode: 491 training return: tensor(233.9797, device='cuda:0')
epoch: 123 test_true_pfm: 127.43474745965213 sim_pfm: 251.5054588396917
episode: 492 training return: tensor(254.1410, device='cuda:0')
episode: 493 training return: tensor(246.5911, device='cuda:0')
episode: 494 training return: tensor(239.5965, device='cuda:0')
episode: 495 training return: tensor(233.8996, device='cuda:0')
epoch: 124 test_true_pfm: 129.2601097257636 sim_pfm: 261.38866025038294
episode: 496 training return: tensor(245.6523, device='cuda:0')
episode: 497 training return: tensor(239.9256, device='cuda:0')
episode: 498 training return: tensor(247.6305, device='cuda:0')
episode: 499 training return: tensor(255.8192, device='cuda:0')
epoch: 125 test_true_pfm: 133.039070729695 sim_pfm: 261.5750920994906
episode: 500 training return: tensor(260.9967, device='cuda:0')
episode: 501 training return: tensor(255.2237, device='cuda:0')
episode: 502 training return: tensor(250.2931, device='cuda:0')
episode: 503 training return: tensor(262.9124, device='cuda:0')
epoch: 126 test_true_pfm: 132.67763064143693 sim_pfm: 258.98089659663384
episode: 504 training return: tensor(250.4990, device='cuda:0')
episode: 505 training return: tensor(260.4869, device='cuda:0')
episode: 506 training return: tensor(243.2845, device='cuda:0')
episode: 507 training return: tensor(245.9499, device='cuda:0')
epoch: 127 test_true_pfm: 130.48264561888752 sim_pfm: 260.838717453985
episode: 508 training return: tensor(267.1301, device='cuda:0')
episode: 509 training return: tensor(245.6525, device='cuda:0')
episode: 510 training return: tensor(254.0661, device='cuda:0')
episode: 511 training return: tensor(231.4764, device='cuda:0')
epoch: 128 test_true_pfm: 130.33050012984216 sim_pfm: 239.34289984330536
episode: 512 training return: tensor(239.5522, device='cuda:0')
episode: 513 training return: tensor(258.4968, device='cuda:0')
episode: 514 training return: tensor(254.1303, device='cuda:0')
episode: 515 training return: tensor(236.5122, device='cuda:0')
epoch: 129 test_true_pfm: 130.4939273605907 sim_pfm: 251.86991584558274
episode: 516 training return: tensor(260.4665, device='cuda:0')
episode: 517 training return: tensor(249.6661, device='cuda:0')
episode: 518 training return: tensor(257.1467, device='cuda:0')
episode: 519 training return: tensor(235.0087, device='cuda:0')
epoch: 130 test_true_pfm: 134.06845745969343 sim_pfm: 242.2299789873883
episode: 520 training return: tensor(226.2654, device='cuda:0')
episode: 521 training return: tensor(235.3866, device='cuda:0')
episode: 522 training return: tensor(257.2889, device='cuda:0')
episode: 523 training return: tensor(242.0936, device='cuda:0')
epoch: 131 test_true_pfm: 129.72486225413783 sim_pfm: 266.51066590179687
episode: 524 training return: tensor(258.7897, device='cuda:0')
episode: 525 training return: tensor(240.6868, device='cuda:0')
episode: 526 training return: tensor(243.6631, device='cuda:0')
episode: 527 training return: tensor(246.3694, device='cuda:0')
epoch: 132 test_true_pfm: 126.00516340520524 sim_pfm: 252.32491446795757
episode: 528 training return: tensor(237.1762, device='cuda:0')
episode: 529 training return: tensor(242.4848, device='cuda:0')
episode: 530 training return: tensor(251.4211, device='cuda:0')
episode: 531 training return: tensor(261.6815, device='cuda:0')
epoch: 133 test_true_pfm: 132.04089522496392 sim_pfm: 258.51909189012366
episode: 532 training return: tensor(242.8208, device='cuda:0')
episode: 533 training return: tensor(257.8673, device='cuda:0')
episode: 534 training return: tensor(264.3394, device='cuda:0')
episode: 535 training return: tensor(244.5126, device='cuda:0')
epoch: 134 test_true_pfm: 129.81156725420402 sim_pfm: 247.68618300147355
episode: 536 training return: tensor(244.5412, device='cuda:0')
episode: 537 training return: tensor(263.1743, device='cuda:0')
episode: 538 training return: tensor(269.0628, device='cuda:0')
episode: 539 training return: tensor(253.3874, device='cuda:0')
epoch: 135 test_true_pfm: 129.3626271897968 sim_pfm: 246.27444346041884
episode: 540 training return: tensor(255.3817, device='cuda:0')
episode: 541 training return: tensor(261.0269, device='cuda:0')
episode: 542 training return: tensor(254.9706, device='cuda:0')
episode: 543 training return: tensor(255.8668, device='cuda:0')
epoch: 136 test_true_pfm: 127.5726176509277 sim_pfm: 252.83126763433103
episode: 544 training return: tensor(256.9748, device='cuda:0')
episode: 545 training return: tensor(245.1642, device='cuda:0')
episode: 546 training return: tensor(244.2308, device='cuda:0')
episode: 547 training return: tensor(257.5313, device='cuda:0')
epoch: 137 test_true_pfm: 129.2877656598434 sim_pfm: 249.64801694033667
episode: 548 training return: tensor(249.5449, device='cuda:0')
episode: 549 training return: tensor(261.0847, device='cuda:0')
episode: 550 training return: tensor(256.2522, device='cuda:0')
episode: 551 training return: tensor(239.9198, device='cuda:0')
epoch: 138 test_true_pfm: 129.10500177232242 sim_pfm: 245.68489872339995
episode: 552 training return: tensor(239.5736, device='cuda:0')
episode: 553 training return: tensor(231.2017, device='cuda:0')
episode: 554 training return: tensor(226.5473, device='cuda:0')
episode: 555 training return: tensor(263.4953, device='cuda:0')
epoch: 139 test_true_pfm: 131.97334392867023 sim_pfm: 248.93258948365693
episode: 556 training return: tensor(246.9494, device='cuda:0')
episode: 557 training return: tensor(234.7068, device='cuda:0')
episode: 558 training return: tensor(247.5926, device='cuda:0')
episode: 559 training return: tensor(261.9368, device='cuda:0')
epoch: 140 test_true_pfm: 130.2008536889714 sim_pfm: 247.96053207374644
episode: 560 training return: tensor(262.0274, device='cuda:0')
episode: 561 training return: tensor(250.0008, device='cuda:0')
episode: 562 training return: tensor(240.7727, device='cuda:0')
episode: 563 training return: tensor(245.7520, device='cuda:0')
epoch: 141 test_true_pfm: 135.30489941337277 sim_pfm: 254.97573398083915
episode: 564 training return: tensor(244.6476, device='cuda:0')
episode: 565 training return: tensor(260.9378, device='cuda:0')
episode: 566 training return: tensor(254.2545, device='cuda:0')
episode: 567 training return: tensor(259.0347, device='cuda:0')
epoch: 142 test_true_pfm: 126.92616830678676 sim_pfm: 245.320827402093
episode: 568 training return: tensor(239.9057, device='cuda:0')
episode: 569 training return: tensor(245.8046, device='cuda:0')
episode: 570 training return: tensor(258.8683, device='cuda:0')
episode: 571 training return: tensor(258.2025, device='cuda:0')
epoch: 143 test_true_pfm: 132.25037617359484 sim_pfm: 260.83735382578453
episode: 572 training return: tensor(242.1629, device='cuda:0')
episode: 573 training return: tensor(250.5163, device='cuda:0')
episode: 574 training return: tensor(255.8910, device='cuda:0')
episode: 575 training return: tensor(252.4964, device='cuda:0')
epoch: 144 test_true_pfm: 131.2155235990488 sim_pfm: 260.88781867832296
episode: 576 training return: tensor(256.3291, device='cuda:0')
episode: 577 training return: tensor(271.6081, device='cuda:0')
episode: 578 training return: tensor(227.1608, device='cuda:0')
episode: 579 training return: tensor(240.8879, device='cuda:0')
epoch: 145 test_true_pfm: 131.51130766484505 sim_pfm: 253.7171287803445
episode: 580 training return: tensor(259.3875, device='cuda:0')
episode: 581 training return: tensor(255.5717, device='cuda:0')
episode: 582 training return: tensor(254.3453, device='cuda:0')
episode: 583 training return: tensor(261.3136, device='cuda:0')
epoch: 146 test_true_pfm: 130.87382144993597 sim_pfm: 255.06401904572266
episode: 584 training return: tensor(267.1736, device='cuda:0')
episode: 585 training return: tensor(246.5686, device='cuda:0')
episode: 586 training return: tensor(267.5349, device='cuda:0')
episode: 587 training return: tensor(240.8590, device='cuda:0')
epoch: 147 test_true_pfm: 131.43260543198454 sim_pfm: 258.7031650404795
episode: 588 training return: tensor(250.0468, device='cuda:0')
episode: 589 training return: tensor(256.8878, device='cuda:0')
episode: 590 training return: tensor(262.9459, device='cuda:0')
episode: 591 training return: tensor(236.4708, device='cuda:0')
epoch: 148 test_true_pfm: 130.79327367086404 sim_pfm: 246.01328757569428
episode: 592 training return: tensor(232.5201, device='cuda:0')
episode: 593 training return: tensor(251.6509, device='cuda:0')
episode: 594 training return: tensor(260.7009, device='cuda:0')
episode: 595 training return: tensor(252.3285, device='cuda:0')
epoch: 149 test_true_pfm: 132.80240827024 sim_pfm: 246.4364971616189
episode: 596 training return: tensor(233.7222, device='cuda:0')
episode: 597 training return: tensor(268.3366, device='cuda:0')
episode: 598 training return: tensor(276.0866, device='cuda:0')
episode: 599 training return: tensor(253.6357, device='cuda:0')
epoch: 150 test_true_pfm: 130.53976922561526 sim_pfm: 247.75504852996673
