['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '3', '--data', '30000', '--sub']
epoch: 0 training_loss 0.2774528510123491 test_loss: 0.19690101146697997
epoch: 1 training_loss 0.1685078752040863 test_loss: 0.14808326959609985
epoch: 2 training_loss 0.14301876921206713 test_loss: 0.12821861505508422
epoch: 3 training_loss 0.13106155790388585 test_loss: 0.13156375885009766
epoch: 4 training_loss 0.1282054967060685 test_loss: 0.1256813645362854
epoch: 5 training_loss 0.11934484291821718 test_loss: 0.11602606773376464
epoch: 6 training_loss 0.11263129577040672 test_loss: 0.1202346682548523
epoch: 7 training_loss 0.11277446657419204 test_loss: 0.11546417474746704
epoch: 8 training_loss 0.11050758279860019 test_loss: 0.12124485969543457
epoch: 9 training_loss 0.11236189592629671 test_loss: 0.12228684425354004
epoch: 10 training_loss 0.10667963910847902 test_loss: 0.1082759976387024
epoch: 11 training_loss 0.10684691336005926 test_loss: 0.10503219366073609
epoch: 12 training_loss 0.10129603620618582 test_loss: 0.09730397462844849
epoch: 13 training_loss 0.09259376592934132 test_loss: 0.09678890705108642
epoch: 14 training_loss 0.10571296511217952 test_loss: 0.10155189037322998
epoch: 15 training_loss 0.10198915604501962 test_loss: 0.10319539308547973
epoch: 16 training_loss 0.10195953141897916 test_loss: 0.09742166996002197
epoch: 17 training_loss 0.09913728959858417 test_loss: 0.12068519592285157
epoch: 18 training_loss 0.09552531233057379 test_loss: 0.1051476240158081
epoch: 19 training_loss 0.1009118477255106 test_loss: 0.10702604055404663
epoch: 20 training_loss 0.10020937029272318 test_loss: 0.10124400854110718
epoch: 21 training_loss 0.09815144754946231 test_loss: 0.09422131776809692
epoch: 22 training_loss 0.10152106057852507 test_loss: 0.10036113262176513
epoch: 23 training_loss 0.09041198182851076 test_loss: 0.09484453797340393
epoch: 24 training_loss 0.09326848868280649 test_loss: 0.11818342208862305
epoch: 25 training_loss 0.09508158320561051 test_loss: 0.09388704895973206
epoch: 26 training_loss 0.09836162922903896 test_loss: 0.10939180850982666
epoch: 27 training_loss 0.0998757653310895 test_loss: 0.10555822849273681
epoch: 28 training_loss 0.08809489030390978 test_loss: 0.09462381601333618
epoch: 29 training_loss 0.09300070604309439 test_loss: 0.10249117612838746
epoch: 30 training_loss 0.09395734455436468 test_loss: 0.1056124210357666
epoch: 31 training_loss 0.09298146680928766 test_loss: 0.08330453038215638
epoch: 32 training_loss 0.08877775743603707 test_loss: 0.10494781732559204
epoch: 33 training_loss 0.09449551774188876 test_loss: 0.11464052200317383
epoch: 34 training_loss 0.09783858170732856 test_loss: 0.10256102085113525
epoch: 35 training_loss 0.09074898324906826 test_loss: 0.11602059602737427
epoch: 36 training_loss 0.0940169789083302 test_loss: 0.10309182405471802
epoch: 37 training_loss 0.0980713551491499 test_loss: 0.12080520391464233
epoch: 38 training_loss 0.0853107737749815 test_loss: 0.10565941333770752
epoch: 39 training_loss 0.08897996056824922 test_loss: 0.11421457529067994
epoch: 40 training_loss 0.09482090633362532 test_loss: 0.09747064113616943
epoch: 41 training_loss 0.09297399310395121 test_loss: 0.10817983150482177
epoch: 42 training_loss 0.09625518649816513 test_loss: 0.10136356353759765
epoch: 43 training_loss 0.0925861094892025 test_loss: 0.09200184941291809
epoch: 44 training_loss 0.09153358617797494 test_loss: 0.08530068993568421
epoch: 45 training_loss 0.09283256581053138 test_loss: 0.09903092384338379
epoch: 46 training_loss 0.09160397186875344 test_loss: 0.09954139590263367
epoch: 47 training_loss 0.08978189907968044 test_loss: 0.09914531707763671
epoch: 48 training_loss 0.08590866262093186 test_loss: 0.106632399559021
epoch: 49 training_loss 0.0886158400401473 test_loss: 0.0936073899269104
epoch: 50 training_loss 0.0895839612558484 test_loss: 0.11313276290893555
epoch: 51 training_loss 0.0918385799229145 test_loss: 0.09828640818595887
epoch: 52 training_loss 0.09396471144631505 test_loss: 0.10257799625396728
epoch: 53 training_loss 0.09437995797023177 test_loss: 0.09889246821403504
epoch: 54 training_loss 0.09013505406677723 test_loss: 0.09313101768493652
epoch: 55 training_loss 0.08360980957746506 test_loss: 0.09753865599632264
epoch: 56 training_loss 0.08786458916962146 test_loss: 0.12153286933898926
epoch: 57 training_loss 0.09098181605339051 test_loss: 0.0911052942276001
epoch: 58 training_loss 0.09417230553925038 test_loss: 0.10888051986694336
epoch: 59 training_loss 0.08981098975986242 test_loss: 0.0987766444683075
epoch: 60 training_loss 0.08573374373838305 test_loss: 0.10270546674728394
epoch: 61 training_loss 0.09232246477156877 test_loss: 0.09837832450866699
epoch: 62 training_loss 0.08957322518341243 test_loss: 0.0921625018119812
epoch: 63 training_loss 0.09076165815815329 test_loss: 0.09238827824592591
epoch: 64 training_loss 0.09036122845485807 test_loss: 0.08390777111053467
epoch: 65 training_loss 0.08401952454820276 test_loss: 0.1213446855545044
epoch: 66 training_loss 0.09043920133262873 test_loss: 0.0959142029285431
epoch: 67 training_loss 0.08643325988203288 test_loss: 0.09118463993072509
epoch: 68 training_loss 0.08447360644116998 test_loss: 0.10180004835128784
epoch: 69 training_loss 0.08704134522005916 test_loss: 0.09809038639068604
epoch: 70 training_loss 0.08221482891589403 test_loss: 0.09397856593132019
epoch: 71 training_loss 0.09322029474191368 test_loss: 0.0945061445236206
epoch: 72 training_loss 0.09470159944146872 test_loss: 0.09632185101509094
epoch: 73 training_loss 0.0882582238689065 test_loss: 0.10161499977111817
epoch: 74 training_loss 0.08534511217847467 test_loss: 0.08953961133956909
epoch: 75 training_loss 0.0876569264754653 test_loss: 0.0869364857673645
epoch: 76 training_loss 0.08806877860799432 test_loss: 0.10131220817565918
epoch: 77 training_loss 0.08731081934645772 test_loss: 0.08793189525604247
epoch: 78 training_loss 0.08246160491369664 test_loss: 0.09473666548728943
epoch: 79 training_loss 0.0937307195365429 test_loss: 0.10681661367416381
epoch: 80 training_loss 0.09204051671549678 test_loss: 0.10413696765899658
epoch: 81 training_loss 0.08652357582002879 test_loss: 0.08779705166816712
epoch: 82 training_loss 0.08555584305897355 test_loss: 0.12455049753189087
epoch: 83 training_loss 0.08121615482494235 test_loss: 0.11292437314987183
epoch: 84 training_loss 0.08862161029130221 test_loss: 0.08951915502548217
epoch: 85 training_loss 0.08786597667261958 test_loss: 0.10607835054397582
epoch: 86 training_loss 0.0838771322928369 test_loss: 0.08148092031478882
epoch: 87 training_loss 0.08934420023113489 test_loss: 0.1020920753479004
epoch: 88 training_loss 0.08689138753339648 test_loss: 0.10530238151550293
epoch: 89 training_loss 0.08986968357115983 test_loss: 0.08714464902877808
epoch: 90 training_loss 0.08809696815907955 test_loss: 0.10478202104568482
epoch: 91 training_loss 0.08108751134946943 test_loss: 0.09540323615074157
epoch: 92 training_loss 0.08644669111818075 test_loss: 0.0783378541469574
epoch: 93 training_loss 0.0838932241871953 test_loss: 0.09529620409011841
epoch: 94 training_loss 0.08158701054751873 test_loss: 0.09010662436485291
epoch: 95 training_loss 0.08372882286086679 test_loss: 0.1054728865623474
epoch: 96 training_loss 0.0861652489937842 test_loss: 0.11666630506515503
epoch: 97 training_loss 0.07733524240553379 test_loss: 0.10610448122024536
epoch: 98 training_loss 0.08571073915809393 test_loss: 0.11316385269165039
epoch: 99 training_loss 0.08365365508943796 test_loss: 0.09435980319976807
epoch: 100 training_loss 0.08802013512700796 test_loss: 0.08270409107208251
epoch: 101 training_loss 0.08828362924978136 test_loss: 0.0972745180130005
epoch: 102 training_loss 0.08795167315751314 test_loss: 0.10538705587387084
epoch: 103 training_loss 0.08197982734069229 test_loss: 0.0994346559047699
epoch: 104 training_loss 0.08653508398681879 test_loss: 0.09652963280677795
epoch: 105 training_loss 0.08826319335028529 test_loss: 0.0947379469871521
epoch: 106 training_loss 0.08129324423149228 test_loss: 0.09814940094947815
epoch: 107 training_loss 0.08604786079376936 test_loss: 0.10129547119140625
epoch: 108 training_loss 0.08261277684941888 test_loss: 0.08816251754760743
epoch: 109 training_loss 0.08906623382121324 test_loss: 0.11992323398590088
epoch: 110 training_loss 0.08268310066312551 test_loss: 0.08936139345169067
epoch: 111 training_loss 0.09338832199573517 test_loss: 0.09598392844200135
epoch: 112 training_loss 0.08181814262643457 test_loss: 0.08901101350784302
epoch: 113 training_loss 0.08731527967378497 test_loss: 0.09799967408180237
epoch: 114 training_loss 0.08542861398309469 test_loss: 0.10783361196517945
epoch: 115 training_loss 0.083183404058218 test_loss: 0.1159407377243042
epoch: 116 training_loss 0.08690065098926425 test_loss: 0.10487540960311889
epoch: 117 training_loss 0.08412103893235326 test_loss: 0.10637481212615967
epoch: 118 training_loss 0.08376223111525177 test_loss: 0.08017723560333252
epoch: 119 training_loss 0.0857384062372148 test_loss: 0.10759568214416504
epoch: 120 training_loss 0.0848052965849638 test_loss: 0.09905207753181458
epoch: 121 training_loss 0.0812799091823399 test_loss: 0.09938261508941651
epoch: 122 training_loss 0.08663782082498074 test_loss: 0.10966792106628417
epoch: 123 training_loss 0.08369551904499531 test_loss: 0.09359048008918762
epoch: 124 training_loss 0.0786726194806397 test_loss: 0.10044400691986084
epoch: 125 training_loss 0.08430658483877779 test_loss: 0.1091600775718689
epoch: 126 training_loss 0.08657382130622863 test_loss: 0.1041483759880066
epoch: 127 training_loss 0.08316055450588465 test_loss: 0.10411447286605835
epoch: 128 training_loss 0.08649656873196364 test_loss: 0.10118652582168579
epoch: 129 training_loss 0.08475199018605054 test_loss: 0.09906903505325318
epoch: 130 training_loss 0.09001452576369047 test_loss: 0.09293798208236695
epoch: 131 training_loss 0.08178213087841868 test_loss: 0.09072615504264832
epoch: 132 training_loss 0.08478940797969699 test_loss: 0.11074351072311402
epoch: 133 training_loss 0.08385134238749742 test_loss: 0.09803877472877502
epoch: 134 training_loss 0.08407652678899467 test_loss: 0.09671663641929626
epoch: 135 training_loss 0.08161701258271932 test_loss: 0.10109094381332398
epoch: 136 training_loss 0.07910433185286819 test_loss: 0.11113885641098023
epoch: 137 training_loss 0.07881565175950528 test_loss: 0.08968081474304199
epoch: 138 training_loss 0.07736942145973444 test_loss: 0.10857191085815429
epoch: 139 training_loss 0.08148137403652073 test_loss: 0.08110079765319825
epoch: 140 training_loss 0.07214480029419064 test_loss: 0.10012589693069458
epoch: 141 training_loss 0.08537872195243836 test_loss: 0.10247340202331542
epoch: 142 training_loss 0.08379120394587516 test_loss: 0.10632981061935425
epoch: 143 training_loss 0.07663088027387857 test_loss: 0.08176425695419312
epoch: 144 training_loss 0.08302703617140651 test_loss: 0.07910419702529907
epoch: 145 training_loss 0.0791390617378056 test_loss: 0.10020227432250976
epoch: 146 training_loss 0.08161422340199351 test_loss: 0.0954967737197876
epoch: 147 training_loss 0.0826751414500177 test_loss: 0.11328834295272827
epoch: 148 training_loss 0.07768097782507538 test_loss: 0.10334107875823975
epoch: 149 training_loss 0.08441388066858053 test_loss: 0.10439931154251099
epoch: 0 training_loss 42.35381254196167 test_loss: 21.167906188964842
epoch: 1 training_loss 17.383440723419188 test_loss: 14.31903076171875
epoch: 2 training_loss 12.708095607757569 test_loss: 11.417832946777343
epoch: 3 training_loss 10.595110778808595 test_loss: 10.097789001464843
epoch: 4 training_loss 9.11358603477478 test_loss: 8.832736206054687
epoch: 5 training_loss 8.353316378593444 test_loss: 8.162012481689453
epoch: 6 training_loss 7.649501624107361 test_loss: 7.628349304199219
epoch: 7 training_loss 7.2529816055297855 test_loss: 7.211427307128906
epoch: 8 training_loss 6.654669990539551 test_loss: 6.489247894287109
epoch: 9 training_loss 6.4109132194519045 test_loss: 6.557856750488281
epoch: 10 training_loss 6.074576683044434 test_loss: 6.051536941528321
epoch: 11 training_loss 5.8280617618560795 test_loss: 5.790167999267578
epoch: 12 training_loss 5.599807796478271 test_loss: 5.478800201416016
epoch: 13 training_loss 5.42724988937378 test_loss: 5.454084777832032
epoch: 14 training_loss 5.200096156597137 test_loss: 5.16186637878418
epoch: 15 training_loss 4.972781732082367 test_loss: 5.161602020263672
epoch: 16 training_loss 4.9386522221565246 test_loss: 4.835416412353515
epoch: 17 training_loss 4.782516016960144 test_loss: 4.826380920410156
epoch: 18 training_loss 4.591661801338196 test_loss: 4.652445220947266
epoch: 19 training_loss 4.624356734752655 test_loss: 4.597741317749024
epoch: 20 training_loss 4.3191432285308835 test_loss: 4.489670944213867
epoch: 21 training_loss 4.331457555294037 test_loss: 4.538278579711914
epoch: 22 training_loss 4.250807163715362 test_loss: 4.274750137329102
epoch: 23 training_loss 4.179880828857422 test_loss: 4.191984558105469
epoch: 24 training_loss 4.034656715393067 test_loss: 4.1879524230957035
epoch: 25 training_loss 3.9614788150787352 test_loss: 4.0045616149902346
epoch: 26 training_loss 4.076994690895081 test_loss: 4.04552001953125
epoch: 27 training_loss 3.8398412966728213 test_loss: 3.9227069854736327
epoch: 28 training_loss 3.875819571018219 test_loss: 3.766989898681641
epoch: 29 training_loss 3.7452741742134092 test_loss: 3.7668197631835936
epoch: 30 training_loss 3.743589379787445 test_loss: 3.853000259399414
epoch: 31 training_loss 3.6809258580207826 test_loss: 3.6912559509277343
epoch: 32 training_loss 3.5671871066093446 test_loss: 3.788558578491211
epoch: 33 training_loss 3.550586447715759 test_loss: 3.692919921875
epoch: 34 training_loss 3.548000431060791 test_loss: 3.54425163269043
epoch: 35 training_loss 3.450370900630951 test_loss: 3.560520553588867
epoch: 36 training_loss 3.572577815055847 test_loss: 3.435835266113281
epoch: 37 training_loss 3.4156917238235476 test_loss: 3.446417236328125
epoch: 38 training_loss 3.384190309047699 test_loss: 3.3904937744140624
epoch: 39 training_loss 3.264421124458313 test_loss: 3.5983692169189454
epoch: 40 training_loss 3.270897078514099 test_loss: 3.2809589385986326
epoch: 41 training_loss 3.2572568821907044 test_loss: 3.2025177001953127
epoch: 42 training_loss 3.2789464735984803 test_loss: 3.336224365234375
epoch: 43 training_loss 3.196933047771454 test_loss: 3.186500358581543
epoch: 44 training_loss 3.217898724079132 test_loss: 3.2684036254882813
epoch: 45 training_loss 3.114089012145996 test_loss: 3.273354339599609
epoch: 46 training_loss 3.1916531109809876 test_loss: 3.213827133178711
epoch: 47 training_loss 3.124507842063904 test_loss: 3.261372756958008
epoch: 48 training_loss 3.0851753878593446 test_loss: 3.2353981018066404
epoch: 49 training_loss 3.0854738187789916 test_loss: 3.119200325012207
epoch: 50 training_loss 3.0300025939941406 test_loss: 2.9670061111450194
epoch: 51 training_loss 3.045571572780609 test_loss: 3.23768310546875
epoch: 52 training_loss 2.9355321526527405 test_loss: 3.1221813201904296
epoch: 53 training_loss 2.9793693017959595 test_loss: 3.0299104690551757
epoch: 54 training_loss 2.9561972975730897 test_loss: 3.0986698150634764
epoch: 55 training_loss 3.0263761019706727 test_loss: 3.1066768646240233
epoch: 56 training_loss 2.8763365745544434 test_loss: 3.0031761169433593
epoch: 57 training_loss 2.8971907091140747 test_loss: 3.0839517593383787
epoch: 58 training_loss 2.8493550753593446 test_loss: 2.8557527542114256
epoch: 59 training_loss 2.8345764219760894 test_loss: 2.900543785095215
epoch: 60 training_loss 2.8524712681770326 test_loss: 3.0075103759765627
epoch: 61 training_loss 2.8831960129737855 test_loss: 2.9849660873413084
epoch: 62 training_loss 2.8166369366645814 test_loss: 2.937977409362793
epoch: 63 training_loss 2.742362141609192 test_loss: 3.0090410232543947
epoch: 64 training_loss 2.7870752882957457 test_loss: 2.804757308959961
epoch: 65 training_loss 2.8080788373947145 test_loss: 2.8749338150024415
epoch: 66 training_loss 2.8414164447784422 test_loss: 2.7765153884887694
epoch: 67 training_loss 2.797908251285553 test_loss: 2.990222358703613
epoch: 68 training_loss 2.75649644613266 test_loss: 2.7358076095581056
epoch: 69 training_loss 2.7254809498786927 test_loss: 2.8776599884033205
epoch: 70 training_loss 2.640843712091446 test_loss: 2.7415369033813475
epoch: 71 training_loss 2.710210565328598 test_loss: 2.8367284774780273
epoch: 72 training_loss 2.6828470838069918 test_loss: 2.805680847167969
epoch: 73 training_loss 2.6678771674633026 test_loss: 2.6973903656005858
epoch: 74 training_loss 2.6570831763744356 test_loss: 2.9148157119750975
epoch: 75 training_loss 2.637583291530609 test_loss: 2.6362871170043944
epoch: 76 training_loss 2.6425722885131835 test_loss: 2.7505170822143556
epoch: 77 training_loss 2.595004551410675 test_loss: 2.6324369430541994
epoch: 78 training_loss 2.5633939909934997 test_loss: 2.7539209365844726
epoch: 79 training_loss 2.659069769382477 test_loss: 2.624404716491699
epoch: 80 training_loss 2.5865926599502562 test_loss: 2.6872413635253904
epoch: 81 training_loss 2.551019868850708 test_loss: 2.6319313049316406
epoch: 82 training_loss 2.523848810195923 test_loss: 2.6157697677612304
epoch: 83 training_loss 2.6049714350700377 test_loss: 2.696264457702637
epoch: 84 training_loss 2.5926660776138304 test_loss: 2.646685981750488
epoch: 85 training_loss 2.5355273032188417 test_loss: 2.854850959777832
epoch: 86 training_loss 2.5329377162456512 test_loss: 2.5693357467651365
epoch: 87 training_loss 2.521439006328583 test_loss: 2.555938148498535
epoch: 88 training_loss 2.486127495765686 test_loss: 2.447961616516113
epoch: 89 training_loss 2.480826027393341 test_loss: 2.6014785766601562
epoch: 90 training_loss 2.5047483503818513 test_loss: 2.583546447753906
epoch: 91 training_loss 2.5340379846096037 test_loss: 2.636037826538086
epoch: 92 training_loss 2.455854889154434 test_loss: 2.698053741455078
epoch: 93 training_loss 2.417748085260391 test_loss: 2.5362939834594727
epoch: 94 training_loss 2.5042947697639466 test_loss: 2.5914968490600585
epoch: 95 training_loss 2.504440574645996 test_loss: 2.593696403503418
epoch: 96 training_loss 2.4009843575954437 test_loss: 2.5204320907592774
epoch: 97 training_loss 2.4304108834266662 test_loss: 2.5745517730712892
epoch: 98 training_loss 2.4737480688095093 test_loss: 2.5648412704467773
epoch: 99 training_loss 2.50116583943367 test_loss: 2.431050491333008
epoch: 100 training_loss 2.440074018239975 test_loss: 2.4478404998779295
epoch: 101 training_loss 2.4461502373218535 test_loss: 2.5323036193847654
epoch: 102 training_loss 2.3871356046199796 test_loss: 2.5926414489746095
epoch: 103 training_loss 2.3877517116069793 test_loss: 2.585118865966797
epoch: 104 training_loss 2.4308567690849303 test_loss: 2.535373497009277
epoch: 105 training_loss 2.380040698051453 test_loss: 2.4919036865234374
epoch: 106 training_loss 2.4576250350475313 test_loss: 2.445020294189453
epoch: 107 training_loss 2.425471329689026 test_loss: 2.482234573364258
epoch: 108 training_loss 2.3749678182601928 test_loss: 2.5211830139160156
epoch: 109 training_loss 2.3418326151371 test_loss: 2.440194320678711
epoch: 110 training_loss 2.356492431163788 test_loss: 2.4365352630615233
epoch: 111 training_loss 2.3411495983600616 test_loss: 2.4237165451049805
epoch: 112 training_loss 2.3424536645412446 test_loss: 2.4244422912597656
epoch: 113 training_loss 2.3445506381988523 test_loss: 2.440705680847168
epoch: 114 training_loss 2.3090976274013517 test_loss: 2.4391895294189454
epoch: 115 training_loss 2.4029632592201233 test_loss: 2.5375091552734377
epoch: 116 training_loss 2.3717630922794344 test_loss: 2.5047348022460936
epoch: 117 training_loss 2.402729196548462 test_loss: 2.5039968490600586
epoch: 118 training_loss 2.325912067890167 test_loss: 2.402415084838867
epoch: 119 training_loss 2.3467873513698576 test_loss: 2.423051643371582
epoch: 120 training_loss 2.373551675081253 test_loss: 2.411952590942383
epoch: 121 training_loss 2.333220949172974 test_loss: 2.3269063949584963
epoch: 122 training_loss 2.295000891685486 test_loss: 2.4810245513916014
epoch: 123 training_loss 2.3129756474494934 test_loss: 2.3789220809936524
epoch: 124 training_loss 2.291886909008026 test_loss: 2.426339530944824
epoch: 125 training_loss 2.3317082965373994 test_loss: 2.4009798049926756
epoch: 126 training_loss 2.3263332760334015 test_loss: 2.2142967224121093
epoch: 127 training_loss 2.288500336408615 test_loss: 2.518509292602539
epoch: 128 training_loss 2.3021079325675964 test_loss: 2.3766502380371093
epoch: 129 training_loss 2.2548427212238313 test_loss: 2.265211296081543
epoch: 130 training_loss 2.2837399196624757 test_loss: 2.335740852355957
epoch: 131 training_loss 2.2571392035484314 test_loss: 2.3435037612915037
epoch: 132 training_loss 2.2373941957950594 test_loss: 2.3329822540283205
epoch: 133 training_loss 2.2463500928878783 test_loss: 2.3718542098999023
epoch: 134 training_loss 2.2980629062652587 test_loss: 2.3660106658935547
epoch: 135 training_loss 2.2795797097682953 test_loss: 2.377135467529297
epoch: 136 training_loss 2.265247517824173 test_loss: 2.347847747802734
epoch: 137 training_loss 2.256999281644821 test_loss: 2.3106662750244142
epoch: 138 training_loss 2.2228935348987577 test_loss: 2.242019462585449
epoch: 139 training_loss 2.283783328533173 test_loss: 2.3185752868652343
epoch: 140 training_loss 2.246860684156418 test_loss: 2.285704803466797
epoch: 141 training_loss 2.2242319738864897 test_loss: 2.4173254013061523
epoch: 142 training_loss 2.2089771485328673 test_loss: 2.333694076538086
epoch: 143 training_loss 2.254467247724533 test_loss: 2.319285774230957
epoch: 144 training_loss 2.2040576457977297 test_loss: 2.337721252441406
epoch: 145 training_loss 2.2634759294986724 test_loss: 2.327960395812988
epoch: 146 training_loss 2.1878721678256987 test_loss: 2.2232532501220703
epoch: 147 training_loss 2.186579291820526 test_loss: 2.3079105377197267
epoch: 148 training_loss 2.20476420879364 test_loss: 2.224483871459961
epoch: 149 training_loss 2.2273726892471313 test_loss: 2.260013389587402
2784.3429903656784
episode: 0 training return: tensor(139.9382, device='cuda:0')
episode: 1 training return: tensor(218.1517, device='cuda:0')
episode: 2 training return: tensor(-175.7586, device='cuda:0')
episode: 3 training return: tensor(-62.6557, device='cuda:0')
epoch: 1 test_true_pfm: 2173.3002704796218 sim_pfm: 211.196397394718
episode: 4 training return: tensor(194.4916, device='cuda:0')
episode: 5 training return: tensor(182.8084, device='cuda:0')
episode: 6 training return: tensor(-180.1705, device='cuda:0')
episode: 7 training return: tensor(36.7821, device='cuda:0')
epoch: 2 test_true_pfm: 2922.3172045528067 sim_pfm: 214.5475827526922
episode: 8 training return: tensor(170.6650, device='cuda:0')
episode: 9 training return: tensor(-303.6172, device='cuda:0')
episode: 10 training return: tensor(-188.5649, device='cuda:0')
episode: 11 training return: tensor(190.5075, device='cuda:0')
epoch: 3 test_true_pfm: 2282.935746798049 sim_pfm: -95.78974018260487
episode: 12 training return: tensor(-246.5907, device='cuda:0')
episode: 13 training return: tensor(313.0116, device='cuda:0')
episode: 14 training return: tensor(223.2445, device='cuda:0')
episode: 15 training return: tensor(-179.7777, device='cuda:0')
epoch: 4 test_true_pfm: 2572.1113294288643 sim_pfm: -142.38528816854037
episode: 16 training return: tensor(-223.2755, device='cuda:0')
episode: 17 training return: tensor(291.6069, device='cuda:0')
episode: 18 training return: tensor(241.7490, device='cuda:0')
episode: 19 training return: tensor(136.6612, device='cuda:0')
epoch: 5 test_true_pfm: 2777.2419160414206 sim_pfm: 104.84286522195907
episode: 20 training return: tensor(-7.7927, device='cuda:0')
episode: 21 training return: tensor(201.2569, device='cuda:0')
episode: 22 training return: tensor(-218.7626, device='cuda:0')
episode: 23 training return: tensor(250.4700, device='cuda:0')
epoch: 6 test_true_pfm: 3088.7715491844046 sim_pfm: 80.85423620935762
episode: 24 training return: tensor(-229.1664, device='cuda:0')
episode: 25 training return: tensor(-96.5105, device='cuda:0')
episode: 26 training return: tensor(251.2263, device='cuda:0')
episode: 27 training return: tensor(227.0800, device='cuda:0')
epoch: 7 test_true_pfm: 2159.063071592817 sim_pfm: -99.32574412053994
episode: 28 training return: tensor(-228.6374, device='cuda:0')
episode: 29 training return: tensor(55.8693, device='cuda:0')
episode: 30 training return: tensor(26.0897, device='cuda:0')
episode: 31 training return: tensor(-331.4250, device='cuda:0')
epoch: 8 test_true_pfm: 2850.4446418464854 sim_pfm: -219.98412994671767
episode: 32 training return: tensor(56.0506, device='cuda:0')
episode: 33 training return: tensor(-313.2034, device='cuda:0')
episode: 34 training return: tensor(219.6553, device='cuda:0')
episode: 35 training return: tensor(-183.0051, device='cuda:0')
epoch: 9 test_true_pfm: 3314.9804618734884 sim_pfm: 171.5730700198425
episode: 36 training return: tensor(-287.3192, device='cuda:0')
episode: 37 training return: tensor(214.1980, device='cuda:0')
episode: 38 training return: tensor(-241.2877, device='cuda:0')
episode: 39 training return: tensor(242.3883, device='cuda:0')
epoch: 10 test_true_pfm: 2535.283988508331 sim_pfm: 87.04153933389655
episode: 40 training return: tensor(237.4923, device='cuda:0')
episode: 41 training return: tensor(128.9705, device='cuda:0')
episode: 42 training return: tensor(194.9653, device='cuda:0')
episode: 43 training return: tensor(204.7462, device='cuda:0')
epoch: 11 test_true_pfm: 2826.20787659997 sim_pfm: 76.0788236692703
episode: 44 training return: tensor(156.9822, device='cuda:0')
episode: 45 training return: tensor(251.0448, device='cuda:0')
episode: 46 training return: tensor(-183.8020, device='cuda:0')
episode: 47 training return: tensor(280.1974, device='cuda:0')
epoch: 12 test_true_pfm: 3321.7333203253875 sim_pfm: 35.32622110901866
episode: 48 training return: tensor(220.3978, device='cuda:0')
episode: 49 training return: tensor(144.0354, device='cuda:0')
episode: 50 training return: tensor(65.8713, device='cuda:0')
episode: 51 training return: tensor(266.3563, device='cuda:0')
epoch: 13 test_true_pfm: 3335.3593021802276 sim_pfm: 258.70973190802033
episode: 52 training return: tensor(245.0515, device='cuda:0')
episode: 53 training return: tensor(-154.2293, device='cuda:0')
episode: 54 training return: tensor(299.2141, device='cuda:0')
episode: 55 training return: tensor(252.1669, device='cuda:0')
epoch: 14 test_true_pfm: 2818.6820136450265 sim_pfm: 220.65368698065868
episode: 56 training return: tensor(259.0964, device='cuda:0')
episode: 57 training return: tensor(214.6586, device='cuda:0')
episode: 58 training return: tensor(245.2508, device='cuda:0')
episode: 59 training return: tensor(276.5269, device='cuda:0')
epoch: 15 test_true_pfm: 2852.003873302922 sim_pfm: 37.323638114340916
episode: 60 training return: tensor(202.2278, device='cuda:0')
episode: 61 training return: tensor(247.4010, device='cuda:0')
episode: 62 training return: tensor(171.7763, device='cuda:0')
episode: 63 training return: tensor(-323.4747, device='cuda:0')
epoch: 16 test_true_pfm: 3303.5325195280057 sim_pfm: 228.39578936159765
episode: 64 training return: tensor(278.4260, device='cuda:0')
episode: 65 training return: tensor(110.5270, device='cuda:0')
episode: 66 training return: tensor(142.0425, device='cuda:0')
episode: 67 training return: tensor(143.0404, device='cuda:0')
epoch: 17 test_true_pfm: 2874.3209028871443 sim_pfm: 241.9527396512955
episode: 68 training return: tensor(292.4464, device='cuda:0')
episode: 69 training return: tensor(257.5821, device='cuda:0')
episode: 70 training return: tensor(217.8544, device='cuda:0')
episode: 71 training return: tensor(176.4402, device='cuda:0')
epoch: 18 test_true_pfm: 3361.077039143441 sim_pfm: 66.23165939441726
episode: 72 training return: tensor(84.7088, device='cuda:0')
episode: 73 training return: tensor(-242.5793, device='cuda:0')
episode: 74 training return: tensor(-322.0217, device='cuda:0')
episode: 75 training return: tensor(-30.8171, device='cuda:0')
epoch: 19 test_true_pfm: 2790.496078478644 sim_pfm: 238.3571080423426
episode: 76 training return: tensor(190.4368, device='cuda:0')
episode: 77 training return: tensor(140.4007, device='cuda:0')
episode: 78 training return: tensor(-394.9781, device='cuda:0')
episode: 79 training return: tensor(290.4178, device='cuda:0')
epoch: 20 test_true_pfm: 3344.8849719688824 sim_pfm: 216.5626541961295
episode: 80 training return: tensor(244.1292, device='cuda:0')
episode: 81 training return: tensor(232.6125, device='cuda:0')
episode: 82 training return: tensor(-259.0982, device='cuda:0')
episode: 83 training return: tensor(284.3049, device='cuda:0')
epoch: 21 test_true_pfm: 3179.3544507013435 sim_pfm: 44.74242276958345
episode: 84 training return: tensor(14.2154, device='cuda:0')
episode: 85 training return: tensor(38.6106, device='cuda:0')
episode: 86 training return: tensor(236.0213, device='cuda:0')
episode: 87 training return: tensor(-169.3976, device='cuda:0')
epoch: 22 test_true_pfm: 3328.227264049566 sim_pfm: 158.57652948537725
episode: 88 training return: tensor(264.9129, device='cuda:0')
episode: 89 training return: tensor(269.8481, device='cuda:0')
episode: 90 training return: tensor(241.6003, device='cuda:0')
episode: 91 training return: tensor(142.7390, device='cuda:0')
epoch: 23 test_true_pfm: 3316.753943202007 sim_pfm: 231.5170447441245
episode: 92 training return: tensor(221.4547, device='cuda:0')
episode: 93 training return: tensor(222.3847, device='cuda:0')
episode: 94 training return: tensor(249.8325, device='cuda:0')
episode: 95 training return: tensor(251.7371, device='cuda:0')
epoch: 24 test_true_pfm: 3046.380113516627 sim_pfm: 225.4503341221522
episode: 96 training return: tensor(248.0706, device='cuda:0')
episode: 97 training return: tensor(-129.7685, device='cuda:0')
episode: 98 training return: tensor(-80.4141, device='cuda:0')
episode: 99 training return: tensor(245.4021, device='cuda:0')
epoch: 25 test_true_pfm: 2731.3424438265215 sim_pfm: 111.68257754612326
episode: 100 training return: tensor(231.3981, device='cuda:0')
episode: 101 training return: tensor(223.6530, device='cuda:0')
episode: 102 training return: tensor(241.8573, device='cuda:0')
episode: 103 training return: tensor(197.5602, device='cuda:0')
epoch: 26 test_true_pfm: 3210.139875314482 sim_pfm: 207.26390775056402
episode: 104 training return: tensor(-140.1114, device='cuda:0')
episode: 105 training return: tensor(-196.4854, device='cuda:0')
episode: 106 training return: tensor(238.3411, device='cuda:0')
episode: 107 training return: tensor(255.5991, device='cuda:0')
epoch: 27 test_true_pfm: 3351.5332270711347 sim_pfm: 209.59049090770228
episode: 108 training return: tensor(228.2174, device='cuda:0')
episode: 109 training return: tensor(287.2686, device='cuda:0')
episode: 110 training return: tensor(-94.1475, device='cuda:0')
episode: 111 training return: tensor(172.4580, device='cuda:0')
epoch: 28 test_true_pfm: 3181.079583761724 sim_pfm: 259.0161755979837
episode: 112 training return: tensor(-390.7565, device='cuda:0')
episode: 113 training return: tensor(263.5118, device='cuda:0')
episode: 114 training return: tensor(252.0801, device='cuda:0')
episode: 115 training return: tensor(-129.7200, device='cuda:0')
epoch: 29 test_true_pfm: 3314.2480630098344 sim_pfm: 271.9104128237038
episode: 116 training return: tensor(-359.7929, device='cuda:0')
episode: 117 training return: tensor(326.8567, device='cuda:0')
episode: 118 training return: tensor(206.5822, device='cuda:0')
episode: 119 training return: tensor(-210.0534, device='cuda:0')
epoch: 30 test_true_pfm: 3371.682046344551 sim_pfm: 207.26113139884546
episode: 120 training return: tensor(252.6084, device='cuda:0')
episode: 121 training return: tensor(-37.6392, device='cuda:0')
episode: 122 training return: tensor(99.4344, device='cuda:0')
episode: 123 training return: tensor(166.7413, device='cuda:0')
epoch: 31 test_true_pfm: 3357.4407898341537 sim_pfm: 6.3189099399120705
episode: 124 training return: tensor(209.9666, device='cuda:0')
episode: 125 training return: tensor(316.8713, device='cuda:0')
episode: 126 training return: tensor(202.1815, device='cuda:0')
episode: 127 training return: tensor(254.1425, device='cuda:0')
epoch: 32 test_true_pfm: 3337.9710405574638 sim_pfm: 258.0978798435244
episode: 128 training return: tensor(236.5309, device='cuda:0')
episode: 129 training return: tensor(189.8845, device='cuda:0')
episode: 130 training return: tensor(283.9710, device='cuda:0')
episode: 131 training return: tensor(-104.1243, device='cuda:0')
epoch: 33 test_true_pfm: 3459.6569838792725 sim_pfm: 257.5994187920296
episode: 132 training return: tensor(-259.1743, device='cuda:0')
episode: 133 training return: tensor(273.2722, device='cuda:0')
episode: 134 training return: tensor(263.4137, device='cuda:0')
episode: 135 training return: tensor(271.4217, device='cuda:0')
epoch: 34 test_true_pfm: 3297.3092853605126 sim_pfm: 239.64719583638362
episode: 136 training return: tensor(234.0533, device='cuda:0')
episode: 137 training return: tensor(-227.8180, device='cuda:0')
episode: 138 training return: tensor(317.2848, device='cuda:0')
episode: 139 training return: tensor(121.5421, device='cuda:0')
epoch: 35 test_true_pfm: 3288.007043579859 sim_pfm: 185.74126859908574
episode: 140 training return: tensor(268.1917, device='cuda:0')
episode: 141 training return: tensor(231.0838, device='cuda:0')
episode: 142 training return: tensor(285.0809, device='cuda:0')
episode: 143 training return: tensor(283.3594, device='cuda:0')
epoch: 36 test_true_pfm: 3097.634746545844 sim_pfm: 137.58166443983404
episode: 144 training return: tensor(180.6811, device='cuda:0')
episode: 145 training return: tensor(231.3389, device='cuda:0')
episode: 146 training return: tensor(189.8116, device='cuda:0')
episode: 147 training return: tensor(234.4120, device='cuda:0')
epoch: 37 test_true_pfm: 3403.0451446494612 sim_pfm: 235.76870064582909
episode: 148 training return: tensor(207.7870, device='cuda:0')
episode: 149 training return: tensor(239.6926, device='cuda:0')
episode: 150 training return: tensor(252.6240, device='cuda:0')
episode: 151 training return: tensor(237.2544, device='cuda:0')
epoch: 38 test_true_pfm: 3350.7893605016666 sim_pfm: 238.07374584279992
episode: 152 training return: tensor(199.8935, device='cuda:0')
episode: 153 training return: tensor(247.6711, device='cuda:0')
episode: 154 training return: tensor(230.2035, device='cuda:0')
episode: 155 training return: tensor(288.4127, device='cuda:0')
epoch: 39 test_true_pfm: 3346.8867793066797 sim_pfm: 252.57454174822973
episode: 156 training return: tensor(-164.7645, device='cuda:0')
episode: 157 training return: tensor(200.9469, device='cuda:0')
episode: 158 training return: tensor(205.7479, device='cuda:0')
episode: 159 training return: tensor(-461.6095, device='cuda:0')
epoch: 40 test_true_pfm: 2626.9097901826754 sim_pfm: 261.88361332938075
episode: 160 training return: tensor(215.6599, device='cuda:0')
episode: 161 training return: tensor(258.5892, device='cuda:0')
episode: 162 training return: tensor(234.5882, device='cuda:0')
episode: 163 training return: tensor(218.3260, device='cuda:0')
epoch: 41 test_true_pfm: 3335.452651462707 sim_pfm: 157.8991984803773
episode: 164 training return: tensor(186.7756, device='cuda:0')
episode: 165 training return: tensor(251.4640, device='cuda:0')
episode: 166 training return: tensor(302.0651, device='cuda:0')
episode: 167 training return: tensor(238.6683, device='cuda:0')
epoch: 42 test_true_pfm: 2907.8239924847926 sim_pfm: 292.2431735891344
episode: 168 training return: tensor(-89.2966, device='cuda:0')
episode: 169 training return: tensor(240.8226, device='cuda:0')
episode: 170 training return: tensor(191.2853, device='cuda:0')
episode: 171 training return: tensor(230.9930, device='cuda:0')
epoch: 43 test_true_pfm: 3343.2744095431613 sim_pfm: 271.321971324059
episode: 172 training return: tensor(310.8230, device='cuda:0')
episode: 173 training return: tensor(281.9959, device='cuda:0')
episode: 174 training return: tensor(-174.7013, device='cuda:0')
episode: 175 training return: tensor(224.8514, device='cuda:0')
epoch: 44 test_true_pfm: 3375.0008799494353 sim_pfm: 159.63143117776295
episode: 176 training return: tensor(314.9436, device='cuda:0')
episode: 177 training return: tensor(252.4364, device='cuda:0')
episode: 178 training return: tensor(197.1355, device='cuda:0')
episode: 179 training return: tensor(195.1947, device='cuda:0')
epoch: 45 test_true_pfm: 2862.1400533325327 sim_pfm: 238.81154600637578
episode: 180 training return: tensor(80.1922, device='cuda:0')
episode: 181 training return: tensor(196.4449, device='cuda:0')
episode: 182 training return: tensor(294.6049, device='cuda:0')
episode: 183 training return: tensor(302.5598, device='cuda:0')
epoch: 46 test_true_pfm: 3264.614077571012 sim_pfm: 231.84657485922799
episode: 184 training return: tensor(292.4243, device='cuda:0')
episode: 185 training return: tensor(231.5352, device='cuda:0')
episode: 186 training return: tensor(284.3187, device='cuda:0')
episode: 187 training return: tensor(204.5897, device='cuda:0')
epoch: 47 test_true_pfm: 3398.274809069337 sim_pfm: 294.06793887225405
episode: 188 training return: tensor(231.4814, device='cuda:0')
episode: 189 training return: tensor(240.1631, device='cuda:0')
episode: 190 training return: tensor(140.0603, device='cuda:0')
episode: 191 training return: tensor(272.9766, device='cuda:0')
epoch: 48 test_true_pfm: 3359.9066216697724 sim_pfm: 285.80780946978484
episode: 192 training return: tensor(-100.2445, device='cuda:0')
episode: 193 training return: tensor(254.7466, device='cuda:0')
episode: 194 training return: tensor(223.0086, device='cuda:0')
episode: 195 training return: tensor(-328.7557, device='cuda:0')
epoch: 49 test_true_pfm: 3061.2121645652624 sim_pfm: 269.3670224727927
episode: 196 training return: tensor(295.0924, device='cuda:0')
episode: 197 training return: tensor(304.9981, device='cuda:0')
episode: 198 training return: tensor(168.6340, device='cuda:0')
episode: 199 training return: tensor(261.3275, device='cuda:0')
epoch: 50 test_true_pfm: 3369.7178678575433 sim_pfm: 274.4649235899172
episode: 200 training return: tensor(228.2458, device='cuda:0')
episode: 201 training return: tensor(231.5762, device='cuda:0')
episode: 202 training return: tensor(262.5303, device='cuda:0')
episode: 203 training return: tensor(142.3738, device='cuda:0')
epoch: 51 test_true_pfm: 3259.4405145728647 sim_pfm: 237.1884980677763
episode: 204 training return: tensor(53.7902, device='cuda:0')
episode: 205 training return: tensor(-336.4199, device='cuda:0')
episode: 206 training return: tensor(-466.8266, device='cuda:0')
episode: 207 training return: tensor(27.1495, device='cuda:0')
epoch: 52 test_true_pfm: 3393.1536918034167 sim_pfm: 285.2977806468455
episode: 208 training return: tensor(233.3085, device='cuda:0')
episode: 209 training return: tensor(238.2850, device='cuda:0')
episode: 210 training return: tensor(242.2616, device='cuda:0')
episode: 211 training return: tensor(252.5017, device='cuda:0')
epoch: 53 test_true_pfm: 3336.8566063780218 sim_pfm: 192.40073240810307
episode: 212 training return: tensor(208.5600, device='cuda:0')
episode: 213 training return: tensor(281.8203, device='cuda:0')
episode: 214 training return: tensor(-362.5719, device='cuda:0')
episode: 215 training return: tensor(313.3821, device='cuda:0')
epoch: 54 test_true_pfm: 3453.274781658473 sim_pfm: 150.57997925258437
episode: 216 training return: tensor(224.5017, device='cuda:0')
episode: 217 training return: tensor(243.4425, device='cuda:0')
episode: 218 training return: tensor(229.7971, device='cuda:0')
episode: 219 training return: tensor(302.7367, device='cuda:0')
epoch: 55 test_true_pfm: 3379.978042119679 sim_pfm: 145.26531505788444
episode: 220 training return: tensor(311.0148, device='cuda:0')
episode: 221 training return: tensor(266.0597, device='cuda:0')
episode: 222 training return: tensor(292.0385, device='cuda:0')
episode: 223 training return: tensor(265.6429, device='cuda:0')
epoch: 56 test_true_pfm: 3408.0649606245966 sim_pfm: 319.53641039689927
episode: 224 training return: tensor(272.6401, device='cuda:0')
episode: 225 training return: tensor(303.7167, device='cuda:0')
episode: 226 training return: tensor(278.4946, device='cuda:0')
episode: 227 training return: tensor(298.5153, device='cuda:0')
epoch: 57 test_true_pfm: 2786.8598673117485 sim_pfm: 283.6906664412236
episode: 228 training return: tensor(241.8600, device='cuda:0')
episode: 229 training return: tensor(300.7487, device='cuda:0')
episode: 230 training return: tensor(238.0456, device='cuda:0')
episode: 231 training return: tensor(282.3599, device='cuda:0')
epoch: 58 test_true_pfm: 3341.480217111331 sim_pfm: 249.56330622640476
episode: 232 training return: tensor(254.4125, device='cuda:0')
episode: 233 training return: tensor(233.7169, device='cuda:0')
episode: 234 training return: tensor(352.9612, device='cuda:0')
episode: 235 training return: tensor(-164.9455, device='cuda:0')
epoch: 59 test_true_pfm: 3359.229857480337 sim_pfm: 257.1032268953859
episode: 236 training return: tensor(-252.7271, device='cuda:0')
episode: 237 training return: tensor(246.1045, device='cuda:0')
episode: 238 training return: tensor(-238.6040, device='cuda:0')
episode: 239 training return: tensor(257.2467, device='cuda:0')
epoch: 60 test_true_pfm: 3409.097538053366 sim_pfm: 296.09538208755356
episode: 240 training return: tensor(245.3907, device='cuda:0')
episode: 241 training return: tensor(225.6749, device='cuda:0')
episode: 242 training return: tensor(290.8839, device='cuda:0')
episode: 243 training return: tensor(301.1066, device='cuda:0')
epoch: 61 test_true_pfm: 3193.0494185261136 sim_pfm: 3.016185027877024
episode: 244 training return: tensor(-76.7578, device='cuda:0')
episode: 245 training return: tensor(212.4454, device='cuda:0')
episode: 246 training return: tensor(273.3523, device='cuda:0')
episode: 247 training return: tensor(224.0549, device='cuda:0')
epoch: 62 test_true_pfm: 3382.3515289772463 sim_pfm: 229.587513030041
episode: 248 training return: tensor(259.6388, device='cuda:0')
episode: 249 training return: tensor(-97.9459, device='cuda:0')
episode: 250 training return: tensor(221.6140, device='cuda:0')
episode: 251 training return: tensor(173.9091, device='cuda:0')
epoch: 63 test_true_pfm: 3366.405953992836 sim_pfm: 132.59873114085835
episode: 252 training return: tensor(262.1168, device='cuda:0')
episode: 253 training return: tensor(285.4072, device='cuda:0')
episode: 254 training return: tensor(323.8903, device='cuda:0')
episode: 255 training return: tensor(54.3186, device='cuda:0')
epoch: 64 test_true_pfm: 3448.782010379877 sim_pfm: 264.3613390070871
episode: 256 training return: tensor(270.6237, device='cuda:0')
episode: 257 training return: tensor(-80.2154, device='cuda:0')
episode: 258 training return: tensor(233.8717, device='cuda:0')
episode: 259 training return: tensor(229.5173, device='cuda:0')
epoch: 65 test_true_pfm: 3365.699028124602 sim_pfm: 253.9747675001951
episode: 260 training return: tensor(312.9616, device='cuda:0')
episode: 261 training return: tensor(-14.4153, device='cuda:0')
episode: 262 training return: tensor(-113.7463, device='cuda:0')
episode: 263 training return: tensor(-181.9008, device='cuda:0')
epoch: 66 test_true_pfm: 3435.024763220674 sim_pfm: 321.7375863101236
episode: 264 training return: tensor(253.2812, device='cuda:0')
episode: 265 training return: tensor(-151.7189, device='cuda:0')
episode: 266 training return: tensor(-220.4904, device='cuda:0')
episode: 267 training return: tensor(266.2900, device='cuda:0')
epoch: 67 test_true_pfm: 3366.093905163258 sim_pfm: 213.82681363029405
episode: 268 training return: tensor(296.4961, device='cuda:0')
episode: 269 training return: tensor(272.4625, device='cuda:0')
episode: 270 training return: tensor(263.8577, device='cuda:0')
episode: 271 training return: tensor(251.2422, device='cuda:0')
epoch: 68 test_true_pfm: 3380.926150323554 sim_pfm: 276.2020932128653
episode: 272 training return: tensor(234.9111, device='cuda:0')
episode: 273 training return: tensor(313.2441, device='cuda:0')
episode: 274 training return: tensor(245.8740, device='cuda:0')
episode: 275 training return: tensor(227.3901, device='cuda:0')
epoch: 69 test_true_pfm: 3378.2640198551676 sim_pfm: 152.72794225740168
episode: 276 training return: tensor(304.6938, device='cuda:0')
episode: 277 training return: tensor(255.8158, device='cuda:0')
episode: 278 training return: tensor(253.7838, device='cuda:0')
episode: 279 training return: tensor(6.7596, device='cuda:0')
epoch: 70 test_true_pfm: 3330.13848301225 sim_pfm: 257.310008522938
episode: 280 training return: tensor(171.8287, device='cuda:0')
episode: 281 training return: tensor(260.3587, device='cuda:0')
episode: 282 training return: tensor(-110.8121, device='cuda:0')
episode: 283 training return: tensor(311.3051, device='cuda:0')
epoch: 71 test_true_pfm: 3118.000523794976 sim_pfm: 135.56686398517922
episode: 284 training return: tensor(233.5180, device='cuda:0')
episode: 285 training return: tensor(230.4446, device='cuda:0')
episode: 286 training return: tensor(281.2016, device='cuda:0')
episode: 287 training return: tensor(242.6120, device='cuda:0')
epoch: 72 test_true_pfm: 3378.3370591678754 sim_pfm: 262.2815534445593
episode: 288 training return: tensor(-148.4796, device='cuda:0')
episode: 289 training return: tensor(207.5161, device='cuda:0')
episode: 290 training return: tensor(241.5019, device='cuda:0')
episode: 291 training return: tensor(285.8979, device='cuda:0')
epoch: 73 test_true_pfm: 3369.7440654839543 sim_pfm: 260.171464276762
episode: 292 training return: tensor(262.4207, device='cuda:0')
episode: 293 training return: tensor(237.2584, device='cuda:0')
episode: 294 training return: tensor(240.0164, device='cuda:0')
episode: 295 training return: tensor(-326.7963, device='cuda:0')
epoch: 74 test_true_pfm: 3142.748458861936 sim_pfm: 247.31367250830712
episode: 296 training return: tensor(243.0413, device='cuda:0')
episode: 297 training return: tensor(235.0370, device='cuda:0')
episode: 298 training return: tensor(269.8083, device='cuda:0')
episode: 299 training return: tensor(245.1450, device='cuda:0')
epoch: 75 test_true_pfm: 3392.7084826401297 sim_pfm: 299.6442647803536
episode: 300 training return: tensor(34.9716, device='cuda:0')
episode: 301 training return: tensor(297.9516, device='cuda:0')
episode: 302 training return: tensor(223.8015, device='cuda:0')
episode: 303 training return: tensor(366.6118, device='cuda:0')
epoch: 76 test_true_pfm: 3427.7677822750525 sim_pfm: 291.30341199601145
episode: 304 training return: tensor(299.7313, device='cuda:0')
episode: 305 training return: tensor(201.9305, device='cuda:0')
episode: 306 training return: tensor(-348.1375, device='cuda:0')
episode: 307 training return: tensor(138.3054, device='cuda:0')
epoch: 77 test_true_pfm: 3391.23926421722 sim_pfm: 162.98977631501234
episode: 308 training return: tensor(249.9413, device='cuda:0')
episode: 309 training return: tensor(219.1263, device='cuda:0')
episode: 310 training return: tensor(297.1733, device='cuda:0')
episode: 311 training return: tensor(319.2232, device='cuda:0')
epoch: 78 test_true_pfm: 3381.226591701434 sim_pfm: 268.7190033529575
episode: 312 training return: tensor(198.7889, device='cuda:0')
episode: 313 training return: tensor(210.6813, device='cuda:0')
episode: 314 training return: tensor(277.0727, device='cuda:0')
episode: 315 training return: tensor(205.7264, device='cuda:0')
epoch: 79 test_true_pfm: 3291.1466214672914 sim_pfm: 251.3272820043688
episode: 316 training return: tensor(263.2340, device='cuda:0')
episode: 317 training return: tensor(252.4288, device='cuda:0')
episode: 318 training return: tensor(297.8705, device='cuda:0')
episode: 319 training return: tensor(251.9700, device='cuda:0')
epoch: 80 test_true_pfm: 3127.9513484404415 sim_pfm: 278.0134619118956
episode: 320 training return: tensor(-110.9984, device='cuda:0')
episode: 321 training return: tensor(241.6293, device='cuda:0')
episode: 322 training return: tensor(221.2574, device='cuda:0')
episode: 323 training return: tensor(324.6011, device='cuda:0')
epoch: 81 test_true_pfm: 3416.144177645643 sim_pfm: 254.32041386637138
episode: 324 training return: tensor(-89.4032, device='cuda:0')
episode: 325 training return: tensor(299.5078, device='cuda:0')
episode: 326 training return: tensor(248.2226, device='cuda:0')
episode: 327 training return: tensor(219.2679, device='cuda:0')
epoch: 82 test_true_pfm: 3336.182890619412 sim_pfm: 240.52438315120526
episode: 328 training return: tensor(242.8775, device='cuda:0')
episode: 329 training return: tensor(-86.1324, device='cuda:0')
episode: 330 training return: tensor(297.9638, device='cuda:0')
episode: 331 training return: tensor(235.0825, device='cuda:0')
epoch: 83 test_true_pfm: 3361.331656463131 sim_pfm: 193.5112539388841
episode: 332 training return: tensor(238.2076, device='cuda:0')
episode: 333 training return: tensor(-90.1444, device='cuda:0')
episode: 334 training return: tensor(281.5814, device='cuda:0')
episode: 335 training return: tensor(184.9035, device='cuda:0')
epoch: 84 test_true_pfm: 3366.081341981311 sim_pfm: 213.36385352667034
episode: 336 training return: tensor(237.5780, device='cuda:0')
episode: 337 training return: tensor(248.8485, device='cuda:0')
episode: 338 training return: tensor(247.3879, device='cuda:0')
episode: 339 training return: tensor(-233.7615, device='cuda:0')
epoch: 85 test_true_pfm: 3349.3361656331545 sim_pfm: 225.9481947121385
episode: 340 training return: tensor(308.0315, device='cuda:0')
episode: 341 training return: tensor(238.9618, device='cuda:0')
episode: 342 training return: tensor(232.9203, device='cuda:0')
episode: 343 training return: tensor(251.4592, device='cuda:0')
epoch: 86 test_true_pfm: 3377.950789170473 sim_pfm: 275.1858795859832
episode: 344 training return: tensor(229.7930, device='cuda:0')
episode: 345 training return: tensor(-94.9420, device='cuda:0')
episode: 346 training return: tensor(200.0174, device='cuda:0')
episode: 347 training return: tensor(269.2814, device='cuda:0')
epoch: 87 test_true_pfm: 3356.0066868199174 sim_pfm: 244.09468559816014
episode: 348 training return: tensor(173.4004, device='cuda:0')
episode: 349 training return: tensor(223.4275, device='cuda:0')
episode: 350 training return: tensor(277.8200, device='cuda:0')
episode: 351 training return: tensor(257.1991, device='cuda:0')
epoch: 88 test_true_pfm: 3384.5616303224724 sim_pfm: 276.8960761716007
episode: 352 training return: tensor(176.9278, device='cuda:0')
episode: 353 training return: tensor(266.4648, device='cuda:0')
episode: 354 training return: tensor(271.4094, device='cuda:0')
episode: 355 training return: tensor(166.3396, device='cuda:0')
epoch: 89 test_true_pfm: 3407.3544419250006 sim_pfm: 264.3450463573681
episode: 356 training return: tensor(211.5996, device='cuda:0')
episode: 357 training return: tensor(196.7391, device='cuda:0')
episode: 358 training return: tensor(223.9594, device='cuda:0')
episode: 359 training return: tensor(142.9422, device='cuda:0')
epoch: 90 test_true_pfm: 3383.2468557164243 sim_pfm: 248.4453611089751
episode: 360 training return: tensor(15.6830, device='cuda:0')
episode: 361 training return: tensor(25.2296, device='cuda:0')
episode: 362 training return: tensor(303.8198, device='cuda:0')
episode: 363 training return: tensor(-107.6447, device='cuda:0')
epoch: 91 test_true_pfm: 3434.186650052412 sim_pfm: 278.7788870913403
episode: 364 training return: tensor(218.4489, device='cuda:0')
episode: 365 training return: tensor(57.5924, device='cuda:0')
episode: 366 training return: tensor(-26.3886, device='cuda:0')
episode: 367 training return: tensor(263.7312, device='cuda:0')
epoch: 92 test_true_pfm: 3411.1706070117343 sim_pfm: 257.8417721446701
episode: 368 training return: tensor(335.3316, device='cuda:0')
episode: 369 training return: tensor(195.4019, device='cuda:0')
episode: 370 training return: tensor(12.1398, device='cuda:0')
episode: 371 training return: tensor(227.3994, device='cuda:0')
epoch: 93 test_true_pfm: 3433.2782577906805 sim_pfm: 269.6439263857125
episode: 372 training return: tensor(269.7671, device='cuda:0')
episode: 373 training return: tensor(257.7292, device='cuda:0')
episode: 374 training return: tensor(257.3732, device='cuda:0')
episode: 375 training return: tensor(304.8297, device='cuda:0')
epoch: 94 test_true_pfm: 3401.442485358437 sim_pfm: 260.64496145862114
episode: 376 training return: tensor(325.1344, device='cuda:0')
episode: 377 training return: tensor(178.9456, device='cuda:0')
episode: 378 training return: tensor(220.3271, device='cuda:0')
episode: 379 training return: tensor(225.3584, device='cuda:0')
epoch: 95 test_true_pfm: 3399.187124379692 sim_pfm: 276.965782855618
episode: 380 training return: tensor(246.2967, device='cuda:0')
episode: 381 training return: tensor(290.4781, device='cuda:0')
episode: 382 training return: tensor(246.0638, device='cuda:0')
episode: 383 training return: tensor(249.9664, device='cuda:0')
epoch: 96 test_true_pfm: 3375.5407894084506 sim_pfm: 291.38574791875243
episode: 384 training return: tensor(163.8891, device='cuda:0')
episode: 385 training return: tensor(319.1656, device='cuda:0')
episode: 386 training return: tensor(-172.3916, device='cuda:0')
episode: 387 training return: tensor(290.2740, device='cuda:0')
epoch: 97 test_true_pfm: 3413.0063379369385 sim_pfm: 284.3677702458129
episode: 388 training return: tensor(318.4111, device='cuda:0')
episode: 389 training return: tensor(235.0273, device='cuda:0')
episode: 390 training return: tensor(228.9549, device='cuda:0')
episode: 391 training return: tensor(248.6065, device='cuda:0')
epoch: 98 test_true_pfm: 3396.4898201706333 sim_pfm: 254.43375422414587
episode: 392 training return: tensor(338.9907, device='cuda:0')
episode: 393 training return: tensor(238.1571, device='cuda:0')
episode: 394 training return: tensor(197.2454, device='cuda:0')
episode: 395 training return: tensor(234.3795, device='cuda:0')
epoch: 99 test_true_pfm: 3382.693496184904 sim_pfm: 246.46252517018002
episode: 396 training return: tensor(15.4297, device='cuda:0')
episode: 397 training return: tensor(-348.5686, device='cuda:0')
episode: 398 training return: tensor(223.9618, device='cuda:0')
episode: 399 training return: tensor(299.5161, device='cuda:0')
epoch: 100 test_true_pfm: 3404.3042225803424 sim_pfm: 247.04437908629188
episode: 400 training return: tensor(193.6820, device='cuda:0')
episode: 401 training return: tensor(-104.6394, device='cuda:0')
episode: 402 training return: tensor(329.3975, device='cuda:0')
episode: 403 training return: tensor(279.8329, device='cuda:0')
epoch: 101 test_true_pfm: 3310.3274315788117 sim_pfm: 207.4428061754928
episode: 404 training return: tensor(193.9710, device='cuda:0')
episode: 405 training return: tensor(280.0681, device='cuda:0')
episode: 406 training return: tensor(89.7391, device='cuda:0')
episode: 407 training return: tensor(240.8754, device='cuda:0')
epoch: 102 test_true_pfm: 3377.336195765289 sim_pfm: 231.77303890152447
episode: 408 training return: tensor(217.0991, device='cuda:0')
episode: 409 training return: tensor(238.4257, device='cuda:0')
episode: 410 training return: tensor(264.2821, device='cuda:0')
episode: 411 training return: tensor(-33.7176, device='cuda:0')
epoch: 103 test_true_pfm: 3352.8014105232737 sim_pfm: 254.16547773078977
episode: 412 training return: tensor(242.5970, device='cuda:0')
episode: 413 training return: tensor(214.2634, device='cuda:0')
episode: 414 training return: tensor(240.9460, device='cuda:0')
episode: 415 training return: tensor(166.8348, device='cuda:0')
epoch: 104 test_true_pfm: 3425.008736213771 sim_pfm: 244.311995708306
episode: 416 training return: tensor(-84.9516, device='cuda:0')
episode: 417 training return: tensor(-85.6361, device='cuda:0')
episode: 418 training return: tensor(22.2296, device='cuda:0')
episode: 419 training return: tensor(310.7083, device='cuda:0')
epoch: 105 test_true_pfm: 3325.124738967707 sim_pfm: 262.32448595636134
episode: 420 training return: tensor(241.3933, device='cuda:0')
episode: 421 training return: tensor(214.3482, device='cuda:0')
episode: 422 training return: tensor(226.5703, device='cuda:0')
episode: 423 training return: tensor(281.0450, device='cuda:0')
epoch: 106 test_true_pfm: 3435.781877355992 sim_pfm: 284.7255903947419
episode: 424 training return: tensor(218.9705, device='cuda:0')
episode: 425 training return: tensor(7.5781, device='cuda:0')
episode: 426 training return: tensor(214.0635, device='cuda:0')
episode: 427 training return: tensor(251.9063, device='cuda:0')
epoch: 107 test_true_pfm: 3361.1962726189377 sim_pfm: 223.12700441823108
episode: 428 training return: tensor(209.7526, device='cuda:0')
episode: 429 training return: tensor(303.1692, device='cuda:0')
episode: 430 training return: tensor(309.8106, device='cuda:0')
episode: 431 training return: tensor(224.3849, device='cuda:0')
epoch: 108 test_true_pfm: 3075.1205886742387 sim_pfm: 268.53131614018156
episode: 432 training return: tensor(271.6773, device='cuda:0')
episode: 433 training return: tensor(269.7971, device='cuda:0')
episode: 434 training return: tensor(284.1111, device='cuda:0')
episode: 435 training return: tensor(274.1969, device='cuda:0')
epoch: 109 test_true_pfm: 3376.2493138662066 sim_pfm: 148.08364233975104
episode: 436 training return: tensor(226.9616, device='cuda:0')
episode: 437 training return: tensor(302.4282, device='cuda:0')
episode: 438 training return: tensor(269.5741, device='cuda:0')
episode: 439 training return: tensor(241.2664, device='cuda:0')
epoch: 110 test_true_pfm: 3381.0023537971447 sim_pfm: 218.2306270842479
episode: 440 training return: tensor(332.6547, device='cuda:0')
episode: 441 training return: tensor(228.7401, device='cuda:0')
episode: 442 training return: tensor(253.7492, device='cuda:0')
episode: 443 training return: tensor(289.0481, device='cuda:0')
epoch: 111 test_true_pfm: 3315.482985540579 sim_pfm: 224.70544491614177
episode: 444 training return: tensor(249.5085, device='cuda:0')
episode: 445 training return: tensor(-328.3606, device='cuda:0')
episode: 446 training return: tensor(308.7305, device='cuda:0')
episode: 447 training return: tensor(255.8807, device='cuda:0')
epoch: 112 test_true_pfm: 3406.024869902701 sim_pfm: 265.5384915258813
episode: 448 training return: tensor(-101.2425, device='cuda:0')
episode: 449 training return: tensor(-138.4273, device='cuda:0')
episode: 450 training return: tensor(-27.2335, device='cuda:0')
episode: 451 training return: tensor(142.4077, device='cuda:0')
epoch: 113 test_true_pfm: 3341.1557496242126 sim_pfm: 270.86764844181016
episode: 452 training return: tensor(262.2870, device='cuda:0')
episode: 453 training return: tensor(282.4482, device='cuda:0')
episode: 454 training return: tensor(222.7448, device='cuda:0')
episode: 455 training return: tensor(247.2174, device='cuda:0')
epoch: 114 test_true_pfm: 3398.1143513883976 sim_pfm: 270.73192102471756
episode: 456 training return: tensor(304.0902, device='cuda:0')
episode: 457 training return: tensor(198.5745, device='cuda:0')
episode: 458 training return: tensor(301.4505, device='cuda:0')
episode: 459 training return: tensor(-15.3367, device='cuda:0')
epoch: 115 test_true_pfm: 3380.8138692121156 sim_pfm: 301.52521373551764
episode: 460 training return: tensor(286.1342, device='cuda:0')
episode: 461 training return: tensor(351.6004, device='cuda:0')
episode: 462 training return: tensor(91.3010, device='cuda:0')
episode: 463 training return: tensor(268.4027, device='cuda:0')
epoch: 116 test_true_pfm: 3413.594950970206 sim_pfm: 270.3223094500233
episode: 464 training return: tensor(184.5081, device='cuda:0')
episode: 465 training return: tensor(210.7153, device='cuda:0')
episode: 466 training return: tensor(243.6886, device='cuda:0')
episode: 467 training return: tensor(286.9974, device='cuda:0')
epoch: 117 test_true_pfm: 3387.953243182886 sim_pfm: 262.4495178790239
episode: 468 training return: tensor(270.3654, device='cuda:0')
episode: 469 training return: tensor(202.2318, device='cuda:0')
episode: 470 training return: tensor(269.1492, device='cuda:0')
episode: 471 training return: tensor(207.5025, device='cuda:0')
epoch: 118 test_true_pfm: 3409.589224860067 sim_pfm: 253.66789802041603
episode: 472 training return: tensor(315.1284, device='cuda:0')
episode: 473 training return: tensor(253.1878, device='cuda:0')
episode: 474 training return: tensor(237.9699, device='cuda:0')
episode: 475 training return: tensor(225.8731, device='cuda:0')
epoch: 119 test_true_pfm: 3350.6422678794966 sim_pfm: 289.6090122750611
episode: 476 training return: tensor(236.2649, device='cuda:0')
episode: 477 training return: tensor(225.0259, device='cuda:0')
episode: 478 training return: tensor(279.4239, device='cuda:0')
episode: 479 training return: tensor(234.6261, device='cuda:0')
epoch: 120 test_true_pfm: 3378.700741608011 sim_pfm: 279.6262767234778
episode: 480 training return: tensor(269.1995, device='cuda:0')
episode: 481 training return: tensor(276.1714, device='cuda:0')
episode: 482 training return: tensor(261.7438, device='cuda:0')
episode: 483 training return: tensor(235.0963, device='cuda:0')
epoch: 121 test_true_pfm: 3336.0935635267997 sim_pfm: 248.65550011064624
episode: 484 training return: tensor(216.7086, device='cuda:0')
episode: 485 training return: tensor(258.1614, device='cuda:0')
episode: 486 training return: tensor(279.1281, device='cuda:0')
episode: 487 training return: tensor(302.2261, device='cuda:0')
epoch: 122 test_true_pfm: 3337.6369338083437 sim_pfm: 237.03444696729034
episode: 488 training return: tensor(240.8139, device='cuda:0')
episode: 489 training return: tensor(292.1782, device='cuda:0')
episode: 490 training return: tensor(270.2335, device='cuda:0')
episode: 491 training return: tensor(303.2905, device='cuda:0')
epoch: 123 test_true_pfm: 3329.80779952126 sim_pfm: 250.58564013324212
episode: 492 training return: tensor(332.4071, device='cuda:0')
episode: 493 training return: tensor(233.1724, device='cuda:0')
episode: 494 training return: tensor(296.4130, device='cuda:0')
episode: 495 training return: tensor(263.7164, device='cuda:0')
epoch: 124 test_true_pfm: 3410.453468548237 sim_pfm: 272.7851693931734
episode: 496 training return: tensor(32.4539, device='cuda:0')
episode: 497 training return: tensor(276.0148, device='cuda:0')
episode: 498 training return: tensor(211.4442, device='cuda:0')
episode: 499 training return: tensor(269.4139, device='cuda:0')
epoch: 125 test_true_pfm: 3392.7569682863455 sim_pfm: 269.29175428931677
episode: 500 training return: tensor(213.6426, device='cuda:0')
episode: 501 training return: tensor(224.2477, device='cuda:0')
episode: 502 training return: tensor(337.8344, device='cuda:0')
episode: 503 training return: tensor(260.8590, device='cuda:0')
epoch: 126 test_true_pfm: 3076.672840986603 sim_pfm: 258.8395450770137
episode: 504 training return: tensor(268.6562, device='cuda:0')
episode: 505 training return: tensor(198.9249, device='cuda:0')
episode: 506 training return: tensor(303.1843, device='cuda:0')
episode: 507 training return: tensor(249.5747, device='cuda:0')
epoch: 127 test_true_pfm: 3350.9429545220632 sim_pfm: 232.3826191012437
episode: 508 training return: tensor(245.0703, device='cuda:0')
episode: 509 training return: tensor(-38.5217, device='cuda:0')
episode: 510 training return: tensor(64.6713, device='cuda:0')
episode: 511 training return: tensor(254.4942, device='cuda:0')
epoch: 128 test_true_pfm: 3372.4068645494967 sim_pfm: 270.0597782560314
episode: 512 training return: tensor(255.0223, device='cuda:0')
episode: 513 training return: tensor(283.8410, device='cuda:0')
episode: 514 training return: tensor(247.7038, device='cuda:0')
episode: 515 training return: tensor(294.0113, device='cuda:0')
epoch: 129 test_true_pfm: 3373.5441188149034 sim_pfm: 266.5075748438442
episode: 516 training return: tensor(245.8447, device='cuda:0')
episode: 517 training return: tensor(263.1747, device='cuda:0')
episode: 518 training return: tensor(336.3120, device='cuda:0')
episode: 519 training return: tensor(287.0122, device='cuda:0')
epoch: 130 test_true_pfm: 3363.4041860017974 sim_pfm: 304.8966025515401
episode: 520 training return: tensor(238.9639, device='cuda:0')
episode: 521 training return: tensor(286.9739, device='cuda:0')
episode: 522 training return: tensor(206.6496, device='cuda:0')
episode: 523 training return: tensor(254.3502, device='cuda:0')
epoch: 131 test_true_pfm: 3399.697624948407 sim_pfm: 258.31817281703115
episode: 524 training return: tensor(33.6050, device='cuda:0')
episode: 525 training return: tensor(269.6420, device='cuda:0')
episode: 526 training return: tensor(211.7053, device='cuda:0')
episode: 527 training return: tensor(272.8747, device='cuda:0')
epoch: 132 test_true_pfm: 2775.6903000811476 sim_pfm: 281.9067086193051
episode: 528 training return: tensor(262.9621, device='cuda:0')
episode: 529 training return: tensor(266.3520, device='cuda:0')
episode: 530 training return: tensor(213.4861, device='cuda:0')
episode: 531 training return: tensor(218.1102, device='cuda:0')
epoch: 133 test_true_pfm: 3395.531282851291 sim_pfm: 229.95760051098964
episode: 532 training return: tensor(255.3877, device='cuda:0')
episode: 533 training return: tensor(295.5453, device='cuda:0')
episode: 534 training return: tensor(250.6122, device='cuda:0')
episode: 535 training return: tensor(236.6360, device='cuda:0')
epoch: 134 test_true_pfm: 3399.9152177009214 sim_pfm: 334.812715283304
episode: 536 training return: tensor(198.3416, device='cuda:0')
episode: 537 training return: tensor(226.6241, device='cuda:0')
episode: 538 training return: tensor(284.6902, device='cuda:0')
episode: 539 training return: tensor(281.4840, device='cuda:0')
epoch: 135 test_true_pfm: 3394.385906105837 sim_pfm: 253.02402972855876
episode: 540 training return: tensor(308.6205, device='cuda:0')
episode: 541 training return: tensor(203.8110, device='cuda:0')
episode: 542 training return: tensor(269.5665, device='cuda:0')
episode: 543 training return: tensor(286.4805, device='cuda:0')
epoch: 136 test_true_pfm: 3365.3511077939343 sim_pfm: 294.87560145365813
episode: 544 training return: tensor(323.1244, device='cuda:0')
episode: 545 training return: tensor(297.0860, device='cuda:0')
episode: 546 training return: tensor(243.2761, device='cuda:0')
episode: 547 training return: tensor(241.9144, device='cuda:0')
epoch: 137 test_true_pfm: 3438.48080314849 sim_pfm: 274.888218495859
episode: 548 training return: tensor(298.8675, device='cuda:0')
episode: 549 training return: tensor(215.5647, device='cuda:0')
episode: 550 training return: tensor(259.8713, device='cuda:0')
episode: 551 training return: tensor(340.1281, device='cuda:0')
epoch: 138 test_true_pfm: 3390.1776709680494 sim_pfm: 282.66515565822675
episode: 552 training return: tensor(241.0818, device='cuda:0')
episode: 553 training return: tensor(288.3099, device='cuda:0')
episode: 554 training return: tensor(255.3444, device='cuda:0')
episode: 555 training return: tensor(231.9158, device='cuda:0')
epoch: 139 test_true_pfm: 3412.055773136643 sim_pfm: 251.394975749378
episode: 556 training return: tensor(238.8208, device='cuda:0')
episode: 557 training return: tensor(238.0393, device='cuda:0')
episode: 558 training return: tensor(186.2119, device='cuda:0')
episode: 559 training return: tensor(244.5232, device='cuda:0')
epoch: 140 test_true_pfm: 3370.8159323310406 sim_pfm: 263.63020331216586
episode: 560 training return: tensor(284.1026, device='cuda:0')
episode: 561 training return: tensor(244.4041, device='cuda:0')
episode: 562 training return: tensor(244.3971, device='cuda:0')
episode: 563 training return: tensor(243.9362, device='cuda:0')
epoch: 141 test_true_pfm: 3406.7006107486354 sim_pfm: 300.5494437196273
episode: 564 training return: tensor(309.5997, device='cuda:0')
episode: 565 training return: tensor(326.3666, device='cuda:0')
episode: 566 training return: tensor(300.8097, device='cuda:0')
episode: 567 training return: tensor(228.1800, device='cuda:0')
epoch: 142 test_true_pfm: 3140.7494570577087 sim_pfm: 274.3941468493237
episode: 568 training return: tensor(197.0097, device='cuda:0')
episode: 569 training return: tensor(270.4534, device='cuda:0')
episode: 570 training return: tensor(313.1113, device='cuda:0')
episode: 571 training return: tensor(-92.7418, device='cuda:0')
epoch: 143 test_true_pfm: 3397.2687114807845 sim_pfm: 252.53065542279123
episode: 572 training return: tensor(178.6880, device='cuda:0')
episode: 573 training return: tensor(250.2482, device='cuda:0')
episode: 574 training return: tensor(236.4683, device='cuda:0')
episode: 575 training return: tensor(292.0474, device='cuda:0')
epoch: 144 test_true_pfm: 3380.440646262514 sim_pfm: 268.97929801014834
episode: 576 training return: tensor(290.1653, device='cuda:0')
episode: 577 training return: tensor(258.1073, device='cuda:0')
episode: 578 training return: tensor(254.4285, device='cuda:0')
episode: 579 training return: tensor(212.5374, device='cuda:0')
epoch: 145 test_true_pfm: 3383.154590139613 sim_pfm: 274.547397732919
episode: 580 training return: tensor(183.4745, device='cuda:0')
episode: 581 training return: tensor(262.5621, device='cuda:0')
episode: 582 training return: tensor(273.1302, device='cuda:0')
episode: 583 training return: tensor(272.6721, device='cuda:0')
epoch: 146 test_true_pfm: 3408.3992798885106 sim_pfm: 78.50007390767375
episode: 584 training return: tensor(291.4291, device='cuda:0')
episode: 585 training return: tensor(257.8750, device='cuda:0')
episode: 586 training return: tensor(260.5628, device='cuda:0')
episode: 587 training return: tensor(262.0140, device='cuda:0')
epoch: 147 test_true_pfm: 3361.967480807896 sim_pfm: 253.54179213565658
episode: 588 training return: tensor(253.5109, device='cuda:0')
episode: 589 training return: tensor(257.3129, device='cuda:0')
episode: 590 training return: tensor(272.2439, device='cuda:0')
episode: 591 training return: tensor(258.3836, device='cuda:0')
epoch: 148 test_true_pfm: 3447.134863381887 sim_pfm: 283.8756549721681
episode: 592 training return: tensor(258.2732, device='cuda:0')
episode: 593 training return: tensor(248.1237, device='cuda:0')
episode: 594 training return: tensor(268.6774, device='cuda:0')
episode: 595 training return: tensor(259.2650, device='cuda:0')
epoch: 149 test_true_pfm: 3372.74360809911 sim_pfm: 259.52456350037636
episode: 596 training return: tensor(339.0877, device='cuda:0')
episode: 597 training return: tensor(279.0302, device='cuda:0')
episode: 598 training return: tensor(283.7046, device='cuda:0')
episode: 599 training return: tensor(258.1188, device='cuda:0')
epoch: 150 test_true_pfm: 3383.4077427169323 sim_pfm: 253.31119957572082
