['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '3000']
epoch: 0 training_loss 0.2920928700268269 test_loss: 0.1155726432800293
epoch: 1 training_loss 0.14727989055216312 test_loss: 0.09113039970397949
epoch: 2 training_loss 0.11965588249266147 test_loss: 0.08160691857337951
epoch: 3 training_loss 0.09669541027396918 test_loss: 0.08048790693283081
epoch: 4 training_loss 0.08662672640755772 test_loss: 0.07500064373016357
epoch: 5 training_loss 0.07984642146155238 test_loss: 0.07441944479942322
epoch: 6 training_loss 0.07016121612861753 test_loss: 0.0727455735206604
epoch: 7 training_loss 0.06645093271508813 test_loss: 0.07969080209732056
epoch: 8 training_loss 0.06808976210653782 test_loss: 0.0837024211883545
epoch: 9 training_loss 0.06118644895032048 test_loss: 0.08741081357002259
epoch: 10 training_loss 0.05155004058033228 test_loss: 0.07909641861915588
epoch: 11 training_loss 0.04972300314344466 test_loss: 0.077572101354599
epoch: 12 training_loss 0.06127897456288338 test_loss: 0.08900309205055237
epoch: 13 training_loss 0.051999914357438684 test_loss: 0.07447148561477661
epoch: 14 training_loss 0.04357215485535562 test_loss: 0.07884613871574402
epoch: 15 training_loss 0.05305994256399572 test_loss: 0.08137482404708862
epoch: 16 training_loss 0.044702489515766504 test_loss: 0.0786604106426239
epoch: 17 training_loss 0.03932764858007431 test_loss: 0.0757129967212677
epoch: 18 training_loss 0.03708191895857453 test_loss: 0.081414133310318
epoch: 19 training_loss 0.03596876075491309 test_loss: 0.08538692593574523
epoch: 20 training_loss 0.039620483117178085 test_loss: 0.08172850012779236
epoch: 21 training_loss 0.03398968311958015 test_loss: 0.0887841522693634
epoch: 22 training_loss 0.030510442750528454 test_loss: 0.08366979360580444
epoch: 23 training_loss 0.0324023666116409 test_loss: 0.07897392511367798
epoch: 24 training_loss 0.026805557794868946 test_loss: 0.08087099194526673
epoch: 25 training_loss 0.022820595391094683 test_loss: 0.07049596905708314
epoch: 26 training_loss 0.020221746279858052 test_loss: 0.08113421201705932
epoch: 27 training_loss 0.020216954615898432 test_loss: 0.08998385667800904
epoch: 28 training_loss 0.017535573402419687 test_loss: 0.08043917417526245
epoch: 29 training_loss 0.03696197713259607 test_loss: 0.08219759464263916
epoch: 30 training_loss 0.02449021306820214 test_loss: 0.08058377504348754
epoch: 31 training_loss 0.017988074063323437 test_loss: 0.0906004250049591
epoch: 32 training_loss 0.01764224616577849 test_loss: 0.09408313632011414
epoch: 33 training_loss 0.022888271915726364 test_loss: 0.08738692998886108
epoch: 34 training_loss 0.02512604680843651 test_loss: 0.09110022783279419
epoch: 35 training_loss 0.021029414404183627 test_loss: 0.08557747602462769
epoch: 36 training_loss 0.01293139391578734 test_loss: 0.09048457145690918
epoch: 37 training_loss 0.014032193771563471 test_loss: 0.09364630579948426
epoch: 38 training_loss 0.01025898857973516 test_loss: 0.09239023923873901
epoch: 39 training_loss 0.009273640829487704 test_loss: 0.09086738228797912
epoch: 40 training_loss 0.010826825378462673 test_loss: 0.10556908845901489
epoch: 41 training_loss 0.04825138954911381 test_loss: 0.0940538763999939
epoch: 42 training_loss 0.02673073478974402 test_loss: 0.09579681754112243
epoch: 43 training_loss 0.01431180512998253 test_loss: 0.08968623280525208
epoch: 44 training_loss 0.010224024071358145 test_loss: 0.09340618252754211
epoch: 45 training_loss 0.007737204311415553 test_loss: 0.09417782425880432
epoch: 46 training_loss 0.008498081073630601 test_loss: 0.09521622657775879
epoch: 47 training_loss 0.008107438415754587 test_loss: 0.09196114540100098
epoch: 48 training_loss 0.007186948172748089 test_loss: 0.09926736354827881
epoch: 49 training_loss 0.006185350515879691 test_loss: 0.10007545948028565
epoch: 50 training_loss 0.005841194991953671 test_loss: 0.09480723738670349
epoch: 51 training_loss 0.005594376918161288 test_loss: 0.10395920276641846
epoch: 52 training_loss 0.005431242269696668 test_loss: 0.09889861941337585
epoch: 53 training_loss 0.005813300893642008 test_loss: 0.10072393417358398
epoch: 54 training_loss 0.005371425079647452 test_loss: 0.10151373147964478
epoch: 55 training_loss 0.005790899018757045 test_loss: 0.10718122720718384
epoch: 56 training_loss 0.10940260474802926 test_loss: 0.1393103241920471
epoch: 57 training_loss 0.061363544473424556 test_loss: 0.0803476095199585
epoch: 58 training_loss 0.021403530514799058 test_loss: 0.08668659329414367
epoch: 59 training_loss 0.014852861016988754 test_loss: 0.08852184414863587
epoch: 60 training_loss 0.010518593119923026 test_loss: 0.09935314655303955
epoch: 61 training_loss 0.008976816351059824 test_loss: 0.09328582882881165
epoch: 62 training_loss 0.007020611623302102 test_loss: 0.09209105372428894
epoch: 63 training_loss 0.007019489114172757 test_loss: 0.0883692443370819
epoch: 64 training_loss 0.005838340250775218 test_loss: 0.09776878952980042
epoch: 65 training_loss 0.00505587920662947 test_loss: 0.09480564594268799
epoch: 66 training_loss 0.004798929289681837 test_loss: 0.099233078956604
epoch: 67 training_loss 0.005212986372644082 test_loss: 0.09032244682312011
epoch: 68 training_loss 0.004326410583453253 test_loss: 0.09572490453720092
epoch: 69 training_loss 0.003969550424953922 test_loss: 0.10236010551452637
epoch: 70 training_loss 0.005851415228098631 test_loss: 0.10843708515167236
epoch: 71 training_loss 0.05892442318145186 test_loss: 0.12282919883728027
epoch: 72 training_loss 0.04436401626560837 test_loss: 0.10117770433425903
epoch: 73 training_loss 0.011120308306999505 test_loss: 0.08790016770362855
epoch: 74 training_loss 0.007376637733541429 test_loss: 0.09371574521064759
epoch: 75 training_loss 0.0060973082901909946 test_loss: 0.09354304075241089
epoch: 76 training_loss 0.004959198132855818 test_loss: 0.09043698310852051
epoch: 77 training_loss 0.004336141271051019 test_loss: 0.09229609370231628
epoch: 78 training_loss 0.004170321212150156 test_loss: 0.09512714743614196
epoch: 79 training_loss 0.0036172516900114717 test_loss: 0.09397801756858826
epoch: 80 training_loss 0.0038284731388557704 test_loss: 0.10189224481582641
epoch: 81 training_loss 0.0033118925138842315 test_loss: 0.09668222665786744
epoch: 82 training_loss 0.003173985902685672 test_loss: 0.0948527693748474
epoch: 83 training_loss 0.0030966688133776186 test_loss: 0.09804948568344116
epoch: 84 training_loss 0.0031264906004071238 test_loss: 0.10205836296081543
epoch: 85 training_loss 0.0028297363314777613 test_loss: 0.10255169868469238
epoch: 86 training_loss 0.002774806884699501 test_loss: 0.10289069414138793
epoch: 87 training_loss 0.0027013761579291894 test_loss: 0.10051720142364502
epoch: 88 training_loss 0.0025424641504650934 test_loss: 0.10387216806411743
epoch: 89 training_loss 0.0025603301508817823 test_loss: 0.11045278310775757
epoch: 90 training_loss 0.002470926867099479 test_loss: 0.1072658896446228
epoch: 91 training_loss 0.002583443342009559 test_loss: 0.10539458990097046
epoch: 92 training_loss 0.06528268891270272 test_loss: 0.28178284168243406
epoch: 93 training_loss 0.22919831961393355 test_loss: 0.12085658311843872
epoch: 94 training_loss 0.06339581582695246 test_loss: 0.09360433220863343
epoch: 95 training_loss 0.030336931785568594 test_loss: 0.0869587242603302
epoch: 96 training_loss 0.017395334294997157 test_loss: 0.08628262877464295
epoch: 97 training_loss 0.014950145736802369 test_loss: 0.08885630965232849
epoch: 98 training_loss 0.010650944295339287 test_loss: 0.08849431276321411
epoch: 99 training_loss 0.008930563067551703 test_loss: 0.0957703173160553
epoch: 100 training_loss 0.0069472152763046325 test_loss: 0.09639092087745667
epoch: 101 training_loss 0.005809411536902189 test_loss: 0.09823903441429138
epoch: 102 training_loss 0.006049684711033478 test_loss: 0.099921315908432
epoch: 103 training_loss 0.004942099591717124 test_loss: 0.10502557754516602
epoch: 104 training_loss 0.004567028835881501 test_loss: 0.10117267370223999
epoch: 105 training_loss 0.004028970570070669 test_loss: 0.10415271520614625
epoch: 106 training_loss 0.0038064937689341605 test_loss: 0.1023686408996582
epoch: 107 training_loss 0.0035960332106333226 test_loss: 0.10584253072738647
epoch: 108 training_loss 0.0038996792573016137 test_loss: 0.10646097660064698
epoch: 109 training_loss 0.0036866765469312666 test_loss: 0.11601253747940063
epoch: 110 training_loss 0.0032286136329639703 test_loss: 0.11155955791473389
epoch: 111 training_loss 0.0032198540028184654 test_loss: 0.11252402067184449
epoch: 112 training_loss 0.0028664941497845574 test_loss: 0.11027017831802369
epoch: 113 training_loss 0.0026893025019671768 test_loss: 0.12119807004928589
epoch: 114 training_loss 0.0029079612519126385 test_loss: 0.11641287803649902
epoch: 115 training_loss 0.0024282612028764562 test_loss: 0.1139637589454651
epoch: 116 training_loss 0.002410856809001416 test_loss: 0.11764343976974487
epoch: 117 training_loss 0.0023300510918488728 test_loss: 0.1258440136909485
epoch: 118 training_loss 0.0022184060540166683 test_loss: 0.11565227508544922
epoch: 119 training_loss 0.002207761321333237 test_loss: 0.11755454540252686
epoch: 120 training_loss 0.001913098730146885 test_loss: 0.11141771078109741
epoch: 121 training_loss 0.00209500684577506 test_loss: 0.11637871265411377
epoch: 122 training_loss 0.0024923744046827776 test_loss: 0.12055630683898926
epoch: 123 training_loss 0.002377425394370221 test_loss: 0.1174269437789917
epoch: 124 training_loss 0.001842296100803651 test_loss: 0.11953078508377075
epoch: 125 training_loss 0.0019107404342503286 test_loss: 0.11980139017105103
epoch: 126 training_loss 0.0019935584778431803 test_loss: 0.12510703802108764
epoch: 127 training_loss 0.05680076015822124 test_loss: 0.12673096656799315
epoch: 128 training_loss 0.07249424044974148 test_loss: 0.08558717966079712
epoch: 129 training_loss 0.030367328375577925 test_loss: 0.08373681902885437
epoch: 130 training_loss 0.018002466410398484 test_loss: 0.08952831029891968
epoch: 131 training_loss 0.009132334869354964 test_loss: 0.09782232642173767
epoch: 132 training_loss 0.006838063360191882 test_loss: 0.09826968312263488
epoch: 133 training_loss 0.005808263687649742 test_loss: 0.10491108894348145
epoch: 134 training_loss 0.004569670194759965 test_loss: 0.11353985071182252
epoch: 135 training_loss 0.003762099214363843 test_loss: 0.11334755420684814
epoch: 136 training_loss 0.003583340038312599 test_loss: 0.11119300127029419
epoch: 137 training_loss 0.003323638269212097 test_loss: 0.1150124192237854
epoch: 138 training_loss 0.0032235442835371942 test_loss: 0.11753689050674439
epoch: 139 training_loss 0.0028520637506153435 test_loss: 0.12455664873123169
epoch: 140 training_loss 0.0026049267104826866 test_loss: 0.12285888195037842
epoch: 141 training_loss 0.002612751352717169 test_loss: 0.12757314443588258
epoch: 142 training_loss 0.0025396374135743826 test_loss: 0.12129321098327636
epoch: 143 training_loss 0.0022670790296979246 test_loss: 0.12418545484542846
epoch: 144 training_loss 0.0021294783049961553 test_loss: 0.13102428913116454
epoch: 145 training_loss 0.0020180165767669676 test_loss: 0.12908639907836914
epoch: 146 training_loss 0.002202666932134889 test_loss: 0.12924805879592896
epoch: 147 training_loss 0.0018573543708771468 test_loss: 0.13839149475097656
epoch: 148 training_loss 0.0017594911641208456 test_loss: 0.12849044799804688
epoch: 149 training_loss 0.0019929108931683003 test_loss: 0.14009472131729125
epoch: 0 training_loss 50.41048992156983 test_loss: 11.577857208251952
epoch: 1 training_loss 18.823165168762205 test_loss: 7.1043701171875
epoch: 2 training_loss 13.36663504600525 test_loss: 5.484719848632812
epoch: 3 training_loss 10.594971475601197 test_loss: 4.446804428100586
epoch: 4 training_loss 8.621364755630493 test_loss: 3.7708072662353516
epoch: 5 training_loss 7.254865503311157 test_loss: 3.24328498840332
epoch: 6 training_loss 6.438644380569458 test_loss: 2.8868751525878906
epoch: 7 training_loss 5.830139222145081 test_loss: 2.626387023925781
epoch: 8 training_loss 5.336466031074524 test_loss: 2.4233169555664062
epoch: 9 training_loss 4.8260284566879275 test_loss: 2.2600643157958986
epoch: 10 training_loss 4.524816460609436 test_loss: 2.111604118347168
epoch: 11 training_loss 4.341397469043732 test_loss: 2.0399185180664063
epoch: 12 training_loss 4.084874536991119 test_loss: 1.9049518585205079
epoch: 13 training_loss 3.8224258184432984 test_loss: 1.8224895477294922
epoch: 14 training_loss 3.72500004529953 test_loss: 1.7512277603149413
epoch: 15 training_loss 3.50371484041214 test_loss: 1.6892461776733398
epoch: 16 training_loss 3.4802341866493225 test_loss: 1.6154890060424805
epoch: 17 training_loss 3.272798011302948 test_loss: 1.582416820526123
epoch: 18 training_loss 3.1835605621337892 test_loss: 1.5525002479553223
epoch: 19 training_loss 3.130001301765442 test_loss: 1.4879693031311034
epoch: 20 training_loss 2.9760812520980835 test_loss: 1.444350242614746
epoch: 21 training_loss 2.969005365371704 test_loss: 1.4276392936706543
epoch: 22 training_loss 2.849275677204132 test_loss: 1.3788500785827638
epoch: 23 training_loss 2.765517373085022 test_loss: 1.3421634674072265
epoch: 24 training_loss 2.7270373034477235 test_loss: 1.324361515045166
epoch: 25 training_loss 2.683710124492645 test_loss: 1.2995235443115234
epoch: 26 training_loss 2.638247091770172 test_loss: 1.268777561187744
epoch: 27 training_loss 2.580157451629639 test_loss: 1.255146598815918
epoch: 28 training_loss 2.501171042919159 test_loss: 1.2276497840881349
epoch: 29 training_loss 2.505904755592346 test_loss: 1.2068418502807616
epoch: 30 training_loss 2.4655845832824705 test_loss: 1.186893367767334
epoch: 31 training_loss 2.42066677570343 test_loss: 1.162980842590332
epoch: 32 training_loss 2.352871551513672 test_loss: 1.153751277923584
epoch: 33 training_loss 2.3495874643325805 test_loss: 1.1422712326049804
epoch: 34 training_loss 2.2754762673377993 test_loss: 1.1300948143005372
epoch: 35 training_loss 2.279886373281479 test_loss: 1.1182486534118652
epoch: 36 training_loss 2.2927381503582 test_loss: 1.1145803451538085
epoch: 37 training_loss 2.238721976280212 test_loss: 1.0898176193237306
epoch: 38 training_loss 2.203780287504196 test_loss: 1.0920737266540528
epoch: 39 training_loss 2.162870054244995 test_loss: 1.066206455230713
epoch: 40 training_loss 2.1847851169109345 test_loss: 1.0853360176086426
epoch: 41 training_loss 2.1710076463222503 test_loss: 1.0595312118530273
epoch: 42 training_loss 2.172262092828751 test_loss: 1.0547369003295899
epoch: 43 training_loss 2.0731814384460447 test_loss: 1.0227656364440918
epoch: 44 training_loss 2.129450190067291 test_loss: 1.018621826171875
epoch: 45 training_loss 2.0511304426193235 test_loss: 1.0042903900146485
epoch: 46 training_loss 2.039524768590927 test_loss: 1.011169719696045
epoch: 47 training_loss 2.0328492200374604 test_loss: 0.9953670501708984
epoch: 48 training_loss 1.999669349193573 test_loss: 0.9962478637695312
epoch: 49 training_loss 1.9725350475311278 test_loss: 0.9917835235595703
epoch: 50 training_loss 1.9428233194351197 test_loss: 0.9905003547668457
epoch: 51 training_loss 1.9564385628700256 test_loss: 0.9734389305114746
epoch: 52 training_loss 1.9369626235961914 test_loss: 0.978914737701416
epoch: 53 training_loss 1.917058584690094 test_loss: 0.9547251701354981
epoch: 54 training_loss 1.9072977995872498 test_loss: 0.9381464004516602
epoch: 55 training_loss 1.881890106201172 test_loss: 0.9560898780822754
epoch: 56 training_loss 1.8821927428245544 test_loss: 0.9405486106872558
epoch: 57 training_loss 1.8774787867069245 test_loss: 0.9547565460205079
epoch: 58 training_loss 1.8827631270885468 test_loss: 0.9327409744262696
epoch: 59 training_loss 1.8615534615516662 test_loss: 0.9179632186889648
epoch: 60 training_loss 1.8539186322689056 test_loss: 0.9385946273803711
epoch: 61 training_loss 1.8379028725624085 test_loss: 0.9266597747802734
epoch: 62 training_loss 1.8331765258312225 test_loss: 0.9135211944580078
epoch: 63 training_loss 1.8268994963169098 test_loss: 0.903316593170166
epoch: 64 training_loss 1.7948282885551452 test_loss: 0.8991920471191406
epoch: 65 training_loss 1.7976556718349457 test_loss: 0.9183341979980468
epoch: 66 training_loss 1.7888221716880799 test_loss: 0.904389762878418
epoch: 67 training_loss 1.7457172787189483 test_loss: 0.9129425048828125
epoch: 68 training_loss 1.76437828540802 test_loss: 0.8845212936401368
epoch: 69 training_loss 1.7669312000274657 test_loss: 0.886543083190918
epoch: 70 training_loss 1.7228772974014281 test_loss: 0.8752408027648926
epoch: 71 training_loss 1.738633337020874 test_loss: 0.8722167015075684
epoch: 72 training_loss 1.7232320129871368 test_loss: 0.8629899024963379
epoch: 73 training_loss 1.7172632575035096 test_loss: 0.862334156036377
epoch: 74 training_loss 1.715705270767212 test_loss: 0.8561868667602539
epoch: 75 training_loss 1.7050377583503724 test_loss: 0.8568590164184571
epoch: 76 training_loss 1.6927758610248567 test_loss: 0.8467860221862793
epoch: 77 training_loss 1.685676964521408 test_loss: 0.8651623725891113
epoch: 78 training_loss 1.697748475074768 test_loss: 0.8458330154418945
epoch: 79 training_loss 1.6712892520427705 test_loss: 0.8355823516845703
epoch: 80 training_loss 1.66605393409729 test_loss: 0.8363322257995606
epoch: 81 training_loss 1.671291309595108 test_loss: 0.835174560546875
epoch: 82 training_loss 1.6531155824661254 test_loss: 0.8314212799072266
epoch: 83 training_loss 1.6400148701667785 test_loss: 0.8292957305908203
epoch: 84 training_loss 1.6453538954257965 test_loss: 0.8241160392761231
epoch: 85 training_loss 1.6343411457538606 test_loss: 0.8221920967102051
epoch: 86 training_loss 1.646786358356476 test_loss: 0.830439281463623
epoch: 87 training_loss 1.6459521424770356 test_loss: 0.8239899635314941
epoch: 88 training_loss 1.6261076164245605 test_loss: 0.8340373992919922
epoch: 89 training_loss 1.6044241821765899 test_loss: 0.815925407409668
epoch: 90 training_loss 1.6108371341228485 test_loss: 0.8081446647644043
epoch: 91 training_loss 1.6357478451728822 test_loss: 0.8152387619018555
epoch: 92 training_loss 1.6038241350650788 test_loss: 0.8048970222473144
epoch: 93 training_loss 1.6166678714752196 test_loss: 0.8121172904968261
epoch: 94 training_loss 1.608563743829727 test_loss: 0.7945425033569335
epoch: 95 training_loss 1.5893190836906432 test_loss: 0.8296609878540039
epoch: 96 training_loss 1.574661157131195 test_loss: 0.8035327911376953
epoch: 97 training_loss 1.5687862861156463 test_loss: 0.7952518463134766
epoch: 98 training_loss 1.567560341358185 test_loss: 0.7979470729827881
epoch: 99 training_loss 1.583294906616211 test_loss: 0.7956498146057129
epoch: 100 training_loss 1.5826629710197448 test_loss: 0.8261985778808594
epoch: 101 training_loss 1.5649808061122894 test_loss: 0.7875215530395507
epoch: 102 training_loss 1.5803047323226929 test_loss: 0.7836177349090576
epoch: 103 training_loss 1.5586148798465729 test_loss: 0.7844520092010498
epoch: 104 training_loss 1.553415548801422 test_loss: 0.7869555950164795
epoch: 105 training_loss 1.5617710030078888 test_loss: 0.7866668224334716
epoch: 106 training_loss 1.5462830829620362 test_loss: 0.784804105758667
epoch: 107 training_loss 1.5342729413509368 test_loss: 0.7987072467803955
epoch: 108 training_loss 1.5547320938110352 test_loss: 0.7702273845672607
epoch: 109 training_loss 1.528481388092041 test_loss: 0.7669403553009033
epoch: 110 training_loss 1.542045772075653 test_loss: 0.7753374099731445
epoch: 111 training_loss 1.5266763365268707 test_loss: 0.7764294624328614
epoch: 112 training_loss 1.5210755670070648 test_loss: 0.7714992523193359
epoch: 113 training_loss 1.4988021957874298 test_loss: 0.7619624614715577
epoch: 114 training_loss 1.5103037858009338 test_loss: 0.7720223903656006
epoch: 115 training_loss 1.5119316017627715 test_loss: 0.7586199283599854
epoch: 116 training_loss 1.5174307608604432 test_loss: 0.7625422954559327
epoch: 117 training_loss 1.5073303914070129 test_loss: 0.7736181259155274
epoch: 118 training_loss 1.4875219225883485 test_loss: 0.7685210227966308
epoch: 119 training_loss 1.5111403489112853 test_loss: 0.7648327350616455
epoch: 120 training_loss 1.4830072832107544 test_loss: 0.7558237552642822
epoch: 121 training_loss 1.4931560897827147 test_loss: 0.7518799304962158
epoch: 122 training_loss 1.4882924830913544 test_loss: 0.7571753978729248
epoch: 123 training_loss 1.4836988639831543 test_loss: 0.7601507663726806
epoch: 124 training_loss 1.484981151819229 test_loss: 0.7514404773712158
epoch: 125 training_loss 1.4753594565391541 test_loss: 0.7551034450531006
epoch: 126 training_loss 1.4722487676143645 test_loss: 0.7448136329650878
epoch: 127 training_loss 1.4614297699928285 test_loss: 0.750859546661377
epoch: 128 training_loss 1.46485107421875 test_loss: 0.745904541015625
epoch: 129 training_loss 1.4664590537548066 test_loss: 0.7441680431365967
epoch: 130 training_loss 1.4722436106204986 test_loss: 0.7470835208892822
epoch: 131 training_loss 1.4535287427902222 test_loss: 0.7389365196228027
epoch: 132 training_loss 1.4548898231983185 test_loss: 0.7577454090118408
epoch: 133 training_loss 1.4455259239673615 test_loss: 0.7391219139099121
epoch: 134 training_loss 1.4553551304340362 test_loss: 0.730727243423462
epoch: 135 training_loss 1.4531108856201171 test_loss: 0.7355994701385498
epoch: 136 training_loss 1.448525425195694 test_loss: 0.7565694332122803
epoch: 137 training_loss 1.4583803558349608 test_loss: 0.7301577091217041
epoch: 138 training_loss 1.4473401856422425 test_loss: 0.7291777610778809
epoch: 139 training_loss 1.4477607321739197 test_loss: 0.7290853500366211
epoch: 140 training_loss 1.4284886741638183 test_loss: 0.7267491340637207
epoch: 141 training_loss 1.4414627957344055 test_loss: 0.7397653102874756
epoch: 142 training_loss 1.4390494561195373 test_loss: 0.7358184814453125
epoch: 143 training_loss 1.4323061001300812 test_loss: 0.7655442237854004
epoch: 144 training_loss 1.442392383813858 test_loss: 0.728114652633667
epoch: 145 training_loss 1.4198141968250275 test_loss: 0.7250229358673096
epoch: 146 training_loss 1.4204490029811858 test_loss: 0.7254445075988769
epoch: 147 training_loss 1.428724511861801 test_loss: 0.7258618831634521
epoch: 148 training_loss 1.4146674811840056 test_loss: 0.7215714931488038
epoch: 149 training_loss 1.4264187252521514 test_loss: 0.7380520820617675
5125.383839513996
episode: 0 training return: tensor(21.0376, device='cuda:0')
episode: 1 training return: tensor(-107.6354, device='cuda:0')
episode: 2 training return: tensor(-16.8238, device='cuda:0')
episode: 3 training return: tensor(132.5855, device='cuda:0')
epoch: 1 test_true_pfm: 5070.318950491368 sim_pfm: 204.55242699786322
episode: 4 training return: tensor(-53.6860, device='cuda:0')
episode: 5 training return: tensor(-24.7850, device='cuda:0')
episode: 6 training return: tensor(44.7289, device='cuda:0')
episode: 7 training return: tensor(-71.0852, device='cuda:0')
epoch: 2 test_true_pfm: 5039.475768927042 sim_pfm: 96.9794790444624
episode: 8 training return: tensor(42.1005, device='cuda:0')
episode: 9 training return: tensor(134.8766, device='cuda:0')
episode: 10 training return: tensor(-37.2343, device='cuda:0')
episode: 11 training return: tensor(-38.9025, device='cuda:0')
epoch: 3 test_true_pfm: 5039.1563155695685 sim_pfm: 80.44175153768931
episode: 12 training return: tensor(240.4585, device='cuda:0')
episode: 13 training return: tensor(-48.8404, device='cuda:0')
episode: 14 training return: tensor(61.7177, device='cuda:0')
episode: 15 training return: tensor(235.2770, device='cuda:0')
epoch: 4 test_true_pfm: 5044.340790321238 sim_pfm: 48.23681975117264
episode: 16 training return: tensor(78.8140, device='cuda:0')
episode: 17 training return: tensor(116.1183, device='cuda:0')
episode: 18 training return: tensor(93.2536, device='cuda:0')
episode: 19 training return: tensor(219.7348, device='cuda:0')
epoch: 5 test_true_pfm: 5119.889863499729 sim_pfm: 107.73059390764683
episode: 20 training return: tensor(16.4951, device='cuda:0')
episode: 21 training return: tensor(16.1272, device='cuda:0')
episode: 22 training return: tensor(136.1238, device='cuda:0')
episode: 23 training return: tensor(143.3128, device='cuda:0')
epoch: 6 test_true_pfm: 5127.404154176905 sim_pfm: 29.705891700771947
episode: 24 training return: tensor(243.8428, device='cuda:0')
episode: 25 training return: tensor(185.9946, device='cuda:0')
episode: 26 training return: tensor(164.0109, device='cuda:0')
episode: 27 training return: tensor(67.6784, device='cuda:0')
epoch: 7 test_true_pfm: 5157.351605021763 sim_pfm: 52.11412881241025
episode: 28 training return: tensor(260.5071, device='cuda:0')
episode: 29 training return: tensor(-40.7217, device='cuda:0')
episode: 30 training return: tensor(138.3719, device='cuda:0')
episode: 31 training return: tensor(271.1691, device='cuda:0')
epoch: 8 test_true_pfm: 4717.7549349672345 sim_pfm: 238.95276545123002
episode: 32 training return: tensor(190.4063, device='cuda:0')
episode: 33 training return: tensor(165.8742, device='cuda:0')
episode: 34 training return: tensor(155.1489, device='cuda:0')
episode: 35 training return: tensor(259.6610, device='cuda:0')
epoch: 9 test_true_pfm: 5193.277754258544 sim_pfm: 312.114971411104
episode: 36 training return: tensor(218.8442, device='cuda:0')
episode: 37 training return: tensor(48.2492, device='cuda:0')
episode: 38 training return: tensor(102.9051, device='cuda:0')
episode: 39 training return: tensor(112.7314, device='cuda:0')
epoch: 10 test_true_pfm: 5128.255736814289 sim_pfm: 140.38725578805315
episode: 40 training return: tensor(192.2053, device='cuda:0')
episode: 41 training return: tensor(226.4929, device='cuda:0')
episode: 42 training return: tensor(158.7526, device='cuda:0')
episode: 43 training return: tensor(15.3204, device='cuda:0')
epoch: 11 test_true_pfm: 5200.574978594713 sim_pfm: 194.01447176700458
episode: 44 training return: tensor(279.8972, device='cuda:0')
episode: 45 training return: tensor(141.7322, device='cuda:0')
episode: 46 training return: tensor(67.3059, device='cuda:0')
episode: 47 training return: tensor(11.7305, device='cuda:0')
epoch: 12 test_true_pfm: 5201.27053014666 sim_pfm: 293.30910014997545
episode: 48 training return: tensor(106.3520, device='cuda:0')
episode: 49 training return: tensor(294.4446, device='cuda:0')
episode: 50 training return: tensor(-46.0417, device='cuda:0')
episode: 51 training return: tensor(83.2576, device='cuda:0')
epoch: 13 test_true_pfm: 5220.653683582243 sim_pfm: 245.33353185715774
episode: 52 training return: tensor(272.9836, device='cuda:0')
episode: 53 training return: tensor(113.5636, device='cuda:0')
episode: 54 training return: tensor(56.7724, device='cuda:0')
episode: 55 training return: tensor(233.5757, device='cuda:0')
epoch: 14 test_true_pfm: 5216.640749885445 sim_pfm: 348.21948974019807
episode: 56 training return: tensor(103.0324, device='cuda:0')
episode: 57 training return: tensor(257.1645, device='cuda:0')
episode: 58 training return: tensor(197.8699, device='cuda:0')
episode: 59 training return: tensor(170.1768, device='cuda:0')
epoch: 15 test_true_pfm: 5272.4086651352845 sim_pfm: 381.2128237555735
episode: 60 training return: tensor(211.3425, device='cuda:0')
episode: 61 training return: tensor(154.9864, device='cuda:0')
episode: 62 training return: tensor(328.7951, device='cuda:0')
episode: 63 training return: tensor(253.5900, device='cuda:0')
epoch: 16 test_true_pfm: 5232.371401958194 sim_pfm: 334.43844286041957
episode: 64 training return: tensor(244.0856, device='cuda:0')
episode: 65 training return: tensor(256.2142, device='cuda:0')
episode: 66 training return: tensor(393.5534, device='cuda:0')
episode: 67 training return: tensor(445.0422, device='cuda:0')
epoch: 17 test_true_pfm: 5237.8436152792165 sim_pfm: 346.8812372853863
episode: 68 training return: tensor(308.8915, device='cuda:0')
episode: 69 training return: tensor(50.5539, device='cuda:0')
episode: 70 training return: tensor(322.4709, device='cuda:0')
episode: 71 training return: tensor(148.9405, device='cuda:0')
epoch: 18 test_true_pfm: 5219.32663635156 sim_pfm: 332.45480483754847
episode: 72 training return: tensor(217.0594, device='cuda:0')
episode: 73 training return: tensor(203.8902, device='cuda:0')
episode: 74 training return: tensor(160.0327, device='cuda:0')
episode: 75 training return: tensor(353.6850, device='cuda:0')
epoch: 19 test_true_pfm: 4215.936077844249 sim_pfm: 398.7524724584073
episode: 76 training return: tensor(134.5822, device='cuda:0')
episode: 77 training return: tensor(267.9977, device='cuda:0')
episode: 78 training return: tensor(309.2327, device='cuda:0')
episode: 79 training return: tensor(158.5315, device='cuda:0')
epoch: 20 test_true_pfm: 5378.819552537293 sim_pfm: 465.7238579791253
episode: 80 training return: tensor(347.6404, device='cuda:0')
episode: 81 training return: tensor(278.6512, device='cuda:0')
episode: 82 training return: tensor(125.6351, device='cuda:0')
episode: 83 training return: tensor(192.6946, device='cuda:0')
epoch: 21 test_true_pfm: 5414.207009980289 sim_pfm: 288.47000922448933
episode: 84 training return: tensor(347.6784, device='cuda:0')
episode: 85 training return: tensor(100.9254, device='cuda:0')
episode: 86 training return: tensor(372.0040, device='cuda:0')
episode: 87 training return: tensor(146.5583, device='cuda:0')
epoch: 22 test_true_pfm: 5286.25094559611 sim_pfm: 411.0163312165532
episode: 88 training return: tensor(307.6120, device='cuda:0')
episode: 89 training return: tensor(366.8391, device='cuda:0')
episode: 90 training return: tensor(404.3765, device='cuda:0')
episode: 91 training return: tensor(269.9509, device='cuda:0')
epoch: 23 test_true_pfm: 5429.495366570634 sim_pfm: 413.4644281172659
episode: 92 training return: tensor(319.0990, device='cuda:0')
episode: 93 training return: tensor(402.2680, device='cuda:0')
episode: 94 training return: tensor(214.2601, device='cuda:0')
episode: 95 training return: tensor(323.6698, device='cuda:0')
epoch: 24 test_true_pfm: 5360.586259736773 sim_pfm: 544.8048790463945
episode: 96 training return: tensor(289.5432, device='cuda:0')
episode: 97 training return: tensor(238.8609, device='cuda:0')
episode: 98 training return: tensor(245.1777, device='cuda:0')
episode: 99 training return: tensor(106.1309, device='cuda:0')
epoch: 25 test_true_pfm: 5331.633352532664 sim_pfm: 288.2778731651294
episode: 100 training return: tensor(261.2843, device='cuda:0')
episode: 101 training return: tensor(291.3973, device='cuda:0')
episode: 102 training return: tensor(285.8165, device='cuda:0')
episode: 103 training return: tensor(175.9478, device='cuda:0')
epoch: 26 test_true_pfm: 5472.238879581775 sim_pfm: 372.11183753584436
episode: 104 training return: tensor(315.8789, device='cuda:0')
episode: 105 training return: tensor(314.4804, device='cuda:0')
episode: 106 training return: tensor(366.9838, device='cuda:0')
episode: 107 training return: tensor(334.3436, device='cuda:0')
epoch: 27 test_true_pfm: 5353.131292672369 sim_pfm: 364.1451844009959
episode: 108 training return: tensor(492.6891, device='cuda:0')
episode: 109 training return: tensor(500.9629, device='cuda:0')
episode: 110 training return: tensor(389.9237, device='cuda:0')
episode: 111 training return: tensor(384.0534, device='cuda:0')
epoch: 28 test_true_pfm: 5445.0160944473755 sim_pfm: 371.41691525552113
episode: 112 training return: tensor(299.6086, device='cuda:0')
episode: 113 training return: tensor(463.9241, device='cuda:0')
episode: 114 training return: tensor(413.6037, device='cuda:0')
episode: 115 training return: tensor(239.9998, device='cuda:0')
epoch: 29 test_true_pfm: 5446.041943275561 sim_pfm: 524.8507751118547
episode: 116 training return: tensor(252.8085, device='cuda:0')
episode: 117 training return: tensor(86.0722, device='cuda:0')
episode: 118 training return: tensor(393.5238, device='cuda:0')
episode: 119 training return: tensor(384.6313, device='cuda:0')
epoch: 30 test_true_pfm: 5390.019137407444 sim_pfm: 440.4600341433349
episode: 120 training return: tensor(298.9081, device='cuda:0')
episode: 121 training return: tensor(232.6229, device='cuda:0')
episode: 122 training return: tensor(309.1837, device='cuda:0')
episode: 123 training return: tensor(162.2531, device='cuda:0')
epoch: 31 test_true_pfm: 5479.227298201074 sim_pfm: 423.3034631636304
episode: 124 training return: tensor(274.5614, device='cuda:0')
episode: 125 training return: tensor(302.7432, device='cuda:0')
episode: 126 training return: tensor(370.1158, device='cuda:0')
episode: 127 training return: tensor(424.5035, device='cuda:0')
epoch: 32 test_true_pfm: 5506.950438998191 sim_pfm: 449.5129939542773
episode: 128 training return: tensor(328.2774, device='cuda:0')
episode: 129 training return: tensor(370.6630, device='cuda:0')
episode: 130 training return: tensor(352.5290, device='cuda:0')
episode: 131 training return: tensor(223.6082, device='cuda:0')
epoch: 33 test_true_pfm: 5379.551783733446 sim_pfm: 445.7733927128332
episode: 132 training return: tensor(303.4716, device='cuda:0')
episode: 133 training return: tensor(482.1190, device='cuda:0')
episode: 134 training return: tensor(400.0397, device='cuda:0')
episode: 135 training return: tensor(206.1220, device='cuda:0')
epoch: 34 test_true_pfm: 5432.426749615558 sim_pfm: 377.78412445192225
episode: 136 training return: tensor(78.5833, device='cuda:0')
episode: 137 training return: tensor(243.1457, device='cuda:0')
episode: 138 training return: tensor(175.9609, device='cuda:0')
episode: 139 training return: tensor(369.8035, device='cuda:0')
epoch: 35 test_true_pfm: 5439.660561579956 sim_pfm: 464.4233799500701
episode: 140 training return: tensor(281.3027, device='cuda:0')
episode: 141 training return: tensor(489.5087, device='cuda:0')
episode: 142 training return: tensor(209.6966, device='cuda:0')
episode: 143 training return: tensor(246.9411, device='cuda:0')
epoch: 36 test_true_pfm: 5445.168020971791 sim_pfm: 405.0132701857656
episode: 144 training return: tensor(310.7325, device='cuda:0')
episode: 145 training return: tensor(355.2101, device='cuda:0')
episode: 146 training return: tensor(341.7324, device='cuda:0')
episode: 147 training return: tensor(444.9997, device='cuda:0')
epoch: 37 test_true_pfm: 5557.084876640482 sim_pfm: 487.885984241555
episode: 148 training return: tensor(518.5679, device='cuda:0')
episode: 149 training return: tensor(347.1388, device='cuda:0')
episode: 150 training return: tensor(483.6115, device='cuda:0')
episode: 151 training return: tensor(389.0808, device='cuda:0')
epoch: 38 test_true_pfm: 5515.005510069998 sim_pfm: 446.8743791246864
episode: 152 training return: tensor(303.4147, device='cuda:0')
episode: 153 training return: tensor(373.8051, device='cuda:0')
episode: 154 training return: tensor(170.7231, device='cuda:0')
episode: 155 training return: tensor(312.9066, device='cuda:0')
epoch: 39 test_true_pfm: 5532.838785859499 sim_pfm: 449.32608252639574
episode: 156 training return: tensor(286.4489, device='cuda:0')
episode: 157 training return: tensor(410.6190, device='cuda:0')
episode: 158 training return: tensor(497.0852, device='cuda:0')
episode: 159 training return: tensor(279.7795, device='cuda:0')
epoch: 40 test_true_pfm: 5450.9714602651875 sim_pfm: 488.9812013602738
episode: 160 training return: tensor(339.4522, device='cuda:0')
episode: 161 training return: tensor(338.9340, device='cuda:0')
episode: 162 training return: tensor(118.4266, device='cuda:0')
episode: 163 training return: tensor(390.2882, device='cuda:0')
epoch: 41 test_true_pfm: 5433.648560822089 sim_pfm: 466.37148661271203
episode: 164 training return: tensor(370.4547, device='cuda:0')
episode: 165 training return: tensor(325.0882, device='cuda:0')
episode: 166 training return: tensor(458.6447, device='cuda:0')
episode: 167 training return: tensor(250.2755, device='cuda:0')
epoch: 42 test_true_pfm: 5535.721865886701 sim_pfm: 465.95115252925706
episode: 168 training return: tensor(269.0506, device='cuda:0')
episode: 169 training return: tensor(377.0680, device='cuda:0')
episode: 170 training return: tensor(481.6781, device='cuda:0')
episode: 171 training return: tensor(349.8982, device='cuda:0')
epoch: 43 test_true_pfm: 5432.287993567924 sim_pfm: 369.6929907380448
episode: 172 training return: tensor(519.7617, device='cuda:0')
episode: 173 training return: tensor(361.5414, device='cuda:0')
episode: 174 training return: tensor(418.8441, device='cuda:0')
episode: 175 training return: tensor(398.3330, device='cuda:0')
epoch: 44 test_true_pfm: 5515.648365635246 sim_pfm: 526.6055139518963
episode: 176 training return: tensor(286.5726, device='cuda:0')
episode: 177 training return: tensor(408.1826, device='cuda:0')
episode: 178 training return: tensor(440.7439, device='cuda:0')
episode: 179 training return: tensor(383.8662, device='cuda:0')
epoch: 45 test_true_pfm: 5496.851472405295 sim_pfm: 518.7401532175718
episode: 180 training return: tensor(405.8940, device='cuda:0')
episode: 181 training return: tensor(333.3765, device='cuda:0')
episode: 182 training return: tensor(495.8643, device='cuda:0')
episode: 183 training return: tensor(504.7913, device='cuda:0')
epoch: 46 test_true_pfm: 5498.627032735884 sim_pfm: 544.4634133307263
episode: 184 training return: tensor(378.6779, device='cuda:0')
episode: 185 training return: tensor(337.2944, device='cuda:0')
episode: 186 training return: tensor(322.1606, device='cuda:0')
episode: 187 training return: tensor(335.8571, device='cuda:0')
epoch: 47 test_true_pfm: 5524.921177255182 sim_pfm: 483.0045670401305
episode: 188 training return: tensor(359.0421, device='cuda:0')
episode: 189 training return: tensor(412.5232, device='cuda:0')
episode: 190 training return: tensor(221.2814, device='cuda:0')
episode: 191 training return: tensor(505.6422, device='cuda:0')
epoch: 48 test_true_pfm: 5533.229862957538 sim_pfm: 538.5679286181694
episode: 192 training return: tensor(355.7318, device='cuda:0')
episode: 193 training return: tensor(196.3682, device='cuda:0')
episode: 194 training return: tensor(437.5275, device='cuda:0')
episode: 195 training return: tensor(356.7574, device='cuda:0')
epoch: 49 test_true_pfm: 5485.5678959810275 sim_pfm: 605.4423537755501
episode: 196 training return: tensor(425.9978, device='cuda:0')
episode: 197 training return: tensor(242.7233, device='cuda:0')
episode: 198 training return: tensor(469.3617, device='cuda:0')
episode: 199 training return: tensor(382.3975, device='cuda:0')
epoch: 50 test_true_pfm: 5502.016701885739 sim_pfm: 478.3771831831352
episode: 200 training return: tensor(378.8671, device='cuda:0')
episode: 201 training return: tensor(224.7633, device='cuda:0')
episode: 202 training return: tensor(188.2787, device='cuda:0')
episode: 203 training return: tensor(281.5345, device='cuda:0')
epoch: 51 test_true_pfm: 5618.5570944648025 sim_pfm: 461.6036997201154
episode: 204 training return: tensor(451.4731, device='cuda:0')
episode: 205 training return: tensor(423.2595, device='cuda:0')
episode: 206 training return: tensor(398.6230, device='cuda:0')
episode: 207 training return: tensor(515.9943, device='cuda:0')
epoch: 52 test_true_pfm: 5472.629779718674 sim_pfm: 485.6808205046691
episode: 208 training return: tensor(473.6020, device='cuda:0')
episode: 209 training return: tensor(298.3376, device='cuda:0')
episode: 210 training return: tensor(394.5835, device='cuda:0')
episode: 211 training return: tensor(435.5710, device='cuda:0')
epoch: 53 test_true_pfm: 5490.185519890194 sim_pfm: 550.1949752846267
episode: 212 training return: tensor(511.8657, device='cuda:0')
episode: 213 training return: tensor(461.7793, device='cuda:0')
episode: 214 training return: tensor(332.3835, device='cuda:0')
episode: 215 training return: tensor(262.8580, device='cuda:0')
epoch: 54 test_true_pfm: 5392.582111304175 sim_pfm: 582.528530234859
episode: 216 training return: tensor(549.6920, device='cuda:0')
episode: 217 training return: tensor(264.0989, device='cuda:0')
episode: 218 training return: tensor(314.4280, device='cuda:0')
episode: 219 training return: tensor(271.8772, device='cuda:0')
epoch: 55 test_true_pfm: 5536.812979915543 sim_pfm: 525.2754013767699
episode: 220 training return: tensor(375.0356, device='cuda:0')
episode: 221 training return: tensor(384.2284, device='cuda:0')
episode: 222 training return: tensor(468.6228, device='cuda:0')
episode: 223 training return: tensor(446.7495, device='cuda:0')
epoch: 56 test_true_pfm: 5478.819187749131 sim_pfm: 534.8462213503857
episode: 224 training return: tensor(423.5851, device='cuda:0')
episode: 225 training return: tensor(299.1055, device='cuda:0')
episode: 226 training return: tensor(353.8219, device='cuda:0')
episode: 227 training return: tensor(540.0261, device='cuda:0')
epoch: 57 test_true_pfm: 5536.968236412021 sim_pfm: 553.4667040304436
episode: 228 training return: tensor(482.1382, device='cuda:0')
episode: 229 training return: tensor(438.8541, device='cuda:0')
episode: 230 training return: tensor(315.8030, device='cuda:0')
episode: 231 training return: tensor(329.5104, device='cuda:0')
epoch: 58 test_true_pfm: 5521.152767449125 sim_pfm: 597.7786822461834
episode: 232 training return: tensor(441.8661, device='cuda:0')
episode: 233 training return: tensor(401.8999, device='cuda:0')
episode: 234 training return: tensor(320.0074, device='cuda:0')
episode: 235 training return: tensor(337.2633, device='cuda:0')
epoch: 59 test_true_pfm: 5510.998629525623 sim_pfm: 596.213790254745
episode: 236 training return: tensor(423.7444, device='cuda:0')
episode: 237 training return: tensor(411.3518, device='cuda:0')
episode: 238 training return: tensor(262.2025, device='cuda:0')
episode: 239 training return: tensor(25.6476, device='cuda:0')
epoch: 60 test_true_pfm: 5492.571668472382 sim_pfm: 507.1465694900835
episode: 240 training return: tensor(374.0641, device='cuda:0')
episode: 241 training return: tensor(407.9036, device='cuda:0')
episode: 242 training return: tensor(424.8889, device='cuda:0')
episode: 243 training return: tensor(422.4712, device='cuda:0')
epoch: 61 test_true_pfm: 5572.380701349041 sim_pfm: 535.7054321331283
episode: 244 training return: tensor(175.2351, device='cuda:0')
episode: 245 training return: tensor(391.6908, device='cuda:0')
episode: 246 training return: tensor(515.0252, device='cuda:0')
episode: 247 training return: tensor(226.4079, device='cuda:0')
epoch: 62 test_true_pfm: 5565.115935809492 sim_pfm: 575.7960643060118
episode: 248 training return: tensor(340.6117, device='cuda:0')
episode: 249 training return: tensor(496.3704, device='cuda:0')
episode: 250 training return: tensor(278.8234, device='cuda:0')
episode: 251 training return: tensor(513.8356, device='cuda:0')
epoch: 63 test_true_pfm: 5551.3625908333515 sim_pfm: 556.3582945676753
episode: 252 training return: tensor(215.5394, device='cuda:0')
episode: 253 training return: tensor(207.1283, device='cuda:0')
episode: 254 training return: tensor(267.3871, device='cuda:0')
episode: 255 training return: tensor(248.0816, device='cuda:0')
epoch: 64 test_true_pfm: 5504.9444430539215 sim_pfm: 527.5489584562989
episode: 256 training return: tensor(513.0881, device='cuda:0')
episode: 257 training return: tensor(372.6821, device='cuda:0')
episode: 258 training return: tensor(360.3516, device='cuda:0')
episode: 259 training return: tensor(286.3937, device='cuda:0')
epoch: 65 test_true_pfm: 5633.138021665684 sim_pfm: 592.9586161763096
episode: 260 training return: tensor(460.9362, device='cuda:0')
episode: 261 training return: tensor(518.7852, device='cuda:0')
episode: 262 training return: tensor(523.8412, device='cuda:0')
episode: 263 training return: tensor(551.8847, device='cuda:0')
epoch: 66 test_true_pfm: 5518.3922473999 sim_pfm: 522.5296335427556
episode: 264 training return: tensor(223.4663, device='cuda:0')
episode: 265 training return: tensor(404.0432, device='cuda:0')
episode: 266 training return: tensor(632.3981, device='cuda:0')
episode: 267 training return: tensor(477.0704, device='cuda:0')
epoch: 67 test_true_pfm: 5461.217930755237 sim_pfm: 508.48420704342425
episode: 268 training return: tensor(414.0002, device='cuda:0')
episode: 269 training return: tensor(464.8414, device='cuda:0')
episode: 270 training return: tensor(351.6849, device='cuda:0')
episode: 271 training return: tensor(500.9377, device='cuda:0')
epoch: 68 test_true_pfm: 5622.242355688504 sim_pfm: 437.3021716703661
episode: 272 training return: tensor(329.2098, device='cuda:0')
episode: 273 training return: tensor(444.8885, device='cuda:0')
episode: 274 training return: tensor(511.5736, device='cuda:0')
episode: 275 training return: tensor(500.0293, device='cuda:0')
epoch: 69 test_true_pfm: 5548.5982294348205 sim_pfm: 510.12451582610566
episode: 276 training return: tensor(445.0232, device='cuda:0')
episode: 277 training return: tensor(313.7284, device='cuda:0')
episode: 278 training return: tensor(440.3972, device='cuda:0')
episode: 279 training return: tensor(392.3995, device='cuda:0')
epoch: 70 test_true_pfm: 5467.821785684104 sim_pfm: 566.8330949624846
episode: 280 training return: tensor(473.0918, device='cuda:0')
episode: 281 training return: tensor(431.2670, device='cuda:0')
episode: 282 training return: tensor(587.5536, device='cuda:0')
episode: 283 training return: tensor(449.9274, device='cuda:0')
epoch: 71 test_true_pfm: 5566.086721517614 sim_pfm: 623.9487565732949
episode: 284 training return: tensor(416.7543, device='cuda:0')
episode: 285 training return: tensor(423.4832, device='cuda:0')
episode: 286 training return: tensor(392.1187, device='cuda:0')
episode: 287 training return: tensor(326.0645, device='cuda:0')
epoch: 72 test_true_pfm: 5560.472772002536 sim_pfm: 536.6717901533897
episode: 288 training return: tensor(369.0802, device='cuda:0')
episode: 289 training return: tensor(484.8333, device='cuda:0')
episode: 290 training return: tensor(476.7775, device='cuda:0')
episode: 291 training return: tensor(640.3684, device='cuda:0')
epoch: 73 test_true_pfm: 5461.452572739555 sim_pfm: 540.2524885283783
episode: 292 training return: tensor(431.9119, device='cuda:0')
episode: 293 training return: tensor(433.2317, device='cuda:0')
episode: 294 training return: tensor(256.6162, device='cuda:0')
episode: 295 training return: tensor(441.1368, device='cuda:0')
epoch: 74 test_true_pfm: 5574.368876562946 sim_pfm: 543.7427981194729
episode: 296 training return: tensor(422.9457, device='cuda:0')
episode: 297 training return: tensor(504.7018, device='cuda:0')
episode: 298 training return: tensor(164.2244, device='cuda:0')
episode: 299 training return: tensor(311.6752, device='cuda:0')
epoch: 75 test_true_pfm: 5544.489550494346 sim_pfm: 496.8544820980169
episode: 300 training return: tensor(495.8481, device='cuda:0')
episode: 301 training return: tensor(548.9114, device='cuda:0')
episode: 302 training return: tensor(441.3069, device='cuda:0')
episode: 303 training return: tensor(520.5173, device='cuda:0')
epoch: 76 test_true_pfm: 5567.966952545882 sim_pfm: 527.761152918858
episode: 304 training return: tensor(566.0864, device='cuda:0')
episode: 305 training return: tensor(443.7727, device='cuda:0')
episode: 306 training return: tensor(414.5461, device='cuda:0')
episode: 307 training return: tensor(457.4352, device='cuda:0')
epoch: 77 test_true_pfm: 5595.452190116393 sim_pfm: 551.5940069304391
episode: 308 training return: tensor(635.8796, device='cuda:0')
episode: 309 training return: tensor(427.2043, device='cuda:0')
episode: 310 training return: tensor(350.5994, device='cuda:0')
episode: 311 training return: tensor(434.2206, device='cuda:0')
epoch: 78 test_true_pfm: 5475.1303219281135 sim_pfm: 596.7169713385714
episode: 312 training return: tensor(421.4001, device='cuda:0')
episode: 313 training return: tensor(524.1976, device='cuda:0')
episode: 314 training return: tensor(413.2927, device='cuda:0')
episode: 315 training return: tensor(324.0753, device='cuda:0')
epoch: 79 test_true_pfm: 5563.275576340679 sim_pfm: 545.9855465370541
episode: 316 training return: tensor(560.0950, device='cuda:0')
episode: 317 training return: tensor(372.0036, device='cuda:0')
episode: 318 training return: tensor(317.0620, device='cuda:0')
episode: 319 training return: tensor(347.9263, device='cuda:0')
epoch: 80 test_true_pfm: 5593.5175795152145 sim_pfm: 600.7716142229425
episode: 320 training return: tensor(494.5500, device='cuda:0')
episode: 321 training return: tensor(585.5949, device='cuda:0')
episode: 322 training return: tensor(562.9792, device='cuda:0')
episode: 323 training return: tensor(535.3920, device='cuda:0')
epoch: 81 test_true_pfm: 5600.777859561508 sim_pfm: 574.658677720523
episode: 324 training return: tensor(461.6747, device='cuda:0')
episode: 325 training return: tensor(492.7767, device='cuda:0')
episode: 326 training return: tensor(527.0527, device='cuda:0')
episode: 327 training return: tensor(424.0184, device='cuda:0')
epoch: 82 test_true_pfm: 5536.383332918485 sim_pfm: 618.7025740268097
episode: 328 training return: tensor(548.8980, device='cuda:0')
episode: 329 training return: tensor(604.6967, device='cuda:0')
episode: 330 training return: tensor(532.0569, device='cuda:0')
episode: 331 training return: tensor(574.2606, device='cuda:0')
epoch: 83 test_true_pfm: 5583.322643875598 sim_pfm: 598.6848983237675
episode: 332 training return: tensor(315.3815, device='cuda:0')
episode: 333 training return: tensor(502.9837, device='cuda:0')
episode: 334 training return: tensor(313.2689, device='cuda:0')
episode: 335 training return: tensor(325.7953, device='cuda:0')
epoch: 84 test_true_pfm: 5574.3010456306465 sim_pfm: 583.6830252819345
episode: 336 training return: tensor(542.6768, device='cuda:0')
episode: 337 training return: tensor(601.8179, device='cuda:0')
episode: 338 training return: tensor(527.1984, device='cuda:0')
episode: 339 training return: tensor(379.9048, device='cuda:0')
epoch: 85 test_true_pfm: 5721.486497380002 sim_pfm: 592.5234477581301
episode: 340 training return: tensor(467.8297, device='cuda:0')
episode: 341 training return: tensor(501.3253, device='cuda:0')
episode: 342 training return: tensor(407.7792, device='cuda:0')
episode: 343 training return: tensor(492.9815, device='cuda:0')
epoch: 86 test_true_pfm: 5550.139865562297 sim_pfm: 584.6009038359238
episode: 344 training return: tensor(414.5289, device='cuda:0')
episode: 345 training return: tensor(491.9579, device='cuda:0')
episode: 346 training return: tensor(497.6210, device='cuda:0')
episode: 347 training return: tensor(444.7901, device='cuda:0')
epoch: 87 test_true_pfm: 5622.849236276714 sim_pfm: 657.4464527169863
episode: 348 training return: tensor(436.4028, device='cuda:0')
episode: 349 training return: tensor(306.9082, device='cuda:0')
episode: 350 training return: tensor(484.3292, device='cuda:0')
episode: 351 training return: tensor(404.4819, device='cuda:0')
epoch: 88 test_true_pfm: 5591.436949628055 sim_pfm: 556.6163276493511
episode: 352 training return: tensor(405.2334, device='cuda:0')
episode: 353 training return: tensor(159.9161, device='cuda:0')
episode: 354 training return: tensor(522.6476, device='cuda:0')
episode: 355 training return: tensor(513.0435, device='cuda:0')
epoch: 89 test_true_pfm: 5588.468876937158 sim_pfm: 447.9626823569415
episode: 356 training return: tensor(528.8785, device='cuda:0')
episode: 357 training return: tensor(418.7542, device='cuda:0')
episode: 358 training return: tensor(374.1885, device='cuda:0')
episode: 359 training return: tensor(417.3620, device='cuda:0')
epoch: 90 test_true_pfm: 5636.825691158308 sim_pfm: 445.0424804565652
episode: 360 training return: tensor(368.1235, device='cuda:0')
episode: 361 training return: tensor(575.4247, device='cuda:0')
episode: 362 training return: tensor(306.2012, device='cuda:0')
episode: 363 training return: tensor(625.3753, device='cuda:0')
epoch: 91 test_true_pfm: 5599.46296272894 sim_pfm: 617.0839976964247
episode: 364 training return: tensor(397.4064, device='cuda:0')
episode: 365 training return: tensor(487.6807, device='cuda:0')
episode: 366 training return: tensor(385.3130, device='cuda:0')
episode: 367 training return: tensor(533.2856, device='cuda:0')
epoch: 92 test_true_pfm: 5595.902019868828 sim_pfm: 558.8520352246705
episode: 368 training return: tensor(272.8623, device='cuda:0')
episode: 369 training return: tensor(457.8992, device='cuda:0')
episode: 370 training return: tensor(490.2247, device='cuda:0')
episode: 371 training return: tensor(482.9934, device='cuda:0')
epoch: 93 test_true_pfm: 5595.748233625313 sim_pfm: 539.2820779293155
episode: 372 training return: tensor(506.8995, device='cuda:0')
episode: 373 training return: tensor(387.8162, device='cuda:0')
episode: 374 training return: tensor(503.9028, device='cuda:0')
episode: 375 training return: tensor(475.4550, device='cuda:0')
epoch: 94 test_true_pfm: 5621.979212434464 sim_pfm: 661.1146305773873
episode: 376 training return: tensor(486.3392, device='cuda:0')
episode: 377 training return: tensor(436.5837, device='cuda:0')
episode: 378 training return: tensor(544.7260, device='cuda:0')
episode: 379 training return: tensor(395.0320, device='cuda:0')
epoch: 95 test_true_pfm: 5629.547365609735 sim_pfm: 622.245332786891
episode: 380 training return: tensor(418.3489, device='cuda:0')
episode: 381 training return: tensor(449.6434, device='cuda:0')
episode: 382 training return: tensor(501.1086, device='cuda:0')
episode: 383 training return: tensor(349.4485, device='cuda:0')
epoch: 96 test_true_pfm: 5599.919976217181 sim_pfm: 662.7212574511359
episode: 384 training return: tensor(335.5029, device='cuda:0')
episode: 385 training return: tensor(340.3254, device='cuda:0')
episode: 386 training return: tensor(517.8569, device='cuda:0')
episode: 387 training return: tensor(502.1055, device='cuda:0')
epoch: 97 test_true_pfm: 5550.764312829623 sim_pfm: 542.8716838095182
episode: 388 training return: tensor(447.8976, device='cuda:0')
episode: 389 training return: tensor(427.6797, device='cuda:0')
episode: 390 training return: tensor(456.8042, device='cuda:0')
episode: 391 training return: tensor(551.7513, device='cuda:0')
epoch: 98 test_true_pfm: 5582.455742548486 sim_pfm: 682.3838654519059
episode: 392 training return: tensor(234.5089, device='cuda:0')
episode: 393 training return: tensor(542.7184, device='cuda:0')
episode: 394 training return: tensor(483.7429, device='cuda:0')
episode: 395 training return: tensor(472.2154, device='cuda:0')
epoch: 99 test_true_pfm: 5686.372874771616 sim_pfm: 603.5565834254181
episode: 396 training return: tensor(467.9819, device='cuda:0')
episode: 397 training return: tensor(523.0641, device='cuda:0')
episode: 398 training return: tensor(233.4529, device='cuda:0')
episode: 399 training return: tensor(388.4659, device='cuda:0')
epoch: 100 test_true_pfm: 5594.332806819603 sim_pfm: 621.0516736177573
episode: 400 training return: tensor(462.3722, device='cuda:0')
episode: 401 training return: tensor(409.7823, device='cuda:0')
episode: 402 training return: tensor(401.9518, device='cuda:0')
episode: 403 training return: tensor(494.8693, device='cuda:0')
epoch: 101 test_true_pfm: 5675.137172091859 sim_pfm: 568.4522520692553
episode: 404 training return: tensor(513.9900, device='cuda:0')
episode: 405 training return: tensor(439.8452, device='cuda:0')
episode: 406 training return: tensor(425.7022, device='cuda:0')
episode: 407 training return: tensor(368.3674, device='cuda:0')
epoch: 102 test_true_pfm: 5603.881534797129 sim_pfm: 634.0531030249161
episode: 408 training return: tensor(458.4597, device='cuda:0')
episode: 409 training return: tensor(339.3549, device='cuda:0')
episode: 410 training return: tensor(325.1080, device='cuda:0')
episode: 411 training return: tensor(384.3410, device='cuda:0')
epoch: 103 test_true_pfm: 5591.747298951907 sim_pfm: 560.6535468957542
episode: 412 training return: tensor(407.9101, device='cuda:0')
episode: 413 training return: tensor(468.3704, device='cuda:0')
episode: 414 training return: tensor(536.3170, device='cuda:0')
episode: 415 training return: tensor(374.6819, device='cuda:0')
epoch: 104 test_true_pfm: 5578.125217978491 sim_pfm: 605.9000707379697
episode: 416 training return: tensor(447.0328, device='cuda:0')
episode: 417 training return: tensor(535.8966, device='cuda:0')
episode: 418 training return: tensor(472.9925, device='cuda:0')
episode: 419 training return: tensor(455.5402, device='cuda:0')
epoch: 105 test_true_pfm: 5635.449155805975 sim_pfm: 592.2173782961327
episode: 420 training return: tensor(511.5094, device='cuda:0')
episode: 421 training return: tensor(379.5543, device='cuda:0')
episode: 422 training return: tensor(560.3613, device='cuda:0')
episode: 423 training return: tensor(298.8339, device='cuda:0')
epoch: 106 test_true_pfm: 5568.5018641872 sim_pfm: 595.5613746320402
episode: 424 training return: tensor(474.0702, device='cuda:0')
episode: 425 training return: tensor(347.1815, device='cuda:0')
episode: 426 training return: tensor(317.1305, device='cuda:0')
episode: 427 training return: tensor(527.6431, device='cuda:0')
epoch: 107 test_true_pfm: 5701.04710537475 sim_pfm: 587.482282802191
episode: 428 training return: tensor(554.7523, device='cuda:0')
episode: 429 training return: tensor(428.9048, device='cuda:0')
episode: 430 training return: tensor(604.7310, device='cuda:0')
episode: 431 training return: tensor(483.9890, device='cuda:0')
epoch: 108 test_true_pfm: 5563.769509066016 sim_pfm: 677.4468938057931
episode: 432 training return: tensor(532.7416, device='cuda:0')
episode: 433 training return: tensor(405.7428, device='cuda:0')
episode: 434 training return: tensor(472.0822, device='cuda:0')
episode: 435 training return: tensor(544.2305, device='cuda:0')
epoch: 109 test_true_pfm: 5622.762916669712 sim_pfm: 550.869891768399
episode: 436 training return: tensor(463.9256, device='cuda:0')
episode: 437 training return: tensor(448.3767, device='cuda:0')
episode: 438 training return: tensor(567.9489, device='cuda:0')
episode: 439 training return: tensor(456.1201, device='cuda:0')
epoch: 110 test_true_pfm: 5522.588521253027 sim_pfm: 629.5867116290416
episode: 440 training return: tensor(527.7202, device='cuda:0')
episode: 441 training return: tensor(428.9135, device='cuda:0')
episode: 442 training return: tensor(471.4608, device='cuda:0')
episode: 443 training return: tensor(380.3549, device='cuda:0')
epoch: 111 test_true_pfm: 5635.10840750049 sim_pfm: 522.7525285534017
episode: 444 training return: tensor(314.4800, device='cuda:0')
episode: 445 training return: tensor(623.1513, device='cuda:0')
episode: 446 training return: tensor(511.4202, device='cuda:0')
episode: 447 training return: tensor(470.7242, device='cuda:0')
epoch: 112 test_true_pfm: 5563.268962786762 sim_pfm: 621.8596776456883
episode: 448 training return: tensor(309.7854, device='cuda:0')
episode: 449 training return: tensor(392.3689, device='cuda:0')
episode: 450 training return: tensor(434.8490, device='cuda:0')
episode: 451 training return: tensor(414.0316, device='cuda:0')
epoch: 113 test_true_pfm: 5592.491660616662 sim_pfm: 582.4406128735282
episode: 452 training return: tensor(377.1653, device='cuda:0')
episode: 453 training return: tensor(599.6406, device='cuda:0')
episode: 454 training return: tensor(603.8025, device='cuda:0')
episode: 455 training return: tensor(450.8876, device='cuda:0')
epoch: 114 test_true_pfm: 5631.629539370896 sim_pfm: 580.8203651161554
episode: 456 training return: tensor(445.2273, device='cuda:0')
episode: 457 training return: tensor(327.2697, device='cuda:0')
episode: 458 training return: tensor(347.8658, device='cuda:0')
episode: 459 training return: tensor(418.5269, device='cuda:0')
epoch: 115 test_true_pfm: 5597.598293968629 sim_pfm: 602.3380619400801
episode: 460 training return: tensor(396.3058, device='cuda:0')
episode: 461 training return: tensor(639.5371, device='cuda:0')
episode: 462 training return: tensor(552.5006, device='cuda:0')
episode: 463 training return: tensor(456.8049, device='cuda:0')
epoch: 116 test_true_pfm: 5592.017329609475 sim_pfm: 631.4535576712806
episode: 464 training return: tensor(597.3571, device='cuda:0')
episode: 465 training return: tensor(535.8980, device='cuda:0')
episode: 466 training return: tensor(564.3062, device='cuda:0')
episode: 467 training return: tensor(452.7722, device='cuda:0')
epoch: 117 test_true_pfm: 5619.176866838688 sim_pfm: 607.7430813581062
episode: 468 training return: tensor(330.5876, device='cuda:0')
episode: 469 training return: tensor(307.3290, device='cuda:0')
episode: 470 training return: tensor(516.7343, device='cuda:0')
episode: 471 training return: tensor(658.1177, device='cuda:0')
epoch: 118 test_true_pfm: 5617.33157352808 sim_pfm: 533.6249523715427
episode: 472 training return: tensor(342.3446, device='cuda:0')
episode: 473 training return: tensor(570.3235, device='cuda:0')
episode: 474 training return: tensor(579.3523, device='cuda:0')
episode: 475 training return: tensor(441.3700, device='cuda:0')
epoch: 119 test_true_pfm: 5721.214753618265 sim_pfm: 491.75500310449087
episode: 476 training return: tensor(528.2235, device='cuda:0')
episode: 477 training return: tensor(467.7198, device='cuda:0')
episode: 478 training return: tensor(407.8542, device='cuda:0')
episode: 479 training return: tensor(547.1432, device='cuda:0')
epoch: 120 test_true_pfm: 5710.583873295964 sim_pfm: 616.8263364216546
episode: 480 training return: tensor(619.0037, device='cuda:0')
episode: 481 training return: tensor(520.8188, device='cuda:0')
episode: 482 training return: tensor(533.2298, device='cuda:0')
episode: 483 training return: tensor(404.5255, device='cuda:0')
epoch: 121 test_true_pfm: 5604.10318779619 sim_pfm: 628.6150677872356
episode: 484 training return: tensor(453.9483, device='cuda:0')
episode: 485 training return: tensor(587.7417, device='cuda:0')
episode: 486 training return: tensor(391.4610, device='cuda:0')
episode: 487 training return: tensor(601.5867, device='cuda:0')
epoch: 122 test_true_pfm: 5679.126884887891 sim_pfm: 635.742214320538
episode: 488 training return: tensor(403.3690, device='cuda:0')
episode: 489 training return: tensor(487.4805, device='cuda:0')
episode: 490 training return: tensor(530.1997, device='cuda:0')
episode: 491 training return: tensor(457.5889, device='cuda:0')
epoch: 123 test_true_pfm: 5658.0931310262085 sim_pfm: 640.9624199944859
episode: 492 training return: tensor(417.2285, device='cuda:0')
episode: 493 training return: tensor(550.7283, device='cuda:0')
episode: 494 training return: tensor(409.8680, device='cuda:0')
episode: 495 training return: tensor(525.2133, device='cuda:0')
epoch: 124 test_true_pfm: 5622.3015582163025 sim_pfm: 647.5314579736829
episode: 496 training return: tensor(628.5902, device='cuda:0')
episode: 497 training return: tensor(496.1702, device='cuda:0')
episode: 498 training return: tensor(532.7165, device='cuda:0')
episode: 499 training return: tensor(195.3234, device='cuda:0')
epoch: 125 test_true_pfm: 5606.717323425187 sim_pfm: 685.1375981460636
episode: 500 training return: tensor(492.6156, device='cuda:0')
episode: 501 training return: tensor(558.9917, device='cuda:0')
episode: 502 training return: tensor(538.8898, device='cuda:0')
episode: 503 training return: tensor(464.1278, device='cuda:0')
epoch: 126 test_true_pfm: 5615.816513168914 sim_pfm: 623.4541828092964
episode: 504 training return: tensor(394.1325, device='cuda:0')
episode: 505 training return: tensor(476.5976, device='cuda:0')
episode: 506 training return: tensor(405.2408, device='cuda:0')
episode: 507 training return: tensor(556.3910, device='cuda:0')
epoch: 127 test_true_pfm: 5648.830428263213 sim_pfm: 629.5805488873157
episode: 508 training return: tensor(497.7690, device='cuda:0')
episode: 509 training return: tensor(394.0280, device='cuda:0')
episode: 510 training return: tensor(513.8458, device='cuda:0')
episode: 511 training return: tensor(434.8713, device='cuda:0')
epoch: 128 test_true_pfm: 5659.9528155177895 sim_pfm: 563.9421679508523
episode: 512 training return: tensor(406.9142, device='cuda:0')
episode: 513 training return: tensor(299.5279, device='cuda:0')
episode: 514 training return: tensor(249.1694, device='cuda:0')
episode: 515 training return: tensor(284.1755, device='cuda:0')
epoch: 129 test_true_pfm: 5685.504323957084 sim_pfm: 532.6448268025027
episode: 516 training return: tensor(382.1839, device='cuda:0')
episode: 517 training return: tensor(390.6412, device='cuda:0')
episode: 518 training return: tensor(538.8133, device='cuda:0')
episode: 519 training return: tensor(568.9038, device='cuda:0')
epoch: 130 test_true_pfm: 5664.970003512234 sim_pfm: 625.7461979313133
episode: 520 training return: tensor(532.4144, device='cuda:0')
episode: 521 training return: tensor(563.2162, device='cuda:0')
episode: 522 training return: tensor(493.3751, device='cuda:0')
episode: 523 training return: tensor(512.0237, device='cuda:0')
epoch: 131 test_true_pfm: 5647.8557856640255 sim_pfm: 562.2252619722858
episode: 524 training return: tensor(280.0859, device='cuda:0')
episode: 525 training return: tensor(520.4454, device='cuda:0')
episode: 526 training return: tensor(562.7654, device='cuda:0')
episode: 527 training return: tensor(624.4350, device='cuda:0')
epoch: 132 test_true_pfm: 5663.571191308759 sim_pfm: 625.7514570911104
episode: 528 training return: tensor(396.1944, device='cuda:0')
episode: 529 training return: tensor(474.3171, device='cuda:0')
episode: 530 training return: tensor(589.9327, device='cuda:0')
episode: 531 training return: tensor(555.0743, device='cuda:0')
epoch: 133 test_true_pfm: 5645.528105693014 sim_pfm: 611.7787946822646
episode: 532 training return: tensor(439.3100, device='cuda:0')
episode: 533 training return: tensor(524.8359, device='cuda:0')
episode: 534 training return: tensor(576.5861, device='cuda:0')
episode: 535 training return: tensor(706.7032, device='cuda:0')
epoch: 134 test_true_pfm: 5646.700018451144 sim_pfm: 600.7761156488559
episode: 536 training return: tensor(480.4897, device='cuda:0')
episode: 537 training return: tensor(648.1448, device='cuda:0')
episode: 538 training return: tensor(347.7798, device='cuda:0')
episode: 539 training return: tensor(581.7009, device='cuda:0')
epoch: 135 test_true_pfm: 5727.5958473425335 sim_pfm: 606.5315776232941
episode: 540 training return: tensor(515.7307, device='cuda:0')
episode: 541 training return: tensor(330.6043, device='cuda:0')
episode: 542 training return: tensor(417.3510, device='cuda:0')
episode: 543 training return: tensor(615.6712, device='cuda:0')
epoch: 136 test_true_pfm: 5669.796717543646 sim_pfm: 575.8488946681222
episode: 544 training return: tensor(579.5134, device='cuda:0')
episode: 545 training return: tensor(574.3984, device='cuda:0')
episode: 546 training return: tensor(483.1410, device='cuda:0')
episode: 547 training return: tensor(650.5449, device='cuda:0')
epoch: 137 test_true_pfm: 5637.44879353174 sim_pfm: 638.6845400634533
episode: 548 training return: tensor(586.5804, device='cuda:0')
episode: 549 training return: tensor(550.7263, device='cuda:0')
episode: 550 training return: tensor(572.1488, device='cuda:0')
episode: 551 training return: tensor(455.6073, device='cuda:0')
epoch: 138 test_true_pfm: 5650.215656867156 sim_pfm: 676.9673607742103
episode: 552 training return: tensor(531.1405, device='cuda:0')
episode: 553 training return: tensor(559.2491, device='cuda:0')
episode: 554 training return: tensor(637.0978, device='cuda:0')
episode: 555 training return: tensor(470.5934, device='cuda:0')
epoch: 139 test_true_pfm: 5722.736903204706 sim_pfm: 685.8351484568944
episode: 556 training return: tensor(632.1218, device='cuda:0')
episode: 557 training return: tensor(552.8962, device='cuda:0')
episode: 558 training return: tensor(457.2725, device='cuda:0')
episode: 559 training return: tensor(508.4671, device='cuda:0')
epoch: 140 test_true_pfm: 5710.0589906905125 sim_pfm: 680.2867432263447
episode: 560 training return: tensor(507.0609, device='cuda:0')
episode: 561 training return: tensor(529.8964, device='cuda:0')
episode: 562 training return: tensor(421.0004, device='cuda:0')
episode: 563 training return: tensor(497.4979, device='cuda:0')
epoch: 141 test_true_pfm: 5680.949510463558 sim_pfm: 633.6058279156374
episode: 564 training return: tensor(499.5885, device='cuda:0')
episode: 565 training return: tensor(605.0636, device='cuda:0')
episode: 566 training return: tensor(442.9591, device='cuda:0')
episode: 567 training return: tensor(542.3173, device='cuda:0')
epoch: 142 test_true_pfm: 5754.2139844404255 sim_pfm: 641.8668720719094
episode: 568 training return: tensor(448.8220, device='cuda:0')
episode: 569 training return: tensor(658.4417, device='cuda:0')
episode: 570 training return: tensor(579.8351, device='cuda:0')
episode: 571 training return: tensor(565.4649, device='cuda:0')
epoch: 143 test_true_pfm: 5617.771386439291 sim_pfm: 638.1276981834866
episode: 572 training return: tensor(589.2960, device='cuda:0')
episode: 573 training return: tensor(493.3704, device='cuda:0')
episode: 574 training return: tensor(503.1943, device='cuda:0')
episode: 575 training return: tensor(268.1812, device='cuda:0')
epoch: 144 test_true_pfm: 5689.912040131804 sim_pfm: 674.3631762679046
episode: 576 training return: tensor(550.1575, device='cuda:0')
episode: 577 training return: tensor(674.8995, device='cuda:0')
episode: 578 training return: tensor(568.6104, device='cuda:0')
episode: 579 training return: tensor(528.3187, device='cuda:0')
epoch: 145 test_true_pfm: 5728.323354144322 sim_pfm: 633.9219784919018
episode: 580 training return: tensor(658.7985, device='cuda:0')
episode: 581 training return: tensor(489.8013, device='cuda:0')
episode: 582 training return: tensor(468.8164, device='cuda:0')
episode: 583 training return: tensor(500.9174, device='cuda:0')
epoch: 146 test_true_pfm: 5610.813139256293 sim_pfm: 684.9779687354652
episode: 584 training return: tensor(422.3711, device='cuda:0')
episode: 585 training return: tensor(624.1680, device='cuda:0')
episode: 586 training return: tensor(578.7618, device='cuda:0')
episode: 587 training return: tensor(589.0519, device='cuda:0')
epoch: 147 test_true_pfm: 5718.237590614105 sim_pfm: 623.3824905424844
episode: 588 training return: tensor(614.3465, device='cuda:0')
episode: 589 training return: tensor(467.3664, device='cuda:0')
episode: 590 training return: tensor(443.1125, device='cuda:0')
episode: 591 training return: tensor(598.8970, device='cuda:0')
epoch: 148 test_true_pfm: 5606.528308946009 sim_pfm: 705.1035836696004
episode: 592 training return: tensor(571.9688, device='cuda:0')
episode: 593 training return: tensor(333.2783, device='cuda:0')
episode: 594 training return: tensor(515.6639, device='cuda:0')
episode: 595 training return: tensor(393.3993, device='cuda:0')
epoch: 149 test_true_pfm: 5623.06308545017 sim_pfm: 700.5839771072691
episode: 596 training return: tensor(497.6238, device='cuda:0')
episode: 597 training return: tensor(520.9163, device='cuda:0')
episode: 598 training return: tensor(392.9660, device='cuda:0')
episode: 599 training return: tensor(519.6341, device='cuda:0')
epoch: 150 test_true_pfm: 5705.865671178802 sim_pfm: 650.2113600840481
