['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '1', '--data', '3000']
epoch: 0 training_loss 0.2445554867386818 test_loss: 0.11033673286437988
epoch: 1 training_loss 0.2049065862596035 test_loss: 0.10109364986419678
epoch: 2 training_loss 0.2004636210203171 test_loss: 0.0973948895931244
epoch: 3 training_loss 0.1940630178898573 test_loss: 0.10111781358718872
epoch: 4 training_loss 0.18644057966768743 test_loss: 0.09110140204429626
epoch: 5 training_loss 0.18784316517412664 test_loss: 0.09263145327568054
epoch: 6 training_loss 0.1876933191716671 test_loss: 0.0963060736656189
epoch: 7 training_loss 0.19285154901444912 test_loss: 0.09184688925743104
epoch: 8 training_loss 0.17215433411300182 test_loss: 0.0935074508190155
epoch: 9 training_loss 0.17967052824795246 test_loss: 0.09384433031082154
epoch: 10 training_loss 0.17602245070040226 test_loss: 0.08953993320465088
epoch: 11 training_loss 0.17848195284605026 test_loss: 0.09403710961341857
epoch: 12 training_loss 0.182231378108263 test_loss: 0.09974347352981568
epoch: 13 training_loss 0.17833814106881618 test_loss: 0.09274386763572692
epoch: 14 training_loss 0.17880956456065178 test_loss: 0.09168457984924316
epoch: 15 training_loss 0.17518812380731105 test_loss: 0.09285152554512024
epoch: 16 training_loss 0.17830116391181947 test_loss: 0.09146894216537475
epoch: 17 training_loss 0.17127199247479438 test_loss: 0.09941480755805969
epoch: 18 training_loss 0.17281392440199853 test_loss: 0.09516187906265258
epoch: 19 training_loss 0.18085953772068022 test_loss: 0.09098146557807922
epoch: 20 training_loss 0.1767126753181219 test_loss: 0.09013584852218628
epoch: 21 training_loss 0.17072680056095124 test_loss: 0.08967972993850708
epoch: 22 training_loss 0.1673463873565197 test_loss: 0.0905378520488739
epoch: 23 training_loss 0.1816115441918373 test_loss: 0.09099138975143432
epoch: 24 training_loss 0.181695593893528 test_loss: 0.09566596150398254
epoch: 25 training_loss 0.17581353433430194 test_loss: 0.08737608790397644
epoch: 26 training_loss 0.17409821458160876 test_loss: 0.09775627851486206
epoch: 27 training_loss 0.16850040435791017 test_loss: 0.09920608997344971
epoch: 28 training_loss 0.17759650737047195 test_loss: 0.09710737466812133
epoch: 29 training_loss 0.17959812238812448 test_loss: 0.09257674217224121
epoch: 30 training_loss 0.17266041040420532 test_loss: 0.09034297466278077
epoch: 31 training_loss 0.16155179996043445 test_loss: 0.09939444065093994
epoch: 32 training_loss 0.17216545201838016 test_loss: 0.09378893375396728
epoch: 33 training_loss 0.16923307310789823 test_loss: 0.09188866019248962
epoch: 34 training_loss 0.172233093008399 test_loss: 0.08818706274032592
epoch: 35 training_loss 0.16925086349248886 test_loss: 0.0880927324295044
epoch: 36 training_loss 0.1682379975169897 test_loss: 0.09331626892089843
epoch: 37 training_loss 0.1741994635760784 test_loss: 0.09348503351211548
epoch: 38 training_loss 0.16292299062013627 test_loss: 0.10048322677612305
epoch: 39 training_loss 0.16484660364687442 test_loss: 0.0902867078781128
epoch: 40 training_loss 0.16468898460268974 test_loss: 0.09144234657287598
epoch: 41 training_loss 0.16918163307011128 test_loss: 0.0895090401172638
epoch: 42 training_loss 0.15887348003685475 test_loss: 0.09491074681282044
epoch: 43 training_loss 0.15931375838816167 test_loss: 0.09891154170036316
epoch: 44 training_loss 0.16358222618699073 test_loss: 0.09783181548118591
epoch: 45 training_loss 0.16281518563628197 test_loss: 0.09910155534744262
epoch: 46 training_loss 0.15555032305419444 test_loss: 0.09836132526397705
epoch: 47 training_loss 0.16622897945344448 test_loss: 0.09342609047889709
epoch: 48 training_loss 0.15429375234991313 test_loss: 0.10256718397140503
epoch: 49 training_loss 0.160490300655365 test_loss: 0.09923889636993408
epoch: 50 training_loss 0.15206013455986978 test_loss: 0.09573442339897156
epoch: 51 training_loss 0.15585083268582822 test_loss: 0.09093101620674134
epoch: 52 training_loss 0.16237213015556334 test_loss: 0.09576407670974732
epoch: 53 training_loss 0.1542249721288681 test_loss: 0.09968482851982116
epoch: 54 training_loss 0.1578821225464344 test_loss: 0.1022125244140625
epoch: 55 training_loss 0.1560053376853466 test_loss: 0.09017453789710998
epoch: 56 training_loss 0.15864719275385142 test_loss: 0.09088882803916931
epoch: 57 training_loss 0.15591879062354563 test_loss: 0.09434483647346496
epoch: 58 training_loss 0.14405155092477798 test_loss: 0.09556061029434204
epoch: 59 training_loss 0.15266017615795135 test_loss: 0.09536595344543457
epoch: 60 training_loss 0.15596088007092476 test_loss: 0.1054162859916687
epoch: 61 training_loss 0.1455483215302229 test_loss: 0.10028198957443238
epoch: 62 training_loss 0.14919280350208283 test_loss: 0.10071133375167847
epoch: 63 training_loss 0.14334036260843278 test_loss: 0.09757692813873291
epoch: 64 training_loss 0.15044851943850518 test_loss: 0.09985946416854859
epoch: 65 training_loss 0.1468174722790718 test_loss: 0.11069897413253785
epoch: 66 training_loss 0.15205278605222702 test_loss: 0.09983275532722473
epoch: 67 training_loss 0.14147747099399566 test_loss: 0.13047993183135986
epoch: 68 training_loss 0.14172713682055474 test_loss: 0.09783797860145568
epoch: 69 training_loss 0.1384939519315958 test_loss: 0.10256670713424683
epoch: 70 training_loss 0.14318724915385247 test_loss: 0.10248619318008423
epoch: 71 training_loss 0.13305510122328998 test_loss: 0.10843629837036133
epoch: 72 training_loss 0.13758266672492028 test_loss: 0.10307873487472534
epoch: 73 training_loss 0.13681390281766653 test_loss: 0.10257579088211059
epoch: 74 training_loss 0.13226198099553585 test_loss: 0.11390122175216674
epoch: 75 training_loss 0.12622345998883247 test_loss: 0.09719712734222412
epoch: 76 training_loss 0.12589547000825405 test_loss: 0.10555449724197388
epoch: 77 training_loss 0.1277030251920223 test_loss: 0.11269444227218628
epoch: 78 training_loss 0.12715561635792255 test_loss: 0.12190064191818237
epoch: 79 training_loss 0.1294282615184784 test_loss: 0.10099157094955444
epoch: 80 training_loss 0.12830966860055923 test_loss: 0.1005547285079956
epoch: 81 training_loss 0.12348609518259764 test_loss: 0.11464285850524902
epoch: 82 training_loss 0.12393910311162472 test_loss: 0.11477051973342896
epoch: 83 training_loss 0.12422715436667203 test_loss: 0.11904256343841553
epoch: 84 training_loss 0.11880188055336476 test_loss: 0.12321882247924805
epoch: 85 training_loss 0.12379238113760949 test_loss: 0.13344062566757203
epoch: 86 training_loss 0.11950241796672344 test_loss: 0.11705368757247925
epoch: 87 training_loss 0.11755861788988113 test_loss: 0.11498823165893554
epoch: 88 training_loss 0.12253123920410872 test_loss: 0.12774064540863037
epoch: 89 training_loss 0.11867728915065527 test_loss: 0.11660773754119873
epoch: 90 training_loss 0.1100170474499464 test_loss: 0.1252368688583374
epoch: 91 training_loss 0.11002188920974731 test_loss: 0.12249590158462524
epoch: 92 training_loss 0.11026271644979715 test_loss: 0.11706643104553223
epoch: 93 training_loss 0.1096280312165618 test_loss: 0.12182283401489258
epoch: 94 training_loss 0.10597514800727367 test_loss: 0.13101160526275635
epoch: 95 training_loss 0.0981935134716332 test_loss: 0.12239677906036377
epoch: 96 training_loss 0.1046406153962016 test_loss: 0.14049286842346193
epoch: 97 training_loss 0.10092549480497837 test_loss: 0.11717548370361328
epoch: 98 training_loss 0.10117885466665029 test_loss: 0.13540704250335694
epoch: 99 training_loss 0.09808804709464311 test_loss: 0.14919891357421874
epoch: 100 training_loss 0.0953317236341536 test_loss: 0.11834471225738526
epoch: 101 training_loss 0.09770675670355558 test_loss: 0.12362492084503174
epoch: 102 training_loss 0.0951656173542142 test_loss: 0.13528127670288087
epoch: 103 training_loss 0.09423212252557278 test_loss: 0.13256847858428955
epoch: 104 training_loss 0.08383797399699688 test_loss: 0.12968132495880128
epoch: 105 training_loss 0.08836166746914387 test_loss: 0.13542307615280152
epoch: 106 training_loss 0.08344488084316254 test_loss: 0.14918755292892455
epoch: 107 training_loss 0.09278776209801436 test_loss: 0.13046449422836304
epoch: 108 training_loss 0.08542056215927005 test_loss: 0.14718594551086425
epoch: 109 training_loss 0.09586712636053563 test_loss: 0.1541868805885315
epoch: 110 training_loss 0.08987908229231835 test_loss: 0.1373443126678467
epoch: 111 training_loss 0.08660557635128498 test_loss: 0.1493613600730896
epoch: 112 training_loss 0.08337074838578701 test_loss: 0.13541165590286255
epoch: 113 training_loss 0.08227374363690615 test_loss: 0.1337241291999817
epoch: 114 training_loss 0.0897326442040503 test_loss: 0.1249852180480957
epoch: 115 training_loss 0.07940376760438085 test_loss: 0.15416373014450074
epoch: 116 training_loss 0.07586398098617791 test_loss: 0.13445661067962647
epoch: 117 training_loss 0.07026465760543943 test_loss: 0.13321292400360107
epoch: 118 training_loss 0.07571957975625992 test_loss: 0.13915615081787108
epoch: 119 training_loss 0.07592396473977715 test_loss: 0.14268906116485597
epoch: 120 training_loss 0.0799422481097281 test_loss: 0.14532619714736938
epoch: 121 training_loss 0.07614127587527036 test_loss: 0.14155017137527465
epoch: 122 training_loss 0.07070935018360615 test_loss: 0.1455491900444031
epoch: 123 training_loss 0.06815457001328468 test_loss: 0.17252819538116454
epoch: 124 training_loss 0.06905083561316133 test_loss: 0.14061011075973512
epoch: 125 training_loss 0.07213824773207307 test_loss: 0.17923187017440795
epoch: 126 training_loss 0.07411095846444368 test_loss: 0.15637747049331666
epoch: 127 training_loss 0.06347017455846071 test_loss: 0.14713594913482667
epoch: 128 training_loss 0.07116150431334972 test_loss: 0.13175814151763915
epoch: 129 training_loss 0.06584759062156081 test_loss: 0.15138064622879027
epoch: 130 training_loss 0.06565870948135853 test_loss: 0.15916719436645507
epoch: 131 training_loss 0.07012496877461671 test_loss: 0.18247004747390747
epoch: 132 training_loss 0.06599693410098553 test_loss: 0.15248658657073974
epoch: 133 training_loss 0.05991545651108027 test_loss: 0.1775890588760376
epoch: 134 training_loss 0.06789853250607848 test_loss: 0.1559454083442688
epoch: 135 training_loss 0.058248572424054144 test_loss: 0.15034348964691163
epoch: 136 training_loss 0.05669290168210864 test_loss: 0.1962246060371399
epoch: 137 training_loss 0.054571562614291906 test_loss: 0.17644327878952026
epoch: 138 training_loss 0.056830411721020935 test_loss: 0.16381250619888305
epoch: 139 training_loss 0.05472538526169956 test_loss: 0.193274188041687
epoch: 140 training_loss 0.058800092469900846 test_loss: 0.16658668518066405
epoch: 141 training_loss 0.055945355650037526 test_loss: 0.20094513893127441
epoch: 142 training_loss 0.06412668215110898 test_loss: 0.19527634382247924
epoch: 143 training_loss 0.06042009625583887 test_loss: 0.18252196311950683
epoch: 144 training_loss 0.056639723144471646 test_loss: 0.1813984513282776
epoch: 145 training_loss 0.04740703230723739 test_loss: 0.18163011074066163
epoch: 146 training_loss 0.04816192683298141 test_loss: 0.1827932596206665
epoch: 147 training_loss 0.045903147840872405 test_loss: 0.16957937479019164
epoch: 148 training_loss 0.04665637624450028 test_loss: 0.17888383865356444
epoch: 149 training_loss 0.04894346443936229 test_loss: 0.181402325630188
epoch: 0 training_loss 8.227053766250611 test_loss: 2.234128952026367
epoch: 1 training_loss 3.7516377425193785 test_loss: 1.4908716201782226
epoch: 2 training_loss 2.7262181544303896 test_loss: 1.1161203384399414
epoch: 3 training_loss 2.1557227873802187 test_loss: 0.9503265380859375
epoch: 4 training_loss 1.8240091335773467 test_loss: 0.8052035331726074
epoch: 5 training_loss 1.6198869931697846 test_loss: 0.7501405239105224
epoch: 6 training_loss 1.4628823471069337 test_loss: 0.6746777057647705
epoch: 7 training_loss 1.3735032987594604 test_loss: 0.6326513290405273
epoch: 8 training_loss 1.3115451502799989 test_loss: 0.6091746330261231
epoch: 9 training_loss 1.228091412782669 test_loss: 0.5613224506378174
epoch: 10 training_loss 1.1695510268211364 test_loss: 0.555273962020874
epoch: 11 training_loss 1.1397625941038132 test_loss: 0.5206475257873535
epoch: 12 training_loss 1.0605703806877136 test_loss: 0.5132305145263671
epoch: 13 training_loss 1.0418442940711976 test_loss: 0.4962970733642578
epoch: 14 training_loss 1.006965892314911 test_loss: 0.48931279182434084
epoch: 15 training_loss 0.9836381131410599 test_loss: 0.4771214485168457
epoch: 16 training_loss 0.9517711049318314 test_loss: 0.45465621948242185
epoch: 17 training_loss 0.9405641931295395 test_loss: 0.4521523952484131
epoch: 18 training_loss 0.9147216540575027 test_loss: 0.45983328819274905
epoch: 19 training_loss 0.8886281204223633 test_loss: 0.436217212677002
epoch: 20 training_loss 0.8741629487276077 test_loss: 0.42300915718078613
epoch: 21 training_loss 0.863154159784317 test_loss: 0.4275677680969238
epoch: 22 training_loss 0.8458057296276092 test_loss: 0.4113770961761475
epoch: 23 training_loss 0.8361039739847184 test_loss: 0.40208754539489744
epoch: 24 training_loss 0.8275357925891876 test_loss: 0.39752461910247805
epoch: 25 training_loss 0.8079666048288345 test_loss: 0.401935338973999
epoch: 26 training_loss 0.7990088641643525 test_loss: 0.39377822875976565
epoch: 27 training_loss 0.784187907576561 test_loss: 0.37917556762695315
epoch: 28 training_loss 0.7847466492652893 test_loss: 0.3905844211578369
epoch: 29 training_loss 0.7603408753871918 test_loss: 0.3832385540008545
epoch: 30 training_loss 0.7660601335763931 test_loss: 0.37214467525482176
epoch: 31 training_loss 0.7376447451114655 test_loss: 0.3731783390045166
epoch: 32 training_loss 0.7465197330713272 test_loss: 0.3747788667678833
epoch: 33 training_loss 0.7403006541728974 test_loss: 0.36174798011779785
epoch: 34 training_loss 0.7194826799631119 test_loss: 0.3659153938293457
epoch: 35 training_loss 0.7157562351226807 test_loss: 0.3551468372344971
epoch: 36 training_loss 0.7015616393089295 test_loss: 0.35318970680236816
epoch: 37 training_loss 0.6990827310085297 test_loss: 0.34005050659179686
epoch: 38 training_loss 0.7045595318078994 test_loss: 0.3406670331954956
epoch: 39 training_loss 0.6848026430606842 test_loss: 0.3452749490737915
epoch: 40 training_loss 0.6882802438735962 test_loss: 0.34426805973052976
epoch: 41 training_loss 0.6970290845632553 test_loss: 0.3366283655166626
epoch: 42 training_loss 0.6695061308145523 test_loss: 0.34705047607421874
epoch: 43 training_loss 0.6679704427719116 test_loss: 0.3304142475128174
epoch: 44 training_loss 0.665751473903656 test_loss: 0.3286912202835083
epoch: 45 training_loss 0.6553956466913223 test_loss: 0.32882328033447267
epoch: 46 training_loss 0.6633144587278366 test_loss: 0.3525305509567261
epoch: 47 training_loss 0.652845978140831 test_loss: 0.3216820955276489
epoch: 48 training_loss 0.6394581520557403 test_loss: 0.33723423480987547
epoch: 49 training_loss 0.6573876172304154 test_loss: 0.3234928369522095
epoch: 50 training_loss 0.6409490889310837 test_loss: 0.32660617828369143
epoch: 51 training_loss 0.6386005204916 test_loss: 0.3397088527679443
epoch: 52 training_loss 0.634762561917305 test_loss: 0.31305718421936035
epoch: 53 training_loss 0.6325098508596421 test_loss: 0.3090892553329468
epoch: 54 training_loss 0.6281418603658676 test_loss: 0.31129591464996337
epoch: 55 training_loss 0.6238109982013702 test_loss: 0.3132759094238281
epoch: 56 training_loss 0.6316446346044541 test_loss: 0.34299707412719727
epoch: 57 training_loss 0.6218655782938004 test_loss: 0.302860951423645
epoch: 58 training_loss 0.6334595507383347 test_loss: 0.3179203271865845
epoch: 59 training_loss 0.6078047573566436 test_loss: 0.30328707695007323
epoch: 60 training_loss 0.6083908408880234 test_loss: 0.31210224628448485
epoch: 61 training_loss 0.5986459845304489 test_loss: 0.31249520778656004
epoch: 62 training_loss 0.6015914034843445 test_loss: 0.30233967304229736
epoch: 63 training_loss 0.6026978307962417 test_loss: 0.3066754102706909
epoch: 64 training_loss 0.5952402889728546 test_loss: 0.2996471881866455
epoch: 65 training_loss 0.595067029595375 test_loss: 0.3230299472808838
epoch: 66 training_loss 0.5900377607345582 test_loss: 0.28895912170410154
epoch: 67 training_loss 0.5915735900402069 test_loss: 0.30122787952423097
epoch: 68 training_loss 0.5855851274728775 test_loss: 0.2965648651123047
epoch: 69 training_loss 0.5883436286449433 test_loss: 0.29247269630432127
epoch: 70 training_loss 0.5854051458835602 test_loss: 0.28959605693817136
epoch: 71 training_loss 0.5696707788109779 test_loss: 0.2937068700790405
epoch: 72 training_loss 0.5890760764479637 test_loss: 0.30073933601379393
epoch: 73 training_loss 0.5883165752887726 test_loss: 0.2955306529998779
epoch: 74 training_loss 0.5708650985360145 test_loss: 0.284255313873291
epoch: 75 training_loss 0.5800537657737732 test_loss: 0.2902924299240112
epoch: 76 training_loss 0.5668546032905578 test_loss: 0.28517880439758303
epoch: 77 training_loss 0.5733230584859847 test_loss: 0.2823098421096802
epoch: 78 training_loss 0.557520791888237 test_loss: 0.2820161819458008
epoch: 79 training_loss 0.5604862189292907 test_loss: 0.28212902545928953
epoch: 80 training_loss 0.5487609678506851 test_loss: 0.2889585018157959
epoch: 81 training_loss 0.5562103441357613 test_loss: 0.288170313835144
epoch: 82 training_loss 0.5640512472391128 test_loss: 0.27959527969360354
epoch: 83 training_loss 0.5583521503210068 test_loss: 0.282426118850708
epoch: 84 training_loss 0.552427907884121 test_loss: 0.28146677017211913
epoch: 85 training_loss 0.5578080374002456 test_loss: 0.28089678287506104
epoch: 86 training_loss 0.5565188464522361 test_loss: 0.27872047424316404
epoch: 87 training_loss 0.5609665817022323 test_loss: 0.28370254039764403
epoch: 88 training_loss 0.5616815674304962 test_loss: 0.2810086488723755
epoch: 89 training_loss 0.5483109727501869 test_loss: 0.29769537448883054
epoch: 90 training_loss 0.5612892645597458 test_loss: 0.2808219432830811
epoch: 91 training_loss 0.5571840843558311 test_loss: 0.29245040416717527
epoch: 92 training_loss 0.5324232339859009 test_loss: 0.2749032020568848
epoch: 93 training_loss 0.5403700059652329 test_loss: 0.27271769046783445
epoch: 94 training_loss 0.5453032568097115 test_loss: 0.2777003288269043
epoch: 95 training_loss 0.5438949283957482 test_loss: 0.2821654319763184
epoch: 96 training_loss 0.5360852468013764 test_loss: 0.27204983234405516
epoch: 97 training_loss 0.5356251984834671 test_loss: 0.26851179599761965
epoch: 98 training_loss 0.5418771561980248 test_loss: 0.2691298723220825
epoch: 99 training_loss 0.5308627381920814 test_loss: 0.2710207462310791
epoch: 100 training_loss 0.5326179450750351 test_loss: 0.26873025894165037
epoch: 101 training_loss 0.5330169540643692 test_loss: 0.2640143156051636
epoch: 102 training_loss 0.5426544660329818 test_loss: 0.26934645175933836
epoch: 103 training_loss 0.5224260035157203 test_loss: 0.27015981674194334
epoch: 104 training_loss 0.5259194532036782 test_loss: 0.2756753206253052
epoch: 105 training_loss 0.5312268510460854 test_loss: 0.2777530670166016
epoch: 106 training_loss 0.521517943739891 test_loss: 0.2647725582122803
epoch: 107 training_loss 0.5202672725915909 test_loss: 0.26710219383239747
epoch: 108 training_loss 0.5267622947692872 test_loss: 0.27741732597351076
epoch: 109 training_loss 0.5307660537958145 test_loss: 0.2611116886138916
epoch: 110 training_loss 0.5255823844671249 test_loss: 0.26857473850250246
epoch: 111 training_loss 0.517811106145382 test_loss: 0.2664208173751831
epoch: 112 training_loss 0.5199824196100234 test_loss: 0.26335394382476807
epoch: 113 training_loss 0.5116495057940483 test_loss: 0.25706470012664795
epoch: 114 training_loss 0.5224428272247315 test_loss: 0.25601396560668943
epoch: 115 training_loss 0.509877316057682 test_loss: 0.2593077182769775
epoch: 116 training_loss 0.5110607323050499 test_loss: 0.2599998235702515
epoch: 117 training_loss 0.5180088755488396 test_loss: 0.2681313991546631
epoch: 118 training_loss 0.5161134254932404 test_loss: 0.2593491554260254
epoch: 119 training_loss 0.515992458164692 test_loss: 0.2727266073226929
epoch: 120 training_loss 0.5147504118084908 test_loss: 0.25435376167297363
epoch: 121 training_loss 0.5113662847876549 test_loss: 0.2531522035598755
epoch: 122 training_loss 0.5111050620675087 test_loss: 0.26913573741912844
epoch: 123 training_loss 0.5110383406281471 test_loss: 0.2606734991073608
epoch: 124 training_loss 0.5126209843158722 test_loss: 0.26050662994384766
epoch: 125 training_loss 0.5056637850403786 test_loss: 0.2547996759414673
epoch: 126 training_loss 0.5124303504824639 test_loss: 0.25350887775421144
epoch: 127 training_loss 0.5015891966223717 test_loss: 0.25681252479553224
epoch: 128 training_loss 0.502109482884407 test_loss: 0.26755752563476565
epoch: 129 training_loss 0.5152054134011269 test_loss: 0.25784413814544677
epoch: 130 training_loss 0.4945162004232407 test_loss: 0.2605458974838257
epoch: 131 training_loss 0.5067340528964996 test_loss: 0.2677447319030762
epoch: 132 training_loss 0.5045139923691749 test_loss: 0.2627730131149292
epoch: 133 training_loss 0.5062553396821022 test_loss: 0.2525451421737671
epoch: 134 training_loss 0.5036667916178703 test_loss: 0.25217633247375487
epoch: 135 training_loss 0.49536250591278075 test_loss: 0.24904518127441405
epoch: 136 training_loss 0.497438243329525 test_loss: 0.24729382991790771
epoch: 137 training_loss 0.48701578855514527 test_loss: 0.24994468688964844
epoch: 138 training_loss 0.49069666385650634 test_loss: 0.2518897533416748
epoch: 139 training_loss 0.48638469725847244 test_loss: 0.25451581478118895
epoch: 140 training_loss 0.4943181037902832 test_loss: 0.24929542541503907
epoch: 141 training_loss 0.49762972086668017 test_loss: 0.2463908910751343
epoch: 142 training_loss 0.5043031591176986 test_loss: 0.2621012210845947
epoch: 143 training_loss 0.4980707359313965 test_loss: 0.24596714973449707
epoch: 144 training_loss 0.48599303483963013 test_loss: 0.2604775905609131
epoch: 145 training_loss 0.4978919184207916 test_loss: 0.2496396780014038
epoch: 146 training_loss 0.49512426406145094 test_loss: 0.2522092819213867
epoch: 147 training_loss 0.49097550213336943 test_loss: 0.243925142288208
epoch: 148 training_loss 0.48757208287715914 test_loss: 0.25359082221984863
epoch: 149 training_loss 0.48671375423669816 test_loss: 0.2438061237335205
2526.8435681128894
episode: 0 training return: tensor(-208.4029, device='cuda:0')
episode: 1 training return: tensor(412.3228, device='cuda:0')
episode: 2 training return: tensor(-150.5479, device='cuda:0')
episode: 3 training return: tensor(79.9639, device='cuda:0')
epoch: 1 test_true_pfm: 1607.0298280932877 sim_pfm: -221.6573514782746
episode: 4 training return: tensor(25.8313, device='cuda:0')
episode: 5 training return: tensor(137.5424, device='cuda:0')
episode: 6 training return: tensor(-116.5656, device='cuda:0')
episode: 7 training return: tensor(410.8846, device='cuda:0')
epoch: 2 test_true_pfm: 1593.6804696653826 sim_pfm: -41.862797395481415
episode: 8 training return: tensor(351.9211, device='cuda:0')
episode: 9 training return: tensor(9.4838, device='cuda:0')
episode: 10 training return: tensor(94.9799, device='cuda:0')
episode: 11 training return: tensor(211.5641, device='cuda:0')
epoch: 3 test_true_pfm: 1324.4081873585342 sim_pfm: -135.95827121828916
episode: 12 training return: tensor(419.1279, device='cuda:0')
episode: 13 training return: tensor(-87.4931, device='cuda:0')
episode: 14 training return: tensor(440.1694, device='cuda:0')
episode: 15 training return: tensor(-60.1350, device='cuda:0')
epoch: 4 test_true_pfm: 2742.8590730367146 sim_pfm: 120.3255275436677
episode: 16 training return: tensor(370.9645, device='cuda:0')
episode: 17 training return: tensor(-64.6804, device='cuda:0')
episode: 18 training return: tensor(-54.5746, device='cuda:0')
episode: 19 training return: tensor(-71.9095, device='cuda:0')
epoch: 5 test_true_pfm: 3226.26399764463 sim_pfm: 224.2888328625219
episode: 20 training return: tensor(-77.6760, device='cuda:0')
episode: 21 training return: tensor(442.3923, device='cuda:0')
episode: 22 training return: tensor(-48.7373, device='cuda:0')
episode: 23 training return: tensor(398.4366, device='cuda:0')
epoch: 6 test_true_pfm: 2650.430667686555 sim_pfm: 138.28402748337248
episode: 24 training return: tensor(-144.8964, device='cuda:0')
episode: 25 training return: tensor(416.6716, device='cuda:0')
episode: 26 training return: tensor(294.3098, device='cuda:0')
episode: 27 training return: tensor(-125.6077, device='cuda:0')
epoch: 7 test_true_pfm: 2019.5157921752277 sim_pfm: 55.92408092548916
episode: 28 training return: tensor(121.3464, device='cuda:0')
episode: 29 training return: tensor(-134.6387, device='cuda:0')
episode: 30 training return: tensor(335.5179, device='cuda:0')
episode: 31 training return: tensor(-85.1309, device='cuda:0')
epoch: 8 test_true_pfm: 2463.6649580932212 sim_pfm: 204.72629725091005
episode: 32 training return: tensor(163.1501, device='cuda:0')
episode: 33 training return: tensor(-20.8106, device='cuda:0')
episode: 34 training return: tensor(75.9799, device='cuda:0')
episode: 35 training return: tensor(-71.0007, device='cuda:0')
epoch: 9 test_true_pfm: 2173.58830132949 sim_pfm: 48.34798395526983
episode: 36 training return: tensor(-77.2873, device='cuda:0')
episode: 37 training return: tensor(102.3515, device='cuda:0')
episode: 38 training return: tensor(411.1423, device='cuda:0')
episode: 39 training return: tensor(-16.3244, device='cuda:0')
epoch: 10 test_true_pfm: 2693.018071305807 sim_pfm: 156.4058270470705
episode: 40 training return: tensor(156.4186, device='cuda:0')
episode: 41 training return: tensor(420.1931, device='cuda:0')
episode: 42 training return: tensor(-103.6807, device='cuda:0')
episode: 43 training return: tensor(390.0168, device='cuda:0')
epoch: 11 test_true_pfm: 2726.9891527834593 sim_pfm: 161.6628510732941
episode: 44 training return: tensor(-59.2655, device='cuda:0')
episode: 45 training return: tensor(-81.8056, device='cuda:0')
episode: 46 training return: tensor(171.8047, device='cuda:0')
episode: 47 training return: tensor(31.3491, device='cuda:0')
epoch: 12 test_true_pfm: 2220.5761345222113 sim_pfm: -12.156692266386623
episode: 48 training return: tensor(423.1024, device='cuda:0')
episode: 49 training return: tensor(-54.7490, device='cuda:0')
episode: 50 training return: tensor(180.7423, device='cuda:0')
episode: 51 training return: tensor(394.4430, device='cuda:0')
epoch: 13 test_true_pfm: 1602.5502028990074 sim_pfm: 56.49095072585624
episode: 52 training return: tensor(286.1776, device='cuda:0')
episode: 53 training return: tensor(-12.9138, device='cuda:0')
episode: 54 training return: tensor(-38.0683, device='cuda:0')
episode: 55 training return: tensor(-100.5074, device='cuda:0')
epoch: 14 test_true_pfm: 2503.113880166575 sim_pfm: -103.74844840409544
episode: 56 training return: tensor(419.3055, device='cuda:0')
episode: 57 training return: tensor(428.1370, device='cuda:0')
episode: 58 training return: tensor(-77.8505, device='cuda:0')
episode: 59 training return: tensor(-58.4827, device='cuda:0')
epoch: 15 test_true_pfm: 2426.9187109075924 sim_pfm: 20.86332190653775
episode: 60 training return: tensor(-45.2180, device='cuda:0')
episode: 61 training return: tensor(-136.2796, device='cuda:0')
episode: 62 training return: tensor(-72.6586, device='cuda:0')
episode: 63 training return: tensor(-122.3982, device='cuda:0')
epoch: 16 test_true_pfm: 2867.2869646431113 sim_pfm: 67.93324554765907
episode: 64 training return: tensor(199.0352, device='cuda:0')
episode: 65 training return: tensor(-85.2090, device='cuda:0')
episode: 66 training return: tensor(-61.2317, device='cuda:0')
episode: 67 training return: tensor(29.5001, device='cuda:0')
epoch: 17 test_true_pfm: 1458.0285656869626 sim_pfm: 187.3014893847867
episode: 68 training return: tensor(344.1323, device='cuda:0')
episode: 69 training return: tensor(-39.8645, device='cuda:0')
episode: 70 training return: tensor(273.6657, device='cuda:0')
episode: 71 training return: tensor(218.2926, device='cuda:0')
epoch: 18 test_true_pfm: 2068.734659433107 sim_pfm: -3.811403592893233
episode: 72 training return: tensor(399.5685, device='cuda:0')
episode: 73 training return: tensor(175.9015, device='cuda:0')
episode: 74 training return: tensor(93.7122, device='cuda:0')
episode: 75 training return: tensor(-1.3276, device='cuda:0')
epoch: 19 test_true_pfm: 1918.1485293733888 sim_pfm: 132.65774235637704
episode: 76 training return: tensor(302.0425, device='cuda:0')
episode: 77 training return: tensor(16.4633, device='cuda:0')
episode: 78 training return: tensor(399.5573, device='cuda:0')
episode: 79 training return: tensor(396.2953, device='cuda:0')
epoch: 20 test_true_pfm: 1612.2041157114047 sim_pfm: 294.94995111587923
episode: 80 training return: tensor(462.3815, device='cuda:0')
episode: 81 training return: tensor(3.9604, device='cuda:0')
episode: 82 training return: tensor(407.6916, device='cuda:0')
episode: 83 training return: tensor(430.8281, device='cuda:0')
epoch: 21 test_true_pfm: 1614.674600166955 sim_pfm: -56.065670106598795
episode: 84 training return: tensor(136.4438, device='cuda:0')
episode: 85 training return: tensor(-59.0136, device='cuda:0')
episode: 86 training return: tensor(-6.0706, device='cuda:0')
episode: 87 training return: tensor(250.0120, device='cuda:0')
epoch: 22 test_true_pfm: 1679.5104720304128 sim_pfm: -17.065742356237024
episode: 88 training return: tensor(393.6812, device='cuda:0')
episode: 89 training return: tensor(57.3146, device='cuda:0')
episode: 90 training return: tensor(-9.6207, device='cuda:0')
episode: 91 training return: tensor(423.1888, device='cuda:0')
epoch: 23 test_true_pfm: 2603.4087949478785 sim_pfm: 277.72128677004366
episode: 92 training return: tensor(-15.6137, device='cuda:0')
episode: 93 training return: tensor(-56.6827, device='cuda:0')
episode: 94 training return: tensor(184.4212, device='cuda:0')
episode: 95 training return: tensor(168.8182, device='cuda:0')
epoch: 24 test_true_pfm: 2576.2966301068213 sim_pfm: -44.23358757570774
episode: 96 training return: tensor(264.9257, device='cuda:0')
episode: 97 training return: tensor(429.1165, device='cuda:0')
episode: 98 training return: tensor(22.3343, device='cuda:0')
episode: 99 training return: tensor(-58.5369, device='cuda:0')
epoch: 25 test_true_pfm: 1872.2630336420546 sim_pfm: 68.1109853820041
episode: 100 training return: tensor(33.2542, device='cuda:0')
episode: 101 training return: tensor(339.3372, device='cuda:0')
episode: 102 training return: tensor(5.6833, device='cuda:0')
episode: 103 training return: tensor(463.6180, device='cuda:0')
epoch: 26 test_true_pfm: 2751.4932614946215 sim_pfm: 98.78620450904903
episode: 104 training return: tensor(-61.0031, device='cuda:0')
episode: 105 training return: tensor(423.1797, device='cuda:0')
episode: 106 training return: tensor(422.5167, device='cuda:0')
episode: 107 training return: tensor(427.1339, device='cuda:0')
epoch: 27 test_true_pfm: 1628.0039108827068 sim_pfm: 89.53489109563331
episode: 108 training return: tensor(180.2740, device='cuda:0')
episode: 109 training return: tensor(-16.0972, device='cuda:0')
episode: 110 training return: tensor(8.9180, device='cuda:0')
episode: 111 training return: tensor(199.0121, device='cuda:0')
epoch: 28 test_true_pfm: 2363.987869638782 sim_pfm: 104.11580430483446
episode: 112 training return: tensor(-121.8598, device='cuda:0')
episode: 113 training return: tensor(144.0515, device='cuda:0')
episode: 114 training return: tensor(236.2103, device='cuda:0')
episode: 115 training return: tensor(-24.9256, device='cuda:0')
epoch: 29 test_true_pfm: 2202.8790290517923 sim_pfm: 117.22799294519548
episode: 116 training return: tensor(412.2148, device='cuda:0')
episode: 117 training return: tensor(238.5219, device='cuda:0')
episode: 118 training return: tensor(-108.7813, device='cuda:0')
episode: 119 training return: tensor(-19.8146, device='cuda:0')
epoch: 30 test_true_pfm: 1651.7143041294457 sim_pfm: 73.58846113762895
episode: 120 training return: tensor(21.1348, device='cuda:0')
episode: 121 training return: tensor(430.4166, device='cuda:0')
episode: 122 training return: tensor(306.7107, device='cuda:0')
episode: 123 training return: tensor(-117.9702, device='cuda:0')
epoch: 31 test_true_pfm: 2657.2343700195383 sim_pfm: -45.398083066723
episode: 124 training return: tensor(21.6165, device='cuda:0')
episode: 125 training return: tensor(-33.4156, device='cuda:0')
episode: 126 training return: tensor(-8.8653, device='cuda:0')
episode: 127 training return: tensor(460.0735, device='cuda:0')
epoch: 32 test_true_pfm: 2139.1592283215646 sim_pfm: 159.53633550893088
episode: 128 training return: tensor(415.4112, device='cuda:0')
episode: 129 training return: tensor(-23.3558, device='cuda:0')
episode: 130 training return: tensor(391.2802, device='cuda:0')
episode: 131 training return: tensor(421.2756, device='cuda:0')
epoch: 33 test_true_pfm: 2103.18842956659 sim_pfm: 184.93526627717074
episode: 132 training return: tensor(284.0498, device='cuda:0')
episode: 133 training return: tensor(51.8580, device='cuda:0')
episode: 134 training return: tensor(433.9240, device='cuda:0')
episode: 135 training return: tensor(443.5987, device='cuda:0')
epoch: 34 test_true_pfm: 2604.180164839316 sim_pfm: 222.89727580506587
episode: 136 training return: tensor(145.4740, device='cuda:0')
episode: 137 training return: tensor(446.5125, device='cuda:0')
episode: 138 training return: tensor(-29.0147, device='cuda:0')
episode: 139 training return: tensor(209.4944, device='cuda:0')
epoch: 35 test_true_pfm: 2214.073603932101 sim_pfm: 159.3591139453929
episode: 140 training return: tensor(338.4714, device='cuda:0')
episode: 141 training return: tensor(415.1012, device='cuda:0')
episode: 142 training return: tensor(192.9086, device='cuda:0')
episode: 143 training return: tensor(-12.4264, device='cuda:0')
epoch: 36 test_true_pfm: 1625.352852829167 sim_pfm: 127.9575614692779
episode: 144 training return: tensor(285.7101, device='cuda:0')
episode: 145 training return: tensor(297.4611, device='cuda:0')
episode: 146 training return: tensor(134.4120, device='cuda:0')
episode: 147 training return: tensor(-109.4523, device='cuda:0')
epoch: 37 test_true_pfm: 2245.7883303205813 sim_pfm: -17.54145015954661
episode: 148 training return: tensor(-9.9300, device='cuda:0')
episode: 149 training return: tensor(421.8329, device='cuda:0')
episode: 150 training return: tensor(175.1162, device='cuda:0')
episode: 151 training return: tensor(-22.7231, device='cuda:0')
epoch: 38 test_true_pfm: 2505.3973153572247 sim_pfm: 121.78516501847965
episode: 152 training return: tensor(461.2208, device='cuda:0')
episode: 153 training return: tensor(-23.8325, device='cuda:0')
episode: 154 training return: tensor(99.8290, device='cuda:0')
episode: 155 training return: tensor(26.5663, device='cuda:0')
epoch: 39 test_true_pfm: 2657.377098129668 sim_pfm: 239.91922971451035
episode: 156 training return: tensor(-36.9039, device='cuda:0')
episode: 157 training return: tensor(404.9211, device='cuda:0')
episode: 158 training return: tensor(191.7148, device='cuda:0')
episode: 159 training return: tensor(-20.9809, device='cuda:0')
epoch: 40 test_true_pfm: 1785.2836940792952 sim_pfm: 70.27659379108809
episode: 160 training return: tensor(427.5147, device='cuda:0')
episode: 161 training return: tensor(-35.6367, device='cuda:0')
episode: 162 training return: tensor(230.6049, device='cuda:0')
episode: 163 training return: tensor(281.3521, device='cuda:0')
epoch: 41 test_true_pfm: 2117.8257722105795 sim_pfm: 271.663559496752
episode: 164 training return: tensor(429.4435, device='cuda:0')
episode: 165 training return: tensor(418.2336, device='cuda:0')
episode: 166 training return: tensor(41.4337, device='cuda:0')
episode: 167 training return: tensor(420.9530, device='cuda:0')
epoch: 42 test_true_pfm: 2652.4460618627104 sim_pfm: 304.6216127256242
episode: 168 training return: tensor(242.8913, device='cuda:0')
episode: 169 training return: tensor(184.8779, device='cuda:0')
episode: 170 training return: tensor(455.4192, device='cuda:0')
episode: 171 training return: tensor(427.0169, device='cuda:0')
epoch: 43 test_true_pfm: 2517.6230490602006 sim_pfm: 424.4301941767141
episode: 172 training return: tensor(-65.3822, device='cuda:0')
episode: 173 training return: tensor(-21.0541, device='cuda:0')
episode: 174 training return: tensor(-37.8266, device='cuda:0')
episode: 175 training return: tensor(453.5416, device='cuda:0')
epoch: 44 test_true_pfm: 1604.8356928667665 sim_pfm: -20.525220919943724
episode: 176 training return: tensor(-85.1275, device='cuda:0')
episode: 177 training return: tensor(167.7777, device='cuda:0')
episode: 178 training return: tensor(433.7459, device='cuda:0')
episode: 179 training return: tensor(-17.1798, device='cuda:0')
epoch: 45 test_true_pfm: 2247.0843718160236 sim_pfm: 240.44802108364334
episode: 180 training return: tensor(391.6569, device='cuda:0')
episode: 181 training return: tensor(228.4713, device='cuda:0')
episode: 182 training return: tensor(330.2151, device='cuda:0')
episode: 183 training return: tensor(414.0792, device='cuda:0')
epoch: 46 test_true_pfm: 2659.1864306139955 sim_pfm: 91.48677605819346
episode: 184 training return: tensor(255.8951, device='cuda:0')
episode: 185 training return: tensor(421.5355, device='cuda:0')
episode: 186 training return: tensor(-12.8337, device='cuda:0')
episode: 187 training return: tensor(433.5116, device='cuda:0')
epoch: 47 test_true_pfm: 2156.6536022390783 sim_pfm: 137.41535676112593
episode: 188 training return: tensor(292.9560, device='cuda:0')
episode: 189 training return: tensor(-12.5292, device='cuda:0')
episode: 190 training return: tensor(349.2942, device='cuda:0')
episode: 191 training return: tensor(411.9132, device='cuda:0')
epoch: 48 test_true_pfm: 2143.0204573874485 sim_pfm: 132.93153910913193
episode: 192 training return: tensor(15.8872, device='cuda:0')
episode: 193 training return: tensor(70.0876, device='cuda:0')
episode: 194 training return: tensor(-49.1897, device='cuda:0')
episode: 195 training return: tensor(411.3725, device='cuda:0')
epoch: 49 test_true_pfm: 1804.8851853869735 sim_pfm: -13.970896625027914
episode: 196 training return: tensor(-57.8682, device='cuda:0')
episode: 197 training return: tensor(-38.6226, device='cuda:0')
episode: 198 training return: tensor(254.5421, device='cuda:0')
episode: 199 training return: tensor(12.5516, device='cuda:0')
epoch: 50 test_true_pfm: 2803.0334256571173 sim_pfm: 147.01128732444099
episode: 200 training return: tensor(426.1279, device='cuda:0')
episode: 201 training return: tensor(426.4240, device='cuda:0')
episode: 202 training return: tensor(392.1683, device='cuda:0')
episode: 203 training return: tensor(-31.6437, device='cuda:0')
epoch: 51 test_true_pfm: 2237.7698077722744 sim_pfm: 196.8207327101069
episode: 204 training return: tensor(348.5960, device='cuda:0')
episode: 205 training return: tensor(422.6397, device='cuda:0')
episode: 206 training return: tensor(422.0836, device='cuda:0')
episode: 207 training return: tensor(-14.7736, device='cuda:0')
epoch: 52 test_true_pfm: 1754.197491850133 sim_pfm: 37.18895926140249
episode: 208 training return: tensor(308.0719, device='cuda:0')
episode: 209 training return: tensor(-38.7898, device='cuda:0')
episode: 210 training return: tensor(164.4692, device='cuda:0')
episode: 211 training return: tensor(47.1194, device='cuda:0')
epoch: 53 test_true_pfm: 2789.3905840181556 sim_pfm: 127.84342732004977
episode: 212 training return: tensor(280.6531, device='cuda:0')
episode: 213 training return: tensor(-8.7671, device='cuda:0')
episode: 214 training return: tensor(180.7401, device='cuda:0')
episode: 215 training return: tensor(-97.3036, device='cuda:0')
epoch: 54 test_true_pfm: 2802.1612157726827 sim_pfm: 158.36016275023576
episode: 216 training return: tensor(283.4383, device='cuda:0')
episode: 217 training return: tensor(-80.2162, device='cuda:0')
episode: 218 training return: tensor(18.7128, device='cuda:0')
episode: 219 training return: tensor(-18.8593, device='cuda:0')
epoch: 55 test_true_pfm: 2406.8230650438272 sim_pfm: 26.937216320618365
episode: 220 training return: tensor(418.7850, device='cuda:0')
episode: 221 training return: tensor(243.6279, device='cuda:0')
episode: 222 training return: tensor(340.1700, device='cuda:0')
episode: 223 training return: tensor(422.3788, device='cuda:0')
epoch: 56 test_true_pfm: 2688.2826301221044 sim_pfm: -6.682702400139533
episode: 224 training return: tensor(6.8549, device='cuda:0')
episode: 225 training return: tensor(29.1586, device='cuda:0')
episode: 226 training return: tensor(419.3014, device='cuda:0')
episode: 227 training return: tensor(409.1062, device='cuda:0')
epoch: 57 test_true_pfm: 2155.4440386981537 sim_pfm: 162.28422490148418
episode: 228 training return: tensor(457.9993, device='cuda:0')
episode: 229 training return: tensor(408.2872, device='cuda:0')
episode: 230 training return: tensor(436.8859, device='cuda:0')
episode: 231 training return: tensor(392.2787, device='cuda:0')
epoch: 58 test_true_pfm: 2269.2570699418707 sim_pfm: 23.345234965653315
episode: 232 training return: tensor(409.9430, device='cuda:0')
episode: 233 training return: tensor(260.1183, device='cuda:0')
episode: 234 training return: tensor(418.0359, device='cuda:0')
episode: 235 training return: tensor(-32.0555, device='cuda:0')
epoch: 59 test_true_pfm: 1780.132519272053 sim_pfm: 148.93146105355117
episode: 236 training return: tensor(-24.6922, device='cuda:0')
episode: 237 training return: tensor(42.9131, device='cuda:0')
episode: 238 training return: tensor(441.1939, device='cuda:0')
episode: 239 training return: tensor(-65.2558, device='cuda:0')
epoch: 60 test_true_pfm: 2284.16314628225 sim_pfm: 91.10351685900241
episode: 240 training return: tensor(64.6960, device='cuda:0')
episode: 241 training return: tensor(34.7205, device='cuda:0')
episode: 242 training return: tensor(-30.9236, device='cuda:0')
episode: 243 training return: tensor(403.5214, device='cuda:0')
epoch: 61 test_true_pfm: 2319.524115235166 sim_pfm: 181.34494180513624
episode: 244 training return: tensor(417.4449, device='cuda:0')
episode: 245 training return: tensor(78.8384, device='cuda:0')
episode: 246 training return: tensor(-36.8645, device='cuda:0')
episode: 247 training return: tensor(416.1054, device='cuda:0')
epoch: 62 test_true_pfm: 1991.7116524670066 sim_pfm: 135.80732604430523
episode: 248 training return: tensor(429.5948, device='cuda:0')
episode: 249 training return: tensor(419.2043, device='cuda:0')
episode: 250 training return: tensor(172.5929, device='cuda:0')
episode: 251 training return: tensor(3.6663, device='cuda:0')
epoch: 63 test_true_pfm: 2744.798439210083 sim_pfm: 30.380648848057415
episode: 252 training return: tensor(433.3695, device='cuda:0')
episode: 253 training return: tensor(36.5097, device='cuda:0')
episode: 254 training return: tensor(379.7227, device='cuda:0')
episode: 255 training return: tensor(421.8391, device='cuda:0')
epoch: 64 test_true_pfm: 2563.111345387222 sim_pfm: 130.8205345214034
episode: 256 training return: tensor(270.1664, device='cuda:0')
episode: 257 training return: tensor(-22.7560, device='cuda:0')
episode: 258 training return: tensor(35.2052, device='cuda:0')
episode: 259 training return: tensor(335.5792, device='cuda:0')
epoch: 65 test_true_pfm: 2762.8051619552243 sim_pfm: 176.85010655208802
episode: 260 training return: tensor(423.5277, device='cuda:0')
episode: 261 training return: tensor(284.6135, device='cuda:0')
episode: 262 training return: tensor(427.8637, device='cuda:0')
episode: 263 training return: tensor(468.8290, device='cuda:0')
epoch: 66 test_true_pfm: 2123.4160144952225 sim_pfm: 118.1664325784077
episode: 264 training return: tensor(211.8872, device='cuda:0')
episode: 265 training return: tensor(-77.0852, device='cuda:0')
episode: 266 training return: tensor(443.1439, device='cuda:0')
episode: 267 training return: tensor(397.1031, device='cuda:0')
epoch: 67 test_true_pfm: 2713.4657777756697 sim_pfm: 13.460327595084285
episode: 268 training return: tensor(254.5029, device='cuda:0')
episode: 269 training return: tensor(432.7496, device='cuda:0')
episode: 270 training return: tensor(-77.3157, device='cuda:0')
episode: 271 training return: tensor(-34.5373, device='cuda:0')
epoch: 68 test_true_pfm: 2150.018745345795 sim_pfm: 322.7373592630029
episode: 272 training return: tensor(-47.4164, device='cuda:0')
episode: 273 training return: tensor(205.4495, device='cuda:0')
episode: 274 training return: tensor(421.5964, device='cuda:0')
episode: 275 training return: tensor(207.3206, device='cuda:0')
epoch: 69 test_true_pfm: 2242.515797415936 sim_pfm: 46.70007436627444
episode: 276 training return: tensor(220.5555, device='cuda:0')
episode: 277 training return: tensor(14.6963, device='cuda:0')
episode: 278 training return: tensor(401.9777, device='cuda:0')
episode: 279 training return: tensor(203.1552, device='cuda:0')
epoch: 70 test_true_pfm: 2307.814445223588 sim_pfm: 456.71254767450347
episode: 280 training return: tensor(40.4475, device='cuda:0')
episode: 281 training return: tensor(416.0197, device='cuda:0')
episode: 282 training return: tensor(466.4002, device='cuda:0')
episode: 283 training return: tensor(8.6409, device='cuda:0')
epoch: 71 test_true_pfm: 1739.6743158331585 sim_pfm: 258.98806719943724
episode: 284 training return: tensor(46.6015, device='cuda:0')
episode: 285 training return: tensor(407.2034, device='cuda:0')
episode: 286 training return: tensor(-69.6819, device='cuda:0')
episode: 287 training return: tensor(414.8684, device='cuda:0')
epoch: 72 test_true_pfm: 2718.3447086201218 sim_pfm: 381.3935282290331
episode: 288 training return: tensor(328.9156, device='cuda:0')
episode: 289 training return: tensor(409.4670, device='cuda:0')
episode: 290 training return: tensor(424.1954, device='cuda:0')
episode: 291 training return: tensor(-8.0389, device='cuda:0')
epoch: 73 test_true_pfm: 1966.137265622434 sim_pfm: 149.23070518612317
episode: 292 training return: tensor(228.2641, device='cuda:0')
episode: 293 training return: tensor(415.7420, device='cuda:0')
episode: 294 training return: tensor(358.9303, device='cuda:0')
episode: 295 training return: tensor(309.4656, device='cuda:0')
epoch: 74 test_true_pfm: 1893.5905936813451 sim_pfm: 66.15200076050435
episode: 296 training return: tensor(-27.9965, device='cuda:0')
episode: 297 training return: tensor(-18.6545, device='cuda:0')
episode: 298 training return: tensor(230.9566, device='cuda:0')
episode: 299 training return: tensor(277.2964, device='cuda:0')
epoch: 75 test_true_pfm: 3168.4904022651376 sim_pfm: 143.50817038400177
episode: 300 training return: tensor(187.0519, device='cuda:0')
episode: 301 training return: tensor(43.1548, device='cuda:0')
episode: 302 training return: tensor(438.0765, device='cuda:0')
episode: 303 training return: tensor(458.9311, device='cuda:0')
epoch: 76 test_true_pfm: 2551.483873716905 sim_pfm: 210.6779414732106
episode: 304 training return: tensor(-3.5785, device='cuda:0')
episode: 305 training return: tensor(43.8042, device='cuda:0')
episode: 306 training return: tensor(-39.6675, device='cuda:0')
episode: 307 training return: tensor(417.2868, device='cuda:0')
epoch: 77 test_true_pfm: 2751.4401285604904 sim_pfm: 148.62705104829124
episode: 308 training return: tensor(30.9267, device='cuda:0')
episode: 309 training return: tensor(184.0480, device='cuda:0')
episode: 310 training return: tensor(-46.1765, device='cuda:0')
episode: 311 training return: tensor(66.8209, device='cuda:0')
epoch: 78 test_true_pfm: 2377.5299540403244 sim_pfm: 324.45095388938597
episode: 312 training return: tensor(464.1772, device='cuda:0')
episode: 313 training return: tensor(271.5720, device='cuda:0')
episode: 314 training return: tensor(419.7887, device='cuda:0')
episode: 315 training return: tensor(412.2379, device='cuda:0')
epoch: 79 test_true_pfm: 1661.0512921805573 sim_pfm: 274.5589519929684
episode: 316 training return: tensor(406.2498, device='cuda:0')
episode: 317 training return: tensor(465.9334, device='cuda:0')
episode: 318 training return: tensor(459.3163, device='cuda:0')
episode: 319 training return: tensor(411.0582, device='cuda:0')
epoch: 80 test_true_pfm: 2715.073326301311 sim_pfm: 281.6702163935115
episode: 320 training return: tensor(64.3801, device='cuda:0')
episode: 321 training return: tensor(-13.5789, device='cuda:0')
episode: 322 training return: tensor(424.9823, device='cuda:0')
episode: 323 training return: tensor(43.6471, device='cuda:0')
epoch: 81 test_true_pfm: 2019.0417475479637 sim_pfm: 350.2649444253572
episode: 324 training return: tensor(264.2460, device='cuda:0')
episode: 325 training return: tensor(301.3163, device='cuda:0')
episode: 326 training return: tensor(431.3935, device='cuda:0')
episode: 327 training return: tensor(312.2507, device='cuda:0')
epoch: 82 test_true_pfm: 2293.262079434913 sim_pfm: 32.99734672622677
episode: 328 training return: tensor(85.1219, device='cuda:0')
episode: 329 training return: tensor(321.1240, device='cuda:0')
episode: 330 training return: tensor(209.6721, device='cuda:0')
episode: 331 training return: tensor(-14.6657, device='cuda:0')
epoch: 83 test_true_pfm: 2770.956820016872 sim_pfm: 183.6014885037827
episode: 332 training return: tensor(136.1134, device='cuda:0')
episode: 333 training return: tensor(410.0796, device='cuda:0')
episode: 334 training return: tensor(-87.2317, device='cuda:0')
episode: 335 training return: tensor(437.2572, device='cuda:0')
epoch: 84 test_true_pfm: 2154.9742346463354 sim_pfm: 77.04882526007714
episode: 336 training return: tensor(59.7025, device='cuda:0')
episode: 337 training return: tensor(470.9952, device='cuda:0')
episode: 338 training return: tensor(4.0909, device='cuda:0')
episode: 339 training return: tensor(104.0982, device='cuda:0')
epoch: 85 test_true_pfm: 2171.016162019776 sim_pfm: 151.66937713662628
episode: 340 training return: tensor(28.5983, device='cuda:0')
episode: 341 training return: tensor(426.4651, device='cuda:0')
episode: 342 training return: tensor(-2.3310, device='cuda:0')
episode: 343 training return: tensor(394.5287, device='cuda:0')
epoch: 86 test_true_pfm: 3117.9903902735837 sim_pfm: 287.97905398874235
episode: 344 training return: tensor(423.1772, device='cuda:0')
episode: 345 training return: tensor(-37.9068, device='cuda:0')
episode: 346 training return: tensor(313.5014, device='cuda:0')
episode: 347 training return: tensor(414.7306, device='cuda:0')
epoch: 87 test_true_pfm: 2665.863655214746 sim_pfm: 388.7269499752826
episode: 348 training return: tensor(422.4959, device='cuda:0')
episode: 349 training return: tensor(389.9019, device='cuda:0')
episode: 350 training return: tensor(58.8706, device='cuda:0')
episode: 351 training return: tensor(196.4029, device='cuda:0')
epoch: 88 test_true_pfm: 2167.252712301701 sim_pfm: 193.60102462372743
episode: 352 training return: tensor(272.9026, device='cuda:0')
episode: 353 training return: tensor(-35.7523, device='cuda:0')
episode: 354 training return: tensor(274.9145, device='cuda:0')
episode: 355 training return: tensor(423.9971, device='cuda:0')
epoch: 89 test_true_pfm: 2645.1029940066937 sim_pfm: 106.54883465279515
episode: 356 training return: tensor(214.8626, device='cuda:0')
episode: 357 training return: tensor(450.9772, device='cuda:0')
episode: 358 training return: tensor(360.1665, device='cuda:0')
episode: 359 training return: tensor(-47.1729, device='cuda:0')
epoch: 90 test_true_pfm: 1775.94042132017 sim_pfm: 75.4749438946407
episode: 360 training return: tensor(425.5516, device='cuda:0')
episode: 361 training return: tensor(230.4274, device='cuda:0')
episode: 362 training return: tensor(-69.5310, device='cuda:0')
episode: 363 training return: tensor(225.9566, device='cuda:0')
epoch: 91 test_true_pfm: 1871.438380831248 sim_pfm: 242.9868174799485
episode: 364 training return: tensor(-106.8419, device='cuda:0')
episode: 365 training return: tensor(314.2203, device='cuda:0')
episode: 366 training return: tensor(465.6528, device='cuda:0')
episode: 367 training return: tensor(463.5051, device='cuda:0')
epoch: 92 test_true_pfm: 3180.7653698398994 sim_pfm: 232.56992846824383
episode: 368 training return: tensor(434.6873, device='cuda:0')
episode: 369 training return: tensor(370.8096, device='cuda:0')
episode: 370 training return: tensor(-51.4294, device='cuda:0')
episode: 371 training return: tensor(410.3795, device='cuda:0')
epoch: 93 test_true_pfm: 2312.1954893237275 sim_pfm: 163.51329310370298
episode: 372 training return: tensor(340.4443, device='cuda:0')
episode: 373 training return: tensor(429.8096, device='cuda:0')
episode: 374 training return: tensor(232.9396, device='cuda:0')
episode: 375 training return: tensor(26.6185, device='cuda:0')
epoch: 94 test_true_pfm: 1720.134064895801 sim_pfm: 239.87875296466518
episode: 376 training return: tensor(105.2500, device='cuda:0')
episode: 377 training return: tensor(487.7055, device='cuda:0')
episode: 378 training return: tensor(341.9923, device='cuda:0')
episode: 379 training return: tensor(-91.2514, device='cuda:0')
epoch: 95 test_true_pfm: 1680.052886928351 sim_pfm: 144.7946785032594
episode: 380 training return: tensor(407.4138, device='cuda:0')
episode: 381 training return: tensor(435.2457, device='cuda:0')
episode: 382 training return: tensor(49.9015, device='cuda:0')
episode: 383 training return: tensor(241.2457, device='cuda:0')
epoch: 96 test_true_pfm: 3167.1405896710526 sim_pfm: 312.37613707843894
episode: 384 training return: tensor(70.6530, device='cuda:0')
episode: 385 training return: tensor(229.9245, device='cuda:0')
episode: 386 training return: tensor(430.2879, device='cuda:0')
episode: 387 training return: tensor(-21.0196, device='cuda:0')
epoch: 97 test_true_pfm: 1710.2179151919836 sim_pfm: 171.1287739214798
episode: 388 training return: tensor(-5.4003, device='cuda:0')
episode: 389 training return: tensor(65.9973, device='cuda:0')
episode: 390 training return: tensor(-1.3102, device='cuda:0')
episode: 391 training return: tensor(406.8080, device='cuda:0')
epoch: 98 test_true_pfm: 2136.320732383288 sim_pfm: 174.1012707760092
episode: 392 training return: tensor(334.4727, device='cuda:0')
episode: 393 training return: tensor(400.3384, device='cuda:0')
episode: 394 training return: tensor(445.8141, device='cuda:0')
episode: 395 training return: tensor(-35.1689, device='cuda:0')
epoch: 99 test_true_pfm: 1652.8092324964364 sim_pfm: 159.39152053545695
episode: 396 training return: tensor(281.9218, device='cuda:0')
episode: 397 training return: tensor(66.8987, device='cuda:0')
episode: 398 training return: tensor(-9.4967, device='cuda:0')
episode: 399 training return: tensor(413.4661, device='cuda:0')
epoch: 100 test_true_pfm: 3196.3508399772404 sim_pfm: 249.39040458312957
episode: 400 training return: tensor(233.1675, device='cuda:0')
episode: 401 training return: tensor(434.9100, device='cuda:0')
episode: 402 training return: tensor(112.9131, device='cuda:0')
episode: 403 training return: tensor(-101.4381, device='cuda:0')
epoch: 101 test_true_pfm: 2739.7789878874846 sim_pfm: 140.38575111539103
episode: 404 training return: tensor(218.4624, device='cuda:0')
episode: 405 training return: tensor(453.2308, device='cuda:0')
episode: 406 training return: tensor(-88.3718, device='cuda:0')
episode: 407 training return: tensor(99.7820, device='cuda:0')
epoch: 102 test_true_pfm: 2699.1330050906813 sim_pfm: 279.7696691558813
episode: 408 training return: tensor(419.4942, device='cuda:0')
episode: 409 training return: tensor(443.5295, device='cuda:0')
episode: 410 training return: tensor(429.8654, device='cuda:0')
episode: 411 training return: tensor(400.7464, device='cuda:0')
epoch: 103 test_true_pfm: 2187.1068651953556 sim_pfm: 166.06512950560622
episode: 412 training return: tensor(420.8212, device='cuda:0')
episode: 413 training return: tensor(8.2333, device='cuda:0')
episode: 414 training return: tensor(237.5026, device='cuda:0')
episode: 415 training return: tensor(-7.9532, device='cuda:0')
epoch: 104 test_true_pfm: 1944.2810016829453 sim_pfm: 255.89690079033608
episode: 416 training return: tensor(74.5331, device='cuda:0')
episode: 417 training return: tensor(-49.3950, device='cuda:0')
episode: 418 training return: tensor(414.1840, device='cuda:0')
episode: 419 training return: tensor(-25.0193, device='cuda:0')
epoch: 105 test_true_pfm: 1691.6041182070715 sim_pfm: 129.071903334038
episode: 420 training return: tensor(478.7388, device='cuda:0')
episode: 421 training return: tensor(413.8578, device='cuda:0')
episode: 422 training return: tensor(416.1744, device='cuda:0')
episode: 423 training return: tensor(-16.5990, device='cuda:0')
epoch: 106 test_true_pfm: 2243.500811572348 sim_pfm: 133.7667812257229
episode: 424 training return: tensor(27.9193, device='cuda:0')
episode: 425 training return: tensor(458.5860, device='cuda:0')
episode: 426 training return: tensor(433.4159, device='cuda:0')
episode: 427 training return: tensor(422.7078, device='cuda:0')
epoch: 107 test_true_pfm: 1691.5979237632803 sim_pfm: 282.0364759072351
episode: 428 training return: tensor(461.3306, device='cuda:0')
episode: 429 training return: tensor(212.9295, device='cuda:0')
episode: 430 training return: tensor(-53.3513, device='cuda:0')
episode: 431 training return: tensor(-99.9356, device='cuda:0')
epoch: 108 test_true_pfm: 2699.3747330620463 sim_pfm: 163.4788082288966
episode: 432 training return: tensor(-60.6912, device='cuda:0')
episode: 433 training return: tensor(437.8541, device='cuda:0')
episode: 434 training return: tensor(163.7958, device='cuda:0')
episode: 435 training return: tensor(412.1848, device='cuda:0')
epoch: 109 test_true_pfm: 3200.884570069082 sim_pfm: 198.5362571425891
episode: 436 training return: tensor(77.7366, device='cuda:0')
episode: 437 training return: tensor(415.0220, device='cuda:0')
episode: 438 training return: tensor(429.3197, device='cuda:0')
episode: 439 training return: tensor(-39.6793, device='cuda:0')
epoch: 110 test_true_pfm: 2732.9236479901792 sim_pfm: 34.93347515166776
episode: 440 training return: tensor(220.7597, device='cuda:0')
episode: 441 training return: tensor(1.0507, device='cuda:0')
episode: 442 training return: tensor(425.6407, device='cuda:0')
episode: 443 training return: tensor(150.0234, device='cuda:0')
epoch: 111 test_true_pfm: 3195.272380260389 sim_pfm: 287.1487617835519
episode: 444 training return: tensor(467.7428, device='cuda:0')
episode: 445 training return: tensor(37.3673, device='cuda:0')
episode: 446 training return: tensor(207.7152, device='cuda:0')
episode: 447 training return: tensor(4.6264, device='cuda:0')
epoch: 112 test_true_pfm: 2852.7362543713657 sim_pfm: -11.638704260956729
episode: 448 training return: tensor(410.5281, device='cuda:0')
episode: 449 training return: tensor(57.4070, device='cuda:0')
episode: 450 training return: tensor(92.3433, device='cuda:0')
episode: 451 training return: tensor(3.2559, device='cuda:0')
epoch: 113 test_true_pfm: 1638.7241160129215 sim_pfm: 129.84564884627858
episode: 452 training return: tensor(393.3333, device='cuda:0')
episode: 453 training return: tensor(72.7233, device='cuda:0')
episode: 454 training return: tensor(227.1313, device='cuda:0')
episode: 455 training return: tensor(-51.1978, device='cuda:0')
epoch: 114 test_true_pfm: 3213.1296313430926 sim_pfm: 389.6698472142064
episode: 456 training return: tensor(-44.1104, device='cuda:0')
episode: 457 training return: tensor(420.9812, device='cuda:0')
episode: 458 training return: tensor(55.8973, device='cuda:0')
episode: 459 training return: tensor(333.5279, device='cuda:0')
epoch: 115 test_true_pfm: 2749.0878823915177 sim_pfm: 142.89056984179965
episode: 460 training return: tensor(469.8040, device='cuda:0')
episode: 461 training return: tensor(407.0166, device='cuda:0')
episode: 462 training return: tensor(4.3437, device='cuda:0')
episode: 463 training return: tensor(52.2513, device='cuda:0')
epoch: 116 test_true_pfm: 2686.3699037615643 sim_pfm: 411.33519860527787
episode: 464 training return: tensor(457.5415, device='cuda:0')
episode: 465 training return: tensor(41.2794, device='cuda:0')
episode: 466 training return: tensor(196.4552, device='cuda:0')
episode: 467 training return: tensor(331.6477, device='cuda:0')
epoch: 117 test_true_pfm: 2142.9279554782806 sim_pfm: 140.99867008855412
episode: 468 training return: tensor(423.7427, device='cuda:0')
episode: 469 training return: tensor(80.2132, device='cuda:0')
episode: 470 training return: tensor(114.6208, device='cuda:0')
episode: 471 training return: tensor(27.4063, device='cuda:0')
epoch: 118 test_true_pfm: 3202.680443967871 sim_pfm: 434.0982149161573
episode: 472 training return: tensor(319.1344, device='cuda:0')
episode: 473 training return: tensor(-45.4222, device='cuda:0')
episode: 474 training return: tensor(428.3782, device='cuda:0')
episode: 475 training return: tensor(421.3051, device='cuda:0')
epoch: 119 test_true_pfm: 1658.8501235019876 sim_pfm: 17.68298795908534
episode: 476 training return: tensor(405.9312, device='cuda:0')
episode: 477 training return: tensor(139.2361, device='cuda:0')
episode: 478 training return: tensor(-18.9729, device='cuda:0')
episode: 479 training return: tensor(415.3176, device='cuda:0')
epoch: 120 test_true_pfm: 2762.4894583950568 sim_pfm: 275.02422018521855
episode: 480 training return: tensor(261.8864, device='cuda:0')
episode: 481 training return: tensor(413.8752, device='cuda:0')
episode: 482 training return: tensor(125.2196, device='cuda:0')
episode: 483 training return: tensor(150.6107, device='cuda:0')
epoch: 121 test_true_pfm: 2219.0937829914287 sim_pfm: 290.7823637637678
episode: 484 training return: tensor(-37.5909, device='cuda:0')
episode: 485 training return: tensor(300.5833, device='cuda:0')
episode: 486 training return: tensor(-12.3911, device='cuda:0')
episode: 487 training return: tensor(56.5010, device='cuda:0')
epoch: 122 test_true_pfm: 2714.6051803839364 sim_pfm: 290.49790580578457
episode: 488 training return: tensor(296.5424, device='cuda:0')
episode: 489 training return: tensor(-12.3012, device='cuda:0')
episode: 490 training return: tensor(414.6388, device='cuda:0')
episode: 491 training return: tensor(413.0058, device='cuda:0')
epoch: 123 test_true_pfm: 2685.602313374806 sim_pfm: 298.64635416217305
episode: 492 training return: tensor(352.2455, device='cuda:0')
episode: 493 training return: tensor(-68.1489, device='cuda:0')
episode: 494 training return: tensor(120.8823, device='cuda:0')
episode: 495 training return: tensor(424.7097, device='cuda:0')
epoch: 124 test_true_pfm: 3200.4021178931416 sim_pfm: 381.33092298172414
episode: 496 training return: tensor(419.5496, device='cuda:0')
episode: 497 training return: tensor(380.5538, device='cuda:0')
episode: 498 training return: tensor(28.3071, device='cuda:0')
episode: 499 training return: tensor(71.8041, device='cuda:0')
epoch: 125 test_true_pfm: 2612.735949125555 sim_pfm: 434.4961608899757
episode: 500 training return: tensor(28.3922, device='cuda:0')
episode: 501 training return: tensor(419.3522, device='cuda:0')
episode: 502 training return: tensor(218.1254, device='cuda:0')
episode: 503 training return: tensor(404.0204, device='cuda:0')
epoch: 126 test_true_pfm: 2248.5317557046874 sim_pfm: 87.7622916981345
episode: 504 training return: tensor(426.4445, device='cuda:0')
episode: 505 training return: tensor(225.9932, device='cuda:0')
episode: 506 training return: tensor(425.7263, device='cuda:0')
episode: 507 training return: tensor(15.3950, device='cuda:0')
epoch: 127 test_true_pfm: 1797.645270472658 sim_pfm: 429.3150101809879
episode: 508 training return: tensor(422.8521, device='cuda:0')
episode: 509 training return: tensor(461.2511, device='cuda:0')
episode: 510 training return: tensor(-0.0339, device='cuda:0')
episode: 511 training return: tensor(429.5165, device='cuda:0')
epoch: 128 test_true_pfm: 2434.263674200392 sim_pfm: 275.46904792729765
episode: 512 training return: tensor(474.7054, device='cuda:0')
episode: 513 training return: tensor(296.3065, device='cuda:0')
episode: 514 training return: tensor(410.8649, device='cuda:0')
episode: 515 training return: tensor(411.6900, device='cuda:0')
epoch: 129 test_true_pfm: 2802.2646724427736 sim_pfm: 54.96755289443536
episode: 516 training return: tensor(468.9487, device='cuda:0')
episode: 517 training return: tensor(107.7475, device='cuda:0')
episode: 518 training return: tensor(402.3919, device='cuda:0')
episode: 519 training return: tensor(15.3590, device='cuda:0')
epoch: 130 test_true_pfm: 2731.121930358764 sim_pfm: 120.14858099158543
episode: 520 training return: tensor(5.9729, device='cuda:0')
episode: 521 training return: tensor(427.9503, device='cuda:0')
episode: 522 training return: tensor(-22.5609, device='cuda:0')
episode: 523 training return: tensor(185.2822, device='cuda:0')
epoch: 131 test_true_pfm: 2186.325139445804 sim_pfm: 324.2497919145001
episode: 524 training return: tensor(427.9026, device='cuda:0')
episode: 525 training return: tensor(346.9946, device='cuda:0')
episode: 526 training return: tensor(23.3514, device='cuda:0')
episode: 527 training return: tensor(424.1615, device='cuda:0')
epoch: 132 test_true_pfm: 2974.1935108548882 sim_pfm: 273.5803875202352
episode: 528 training return: tensor(416.9533, device='cuda:0')
episode: 529 training return: tensor(-10.0155, device='cuda:0')
episode: 530 training return: tensor(36.4804, device='cuda:0')
episode: 531 training return: tensor(-120.7295, device='cuda:0')
epoch: 133 test_true_pfm: 2229.251390760021 sim_pfm: 251.06178901965419
episode: 532 training return: tensor(211.6565, device='cuda:0')
episode: 533 training return: tensor(412.2733, device='cuda:0')
episode: 534 training return: tensor(167.5215, device='cuda:0')
episode: 535 training return: tensor(365.0235, device='cuda:0')
epoch: 134 test_true_pfm: 2672.8539279395513 sim_pfm: 43.78287626402258
episode: 536 training return: tensor(423.9443, device='cuda:0')
episode: 537 training return: tensor(442.4085, device='cuda:0')
episode: 538 training return: tensor(424.0283, device='cuda:0')
episode: 539 training return: tensor(159.0070, device='cuda:0')
epoch: 135 test_true_pfm: 2435.8073118164552 sim_pfm: 218.00998312361966
episode: 540 training return: tensor(457.2552, device='cuda:0')
episode: 541 training return: tensor(427.0799, device='cuda:0')
episode: 542 training return: tensor(360.8421, device='cuda:0')
episode: 543 training return: tensor(427.6865, device='cuda:0')
epoch: 136 test_true_pfm: 1957.4634198420856 sim_pfm: 134.05273689317983
episode: 544 training return: tensor(420.0237, device='cuda:0')
episode: 545 training return: tensor(407.3922, device='cuda:0')
episode: 546 training return: tensor(-15.0893, device='cuda:0')
episode: 547 training return: tensor(427.3899, device='cuda:0')
epoch: 137 test_true_pfm: 3042.952669591649 sim_pfm: 175.68814871661985
episode: 548 training return: tensor(71.5417, device='cuda:0')
episode: 549 training return: tensor(-87.9230, device='cuda:0')
episode: 550 training return: tensor(408.2223, device='cuda:0')
episode: 551 training return: tensor(376.4131, device='cuda:0')
epoch: 138 test_true_pfm: 2126.718816070504 sim_pfm: 153.2683881755414
episode: 552 training return: tensor(284.9978, device='cuda:0')
episode: 553 training return: tensor(423.7713, device='cuda:0')
episode: 554 training return: tensor(76.2259, device='cuda:0')
episode: 555 training return: tensor(94.7236, device='cuda:0')
epoch: 139 test_true_pfm: 2090.139400493726 sim_pfm: 434.26342308789026
episode: 556 training return: tensor(-72.3280, device='cuda:0')
episode: 557 training return: tensor(18.2725, device='cuda:0')
episode: 558 training return: tensor(427.4586, device='cuda:0')
episode: 559 training return: tensor(405.9415, device='cuda:0')
epoch: 140 test_true_pfm: 2183.159633564462 sim_pfm: 154.4694175674425
episode: 560 training return: tensor(-16.2216, device='cuda:0')
episode: 561 training return: tensor(-82.8762, device='cuda:0')
episode: 562 training return: tensor(428.8522, device='cuda:0')
episode: 563 training return: tensor(68.2142, device='cuda:0')
epoch: 141 test_true_pfm: 2752.220762205521 sim_pfm: 34.298750136687886
episode: 564 training return: tensor(428.6249, device='cuda:0')
episode: 565 training return: tensor(44.1834, device='cuda:0')
episode: 566 training return: tensor(-97.3947, device='cuda:0')
episode: 567 training return: tensor(407.3976, device='cuda:0')
epoch: 142 test_true_pfm: 2214.643200556227 sim_pfm: 159.1506583733329
episode: 568 training return: tensor(63.0838, device='cuda:0')
episode: 569 training return: tensor(37.8341, device='cuda:0')
episode: 570 training return: tensor(116.9775, device='cuda:0')
episode: 571 training return: tensor(20.3141, device='cuda:0')
epoch: 143 test_true_pfm: 2068.4709162263894 sim_pfm: 270.07479199148173
episode: 572 training return: tensor(423.7975, device='cuda:0')
episode: 573 training return: tensor(461.9163, device='cuda:0')
episode: 574 training return: tensor(427.3603, device='cuda:0')
episode: 575 training return: tensor(250.8566, device='cuda:0')
epoch: 144 test_true_pfm: 2674.3658874760495 sim_pfm: 306.89253795989015
episode: 576 training return: tensor(427.8451, device='cuda:0')
episode: 577 training return: tensor(20.5723, device='cuda:0')
episode: 578 training return: tensor(422.0933, device='cuda:0')
episode: 579 training return: tensor(447.7075, device='cuda:0')
epoch: 145 test_true_pfm: 2294.8764158709077 sim_pfm: 295.6232629711546
episode: 580 training return: tensor(369.9489, device='cuda:0')
episode: 581 training return: tensor(216.2205, device='cuda:0')
episode: 582 training return: tensor(286.4353, device='cuda:0')
episode: 583 training return: tensor(15.9595, device='cuda:0')
epoch: 146 test_true_pfm: 2755.0707382101136 sim_pfm: 288.66485442446236
episode: 584 training return: tensor(436.6908, device='cuda:0')
episode: 585 training return: tensor(414.2479, device='cuda:0')
episode: 586 training return: tensor(186.1185, device='cuda:0')
episode: 587 training return: tensor(465.0648, device='cuda:0')
epoch: 147 test_true_pfm: 2645.208705593898 sim_pfm: 433.2917290920547
episode: 588 training return: tensor(420.2599, device='cuda:0')
episode: 589 training return: tensor(187.9285, device='cuda:0')
episode: 590 training return: tensor(425.8649, device='cuda:0')
episode: 591 training return: tensor(449.6213, device='cuda:0')
epoch: 148 test_true_pfm: 2139.377016751969 sim_pfm: 135.20892239850946
episode: 592 training return: tensor(-56.8514, device='cuda:0')
episode: 593 training return: tensor(407.6259, device='cuda:0')
episode: 594 training return: tensor(-4.0560, device='cuda:0')
episode: 595 training return: tensor(-46.1721, device='cuda:0')
epoch: 149 test_true_pfm: 2661.290848094773 sim_pfm: 140.0059861493064
episode: 596 training return: tensor(-78.4161, device='cuda:0')
episode: 597 training return: tensor(-96.8527, device='cuda:0')
episode: 598 training return: tensor(463.7547, device='cuda:0')
episode: 599 training return: tensor(433.0535, device='cuda:0')
epoch: 150 test_true_pfm: 3124.6183449435775 sim_pfm: 3.54250376970352
