['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '4']
epoch: 0 training_loss 0.25630328446626666 test_loss: 0.1506602644920349
epoch: 1 training_loss 0.15862064611166715 test_loss: 0.14576641321182252
epoch: 2 training_loss 0.13187057234346866 test_loss: 0.12757546901702882
epoch: 3 training_loss 0.1184977451339364 test_loss: 0.10565515756607055
epoch: 4 training_loss 0.11999835100024939 test_loss: 0.1284104108810425
epoch: 5 training_loss 0.12296839650720358 test_loss: 0.12560372352600097
epoch: 6 training_loss 0.11836308576166629 test_loss: 0.11746689081192016
epoch: 7 training_loss 0.10925214713439345 test_loss: 0.11766523122787476
epoch: 8 training_loss 0.10957002878189087 test_loss: 0.10395190715789795
epoch: 9 training_loss 0.11191684018820525 test_loss: 0.10822728872299195
epoch: 10 training_loss 0.1157965924963355 test_loss: 0.11934539079666137
epoch: 11 training_loss 0.10240066230297089 test_loss: 0.11701147556304932
epoch: 12 training_loss 0.11521095816046 test_loss: 0.09938945770263671
epoch: 13 training_loss 0.0995951296016574 test_loss: 0.10512433052062989
epoch: 14 training_loss 0.1052168595790863 test_loss: 0.08921173810958863
epoch: 15 training_loss 0.10970183748751879 test_loss: 0.10432111024856568
epoch: 16 training_loss 0.10605683889240027 test_loss: 0.1074332356452942
epoch: 17 training_loss 0.0974046665802598 test_loss: 0.11131237745285034
epoch: 18 training_loss 0.0986043643578887 test_loss: 0.08886970281600952
epoch: 19 training_loss 0.09495040014386177 test_loss: 0.09091690182685852
epoch: 20 training_loss 0.10312236782163381 test_loss: 0.07542186975479126
epoch: 21 training_loss 0.09535631824284792 test_loss: 0.09453694820404053
epoch: 22 training_loss 0.09995164124295115 test_loss: 0.10088263750076294
epoch: 23 training_loss 0.09907556384801865 test_loss: 0.1059727430343628
epoch: 24 training_loss 0.0925654105655849 test_loss: 0.1018854022026062
epoch: 25 training_loss 0.10035409111529589 test_loss: 0.1028203010559082
epoch: 26 training_loss 0.10216668264940382 test_loss: 0.10427552461624146
epoch: 27 training_loss 0.09104178048670292 test_loss: 0.10367408990859986
epoch: 28 training_loss 0.09208380484953523 test_loss: 0.08984532356262206
epoch: 29 training_loss 0.09697440078482032 test_loss: 0.09681153297424316
epoch: 30 training_loss 0.09705410588532687 test_loss: 0.08996101021766663
epoch: 31 training_loss 0.09637084029614926 test_loss: 0.11045805215835572
epoch: 32 training_loss 0.09601289557293058 test_loss: 0.0848171591758728
epoch: 33 training_loss 0.09762543674558401 test_loss: 0.08833135366439819
epoch: 34 training_loss 0.09152847852557898 test_loss: 0.10393037796020507
epoch: 35 training_loss 0.10101365938782692 test_loss: 0.08775542378425598
epoch: 36 training_loss 0.09111174097284674 test_loss: 0.09789320826530457
epoch: 37 training_loss 0.09717968475073575 test_loss: 0.09027864933013915
epoch: 38 training_loss 0.09943664031103254 test_loss: 0.086788409948349
epoch: 39 training_loss 0.09385990329086781 test_loss: 0.07935482263565063
epoch: 40 training_loss 0.09279240753501654 test_loss: 0.10254662036895752
epoch: 41 training_loss 0.10256446873769164 test_loss: 0.10459121465682983
epoch: 42 training_loss 0.10096485882997513 test_loss: 0.09651890993118287
epoch: 43 training_loss 0.09776500213891268 test_loss: 0.10927214622497558
epoch: 44 training_loss 0.09398817738518118 test_loss: 0.09091922640800476
epoch: 45 training_loss 0.08998565779998899 test_loss: 0.0835183322429657
epoch: 46 training_loss 0.08915688682347536 test_loss: 0.0852582335472107
epoch: 47 training_loss 0.08751224119216204 test_loss: 0.09835669994354249
epoch: 48 training_loss 0.08347960850223898 test_loss: 0.1049604058265686
epoch: 49 training_loss 0.089558350648731 test_loss: 0.08756864070892334
epoch: 50 training_loss 0.09409284647554159 test_loss: 0.09578520059585571
epoch: 51 training_loss 0.09425393499433994 test_loss: 0.10513415336608886
epoch: 52 training_loss 0.0922692620754242 test_loss: 0.09860090017318726
epoch: 53 training_loss 0.0973080636560917 test_loss: 0.10662362575531006
epoch: 54 training_loss 0.09330241162329912 test_loss: 0.08423993587493897
epoch: 55 training_loss 0.093486048579216 test_loss: 0.07949679493904113
epoch: 56 training_loss 0.09143294677138329 test_loss: 0.09199382662773133
epoch: 57 training_loss 0.09541959587484598 test_loss: 0.08996435403823852
epoch: 58 training_loss 0.09528424805030226 test_loss: 0.10416966676712036
epoch: 59 training_loss 0.09080523032695055 test_loss: 0.09428614377975464
epoch: 60 training_loss 0.09104224242269993 test_loss: 0.07554959058761597
epoch: 61 training_loss 0.095492208506912 test_loss: 0.08988693356513977
epoch: 62 training_loss 0.09166227219626308 test_loss: 0.09438441395759582
epoch: 63 training_loss 0.09327535226941108 test_loss: 0.09365625977516175
epoch: 64 training_loss 0.09782980769872665 test_loss: 0.08911094069480896
epoch: 65 training_loss 0.09245962485671043 test_loss: 0.10972737073898316
epoch: 66 training_loss 0.0861852528527379 test_loss: 0.09630582928657531
epoch: 67 training_loss 0.09286574942991138 test_loss: 0.08901814222335816
epoch: 68 training_loss 0.09894354987889528 test_loss: 0.10366657972335816
epoch: 69 training_loss 0.09796573668718338 test_loss: 0.07917912602424622
epoch: 70 training_loss 0.09681450251489877 test_loss: 0.0918757140636444
epoch: 71 training_loss 0.09341133141890168 test_loss: 0.09821484684944153
epoch: 72 training_loss 0.08724336786195636 test_loss: 0.098161381483078
epoch: 73 training_loss 0.09303345307707786 test_loss: 0.08602938652038575
epoch: 74 training_loss 0.08557578217238188 test_loss: 0.09294854402542115
epoch: 75 training_loss 0.0943371950276196 test_loss: 0.1041688084602356
epoch: 76 training_loss 0.0943776666559279 test_loss: 0.09084190726280213
epoch: 77 training_loss 0.09163612402975559 test_loss: 0.0865972876548767
epoch: 78 training_loss 0.09481215478852391 test_loss: 0.08808295130729675
epoch: 79 training_loss 0.09297764779999852 test_loss: 0.0931837022304535
epoch: 80 training_loss 0.08960033485665918 test_loss: 0.1024286389350891
epoch: 81 training_loss 0.09224353739991784 test_loss: 0.10225629806518555
epoch: 82 training_loss 0.08865519981831312 test_loss: 0.08924705982208252
epoch: 83 training_loss 0.08897963900119066 test_loss: 0.10522586107254028
epoch: 84 training_loss 0.08629355531185866 test_loss: 0.0926167607307434
epoch: 85 training_loss 0.0902092656120658 test_loss: 0.08771276473999023
epoch: 86 training_loss 0.09247751105576754 test_loss: 0.09507874846458435
epoch: 87 training_loss 0.09180124398320913 test_loss: 0.09938648343086243
epoch: 88 training_loss 0.09263918470591306 test_loss: 0.08692445755004882
epoch: 89 training_loss 0.08076591992750765 test_loss: 0.08172656893730164
epoch: 90 training_loss 0.08735811395570636 test_loss: 0.08941349387168884
epoch: 91 training_loss 0.08817594738677144 test_loss: 0.08927984833717346
epoch: 92 training_loss 0.0942415518220514 test_loss: 0.09436176419258117
epoch: 93 training_loss 0.09654452316462994 test_loss: 0.08916860818862915
epoch: 94 training_loss 0.08992743253707886 test_loss: 0.09050394296646118
epoch: 95 training_loss 0.0904875280894339 test_loss: 0.09740893244743347
epoch: 96 training_loss 0.08103210385423154 test_loss: 0.07956560254096985
epoch: 97 training_loss 0.08434796221554279 test_loss: 0.09585239887237548
epoch: 98 training_loss 0.09452054396271706 test_loss: 0.10230343341827393
epoch: 99 training_loss 0.0843783581070602 test_loss: 0.09436972141265869
epoch: 100 training_loss 0.08974799375981092 test_loss: 0.08336138129234313
epoch: 101 training_loss 0.09223157214000821 test_loss: 0.07913272380828858
epoch: 102 training_loss 0.08889887472614647 test_loss: 0.07363370656967164
epoch: 103 training_loss 0.09268472323194146 test_loss: 0.09156914353370667
epoch: 104 training_loss 0.09062915498390794 test_loss: 0.09090410470962525
epoch: 105 training_loss 0.09118794552981853 test_loss: 0.09131630659103393
epoch: 106 training_loss 0.09669745326042176 test_loss: 0.08634054660797119
epoch: 107 training_loss 0.08747067018412054 test_loss: 0.08277698159217835
epoch: 108 training_loss 0.0911392289865762 test_loss: 0.09069355130195618
epoch: 109 training_loss 0.08731332831084729 test_loss: 0.10198625326156616
epoch: 110 training_loss 0.0901077539473772 test_loss: 0.08419315218925476
epoch: 111 training_loss 0.09466553959995508 test_loss: 0.08606464266777039
epoch: 112 training_loss 0.09212089285254478 test_loss: 0.09257006645202637
epoch: 113 training_loss 0.0975157991424203 test_loss: 0.09378433823585511
epoch: 114 training_loss 0.08732542663812637 test_loss: 0.08185568451881409
epoch: 115 training_loss 0.08634341614320874 test_loss: 0.07306544184684753
epoch: 116 training_loss 0.08703364677727222 test_loss: 0.08986626863479615
epoch: 117 training_loss 0.09188451133668422 test_loss: 0.09415609836578369
epoch: 118 training_loss 0.08230707202106714 test_loss: 0.08137463927268981
epoch: 119 training_loss 0.08591008787974715 test_loss: 0.08709348440170288
epoch: 120 training_loss 0.09815076328814029 test_loss: 0.09112393856048584
epoch: 121 training_loss 0.09823415014892817 test_loss: 0.09656955599784851
epoch: 122 training_loss 0.0893506853096187 test_loss: 0.09261321425437927
epoch: 123 training_loss 0.08525356644764542 test_loss: 0.08421460390090943
epoch: 124 training_loss 0.0867696787416935 test_loss: 0.0897923767566681
epoch: 125 training_loss 0.08563771389424801 test_loss: 0.099055415391922
epoch: 126 training_loss 0.08440817728638648 test_loss: 0.1019974708557129
epoch: 127 training_loss 0.09327587265521288 test_loss: 0.09528849124908448
epoch: 128 training_loss 0.09004308549687266 test_loss: 0.08979581594467163
epoch: 129 training_loss 0.09047330936416983 test_loss: 0.09588865041732789
epoch: 130 training_loss 0.09419574527069927 test_loss: 0.1021417498588562
epoch: 131 training_loss 0.08909364959225058 test_loss: 0.11490380764007568
epoch: 132 training_loss 0.08454088118858635 test_loss: 0.0892064094543457
epoch: 133 training_loss 0.08831528760492802 test_loss: 0.09331610798835754
epoch: 134 training_loss 0.09022321848198772 test_loss: 0.0898266851902008
epoch: 135 training_loss 0.08475502733141184 test_loss: 0.09121471047401428
epoch: 136 training_loss 0.08788436688482762 test_loss: 0.09671797156333924
epoch: 137 training_loss 0.09113358207046986 test_loss: 0.09774957299232483
epoch: 138 training_loss 0.09183060586452484 test_loss: 0.1028399109840393
epoch: 139 training_loss 0.08381108086556197 test_loss: 0.08986056447029114
epoch: 140 training_loss 0.08548496827483178 test_loss: 0.08658174276351929
epoch: 141 training_loss 0.08624092789366841 test_loss: 0.08881900906562805
epoch: 142 training_loss 0.08800798557698726 test_loss: 0.0993896782398224
epoch: 143 training_loss 0.08765493125654757 test_loss: 0.09162741303443908
epoch: 144 training_loss 0.09118609244003892 test_loss: 0.08997944593429566
epoch: 145 training_loss 0.09047230053693056 test_loss: 0.1059725522994995
epoch: 146 training_loss 0.09533509075641632 test_loss: 0.09012545943260193
epoch: 147 training_loss 0.08960669230669736 test_loss: 0.08165771961212158
epoch: 148 training_loss 0.0855436485633254 test_loss: 0.08418323397636414
epoch: 149 training_loss 0.0919105682335794 test_loss: 0.10080758333206177
epoch: 0 training_loss 40.500985622406006 test_loss: 20.088214111328124
epoch: 1 training_loss 16.10257445335388 test_loss: 14.345098876953125
epoch: 2 training_loss 12.433020324707032 test_loss: 11.487471771240234
epoch: 3 training_loss 10.635036125183106 test_loss: 10.150682067871093
epoch: 4 training_loss 9.267158503532409 test_loss: 9.15039520263672
epoch: 5 training_loss 8.529129614830017 test_loss: 8.127470397949219
epoch: 6 training_loss 7.765504117012024 test_loss: 7.7835693359375
epoch: 7 training_loss 7.148015518188476 test_loss: 7.696199035644531
epoch: 8 training_loss 6.977457304000854 test_loss: 6.9553077697753904
epoch: 9 training_loss 6.627969460487366 test_loss: 6.382412338256836
epoch: 10 training_loss 6.225851111412048 test_loss: 6.1699787139892575
epoch: 11 training_loss 5.9331944990158085 test_loss: 5.994601821899414
epoch: 12 training_loss 5.6693237686157225 test_loss: 5.491009902954102
epoch: 13 training_loss 5.529380102157592 test_loss: 5.479497909545898
epoch: 14 training_loss 5.321145172119141 test_loss: 5.348930358886719
epoch: 15 training_loss 5.095221498012543 test_loss: 4.90556755065918
epoch: 16 training_loss 4.836467108726501 test_loss: 4.801807022094726
epoch: 17 training_loss 4.813475017547607 test_loss: 4.708683776855469
epoch: 18 training_loss 4.633869509696961 test_loss: 4.83838005065918
epoch: 19 training_loss 4.467292454242706 test_loss: 4.488574981689453
epoch: 20 training_loss 4.267163314819336 test_loss: 4.454416275024414
epoch: 21 training_loss 4.272891602516174 test_loss: 4.462484359741211
epoch: 22 training_loss 4.239223713874817 test_loss: 4.246758270263672
epoch: 23 training_loss 4.030659060478211 test_loss: 4.289285659790039
epoch: 24 training_loss 4.075036823749542 test_loss: 4.149597930908203
epoch: 25 training_loss 3.977993495464325 test_loss: 3.980486297607422
epoch: 26 training_loss 3.889165549278259 test_loss: 3.908544921875
epoch: 27 training_loss 3.7856311058998107 test_loss: 3.963740921020508
epoch: 28 training_loss 3.877747540473938 test_loss: 3.866395950317383
epoch: 29 training_loss 3.860930309295654 test_loss: 3.7263771057128907
epoch: 30 training_loss 3.672389848232269 test_loss: 3.7088672637939455
epoch: 31 training_loss 3.7565037655830382 test_loss: 3.5610355377197265
epoch: 32 training_loss 3.582517075538635 test_loss: 3.680906295776367
epoch: 33 training_loss 3.526234951019287 test_loss: 3.479663848876953
epoch: 34 training_loss 3.636597013473511 test_loss: 3.672413635253906
epoch: 35 training_loss 3.555428111553192 test_loss: 3.404063034057617
epoch: 36 training_loss 3.491380903720856 test_loss: 3.346995162963867
epoch: 37 training_loss 3.4275639033317566 test_loss: 3.560160827636719
epoch: 38 training_loss 3.4061804461479186 test_loss: 3.5127613067626955
epoch: 39 training_loss 3.33167608499527 test_loss: 3.302585220336914
epoch: 40 training_loss 3.284160068035126 test_loss: 3.3129241943359373
epoch: 41 training_loss 3.293718683719635 test_loss: 3.446119689941406
epoch: 42 training_loss 3.2482606840133665 test_loss: 3.209562301635742
epoch: 43 training_loss 3.242221291065216 test_loss: 3.171850395202637
epoch: 44 training_loss 3.2115295362472533 test_loss: 3.2818599700927735
epoch: 45 training_loss 3.201748540401459 test_loss: 3.1561485290527345
epoch: 46 training_loss 3.103421585559845 test_loss: 3.224840545654297
epoch: 47 training_loss 3.126432173252106 test_loss: 3.1380502700805666
epoch: 48 training_loss 3.0932356023788454 test_loss: 3.0431236267089843
epoch: 49 training_loss 3.1455025005340578 test_loss: 3.19085578918457
epoch: 50 training_loss 3.0454964208602906 test_loss: 3.096394729614258
epoch: 51 training_loss 2.9744616842269895 test_loss: 3.012648582458496
epoch: 52 training_loss 3.048657989501953 test_loss: 2.948345947265625
epoch: 53 training_loss 3.002262144088745 test_loss: 3.1578054428100586
epoch: 54 training_loss 2.9830714321136473 test_loss: 3.0733070373535156
epoch: 55 training_loss 2.977980797290802 test_loss: 3.1131540298461915
epoch: 56 training_loss 2.909360632896423 test_loss: 2.904197120666504
epoch: 57 training_loss 2.9240869212150575 test_loss: 2.970204162597656
epoch: 58 training_loss 2.8695571541786196 test_loss: 2.923067092895508
epoch: 59 training_loss 2.85224942445755 test_loss: 2.913694381713867
epoch: 60 training_loss 2.8985629510879516 test_loss: 2.762301445007324
epoch: 61 training_loss 2.813389234542847 test_loss: 2.9142889022827148
epoch: 62 training_loss 2.9168467831611635 test_loss: 3.065316581726074
epoch: 63 training_loss 2.8742432045936583 test_loss: 2.9663740158081056
epoch: 64 training_loss 2.865144202709198 test_loss: 2.97816162109375
epoch: 65 training_loss 2.797776527404785 test_loss: 2.7669172286987305
epoch: 66 training_loss 2.801362838745117 test_loss: 2.8927286148071287
epoch: 67 training_loss 2.7420541572570802 test_loss: 2.8001781463623048
epoch: 68 training_loss 2.7181598925590515 test_loss: 2.756944274902344
epoch: 69 training_loss 2.714599697589874 test_loss: 2.8023183822631834
epoch: 70 training_loss 2.772506392002106 test_loss: 2.7785724639892577
epoch: 71 training_loss 2.7582867670059206 test_loss: 2.9374357223510743
epoch: 72 training_loss 2.7066610217094422 test_loss: 2.9224620819091798
epoch: 73 training_loss 2.737152293920517 test_loss: 2.8629432678222657
epoch: 74 training_loss 2.6510077333450317 test_loss: 2.755470085144043
epoch: 75 training_loss 2.6674676895141602 test_loss: 2.7880790710449217
epoch: 76 training_loss 2.703030366897583 test_loss: 2.661520576477051
epoch: 77 training_loss 2.6314760458469393 test_loss: 2.743386077880859
epoch: 78 training_loss 2.703639268875122 test_loss: 2.8607948303222654
epoch: 79 training_loss 2.6362749004364012 test_loss: 2.5920042037963866
epoch: 80 training_loss 2.680231328010559 test_loss: 2.681622314453125
epoch: 81 training_loss 2.655386571884155 test_loss: 2.7305837631225587
epoch: 82 training_loss 2.62049067735672 test_loss: 2.74300479888916
epoch: 83 training_loss 2.5998840069770814 test_loss: 2.632895660400391
epoch: 84 training_loss 2.566095918416977 test_loss: 2.6524276733398438
epoch: 85 training_loss 2.586715157032013 test_loss: 2.7974712371826174
epoch: 86 training_loss 2.5773977971076967 test_loss: 2.7306234359741213
epoch: 87 training_loss 2.560715022087097 test_loss: 2.6093889236450196
epoch: 88 training_loss 2.5174766075611115 test_loss: 2.6991647720336913
epoch: 89 training_loss 2.5577839648723604 test_loss: 2.5289262771606444
epoch: 90 training_loss 2.540423387289047 test_loss: 2.6994098663330077
epoch: 91 training_loss 2.506032257080078 test_loss: 2.4864696502685546
epoch: 92 training_loss 2.5411359226703643 test_loss: 2.5556108474731447
epoch: 93 training_loss 2.4829652762413024 test_loss: 2.562920570373535
epoch: 94 training_loss 2.503316251039505 test_loss: 2.621987152099609
epoch: 95 training_loss 2.5505013501644136 test_loss: 2.6328720092773437
epoch: 96 training_loss 2.5167981433868407 test_loss: 2.475596046447754
epoch: 97 training_loss 2.477924565076828 test_loss: 2.5879180908203123
epoch: 98 training_loss 2.429904707670212 test_loss: 2.478870964050293
epoch: 99 training_loss 2.5164969897270204 test_loss: 2.4901857376098633
epoch: 100 training_loss 2.4613716161251067 test_loss: 2.5101747512817383
epoch: 101 training_loss 2.432438780069351 test_loss: 2.573989677429199
epoch: 102 training_loss 2.4142866039276125 test_loss: 2.468650245666504
epoch: 103 training_loss 2.4836178505420685 test_loss: 2.7018022537231445
epoch: 104 training_loss 2.483851979970932 test_loss: 2.4278125762939453
epoch: 105 training_loss 2.4613523852825163 test_loss: 2.5789215087890627
epoch: 106 training_loss 2.4339024567604066 test_loss: 2.433754539489746
epoch: 107 training_loss 2.3950010895729066 test_loss: 2.3799415588378907
epoch: 108 training_loss 2.3974718487262727 test_loss: 2.5686174392700196
epoch: 109 training_loss 2.3677675771713256 test_loss: 2.4087696075439453
epoch: 110 training_loss 2.3775695860385895 test_loss: 2.4306919097900392
epoch: 111 training_loss 2.3546607208251955 test_loss: 2.4002239227294924
epoch: 112 training_loss 2.3748133897781374 test_loss: 2.5087970733642577
epoch: 113 training_loss 2.3651643550395964 test_loss: 2.467608642578125
epoch: 114 training_loss 2.384555492401123 test_loss: 2.5270307540893553
epoch: 115 training_loss 2.3431820178031924 test_loss: 2.383530044555664
epoch: 116 training_loss 2.3447879445552826 test_loss: 2.4194740295410155
epoch: 117 training_loss 2.342523753643036 test_loss: 2.3623111724853514
epoch: 118 training_loss 2.393525918722153 test_loss: 2.5243358612060547
epoch: 119 training_loss 2.3892121398448944 test_loss: 2.430041694641113
epoch: 120 training_loss 2.371021648645401 test_loss: 2.499466323852539
epoch: 121 training_loss 2.2852911245822907 test_loss: 2.303683853149414
epoch: 122 training_loss 2.3448061501979827 test_loss: 2.414558982849121
epoch: 123 training_loss 2.3251343464851377 test_loss: 2.3169748306274416
epoch: 124 training_loss 2.229464718103409 test_loss: 2.384733772277832
epoch: 125 training_loss 2.262483880519867 test_loss: 2.49974365234375
epoch: 126 training_loss 2.244277263879776 test_loss: 2.29110164642334
epoch: 127 training_loss 2.276276842355728 test_loss: 2.2981594085693358
epoch: 128 training_loss 2.3264203023910524 test_loss: 2.309101867675781
epoch: 129 training_loss 2.2501010584831236 test_loss: 2.2998538970947267
epoch: 130 training_loss 2.2728249394893645 test_loss: 2.3125003814697265
epoch: 131 training_loss 2.2693900895118713 test_loss: 2.2503761291503905
epoch: 132 training_loss 2.29671967625618 test_loss: 2.299026679992676
epoch: 133 training_loss 2.2264756143093107 test_loss: 2.338368797302246
epoch: 134 training_loss 2.275784727334976 test_loss: 2.329891586303711
epoch: 135 training_loss 2.2699049806594847 test_loss: 2.2274143218994142
epoch: 136 training_loss 2.2492640137672426 test_loss: 2.2514602661132814
epoch: 137 training_loss 2.236686315536499 test_loss: 2.343493843078613
epoch: 138 training_loss 2.249454061985016 test_loss: 2.3909467697143554
epoch: 139 training_loss 2.271782455444336 test_loss: 2.334775924682617
epoch: 140 training_loss 2.243594514131546 test_loss: 2.27279109954834
epoch: 141 training_loss 2.2751313078403474 test_loss: 2.2638427734375
epoch: 142 training_loss 2.258639245033264 test_loss: 2.2030450820922853
epoch: 143 training_loss 2.2761617815494537 test_loss: 2.2821277618408202
epoch: 144 training_loss 2.250048485994339 test_loss: 2.3269927978515623
epoch: 145 training_loss 2.2043617379665377 test_loss: 2.3580398559570312
epoch: 146 training_loss 2.1830187010765076 test_loss: 2.2832225799560546
epoch: 147 training_loss 2.1969626116752625 test_loss: 2.240349006652832
epoch: 148 training_loss 2.230733097791672 test_loss: 2.231920623779297
epoch: 149 training_loss 2.243065083026886 test_loss: 2.2985605239868163
2590.5123450753313
episode: 0 training return: tensor(219.2835, device='cuda:0')
episode: 1 training return: tensor(331.8352, device='cuda:0')
episode: 2 training return: tensor(246.7520, device='cuda:0')
episode: 3 training return: tensor(237.5511, device='cuda:0')
epoch: 1 test_true_pfm: 3171.753168973842 sim_pfm: 5.934748184401542
episode: 4 training return: tensor(-355.5465, device='cuda:0')
episode: 5 training return: tensor(-232.5619, device='cuda:0')
episode: 6 training return: tensor(303.4510, device='cuda:0')
episode: 7 training return: tensor(85.5300, device='cuda:0')
epoch: 2 test_true_pfm: 2443.035739689814 sim_pfm: 154.03775064089373
episode: 8 training return: tensor(275.7093, device='cuda:0')
episode: 9 training return: tensor(-41.2123, device='cuda:0')
episode: 10 training return: tensor(135.4515, device='cuda:0')
episode: 11 training return: tensor(163.4107, device='cuda:0')
epoch: 3 test_true_pfm: 3013.3784450133376 sim_pfm: 315.8106339616352
episode: 12 training return: tensor(91.8666, device='cuda:0')
episode: 13 training return: tensor(-30.3339, device='cuda:0')
episode: 14 training return: tensor(251.0524, device='cuda:0')
episode: 15 training return: tensor(289.9401, device='cuda:0')
epoch: 4 test_true_pfm: 3384.1121956796014 sim_pfm: 78.55010479176417
episode: 16 training return: tensor(357.6428, device='cuda:0')
episode: 17 training return: tensor(311.7797, device='cuda:0')
episode: 18 training return: tensor(101.9660, device='cuda:0')
episode: 19 training return: tensor(171.5554, device='cuda:0')
epoch: 5 test_true_pfm: 2619.6709017973703 sim_pfm: 177.88634150686752
episode: 20 training return: tensor(322.2833, device='cuda:0')
episode: 21 training return: tensor(297.3394, device='cuda:0')
episode: 22 training return: tensor(306.7374, device='cuda:0')
episode: 23 training return: tensor(303.3810, device='cuda:0')
epoch: 6 test_true_pfm: 2885.541022300404 sim_pfm: 225.2431118717941
episode: 24 training return: tensor(292.1627, device='cuda:0')
episode: 25 training return: tensor(-128.4195, device='cuda:0')
episode: 26 training return: tensor(-15.8437, device='cuda:0')
episode: 27 training return: tensor(209.1396, device='cuda:0')
epoch: 7 test_true_pfm: 3349.623921392845 sim_pfm: 156.53556082559712
episode: 28 training return: tensor(348.4381, device='cuda:0')
episode: 29 training return: tensor(308.2783, device='cuda:0')
episode: 30 training return: tensor(112.8198, device='cuda:0')
episode: 31 training return: tensor(350.3163, device='cuda:0')
epoch: 8 test_true_pfm: 2976.957926433008 sim_pfm: 264.6381168405448
episode: 32 training return: tensor(-107.9814, device='cuda:0')
episode: 33 training return: tensor(-108.4548, device='cuda:0')
episode: 34 training return: tensor(-45.4275, device='cuda:0')
episode: 35 training return: tensor(310.8751, device='cuda:0')
epoch: 9 test_true_pfm: 3436.1407155872153 sim_pfm: 227.4023610597748
episode: 36 training return: tensor(10.1855, device='cuda:0')
episode: 37 training return: tensor(192.3954, device='cuda:0')
episode: 38 training return: tensor(327.7525, device='cuda:0')
episode: 39 training return: tensor(323.8171, device='cuda:0')
epoch: 10 test_true_pfm: 3087.58906063924 sim_pfm: 277.4523751719001
episode: 40 training return: tensor(-18.5435, device='cuda:0')
episode: 41 training return: tensor(275.2330, device='cuda:0')
episode: 42 training return: tensor(320.4778, device='cuda:0')
episode: 43 training return: tensor(262.0430, device='cuda:0')
epoch: 11 test_true_pfm: 3413.1948392460254 sim_pfm: 118.41722090552987
episode: 44 training return: tensor(300.2153, device='cuda:0')
episode: 45 training return: tensor(262.1387, device='cuda:0')
episode: 46 training return: tensor(335.7681, device='cuda:0')
episode: 47 training return: tensor(307.5487, device='cuda:0')
epoch: 12 test_true_pfm: 2390.575772630554 sim_pfm: 223.76503728539683
episode: 48 training return: tensor(359.1125, device='cuda:0')
episode: 49 training return: tensor(154.5642, device='cuda:0')
episode: 50 training return: tensor(379.4610, device='cuda:0')
episode: 51 training return: tensor(289.4302, device='cuda:0')
epoch: 13 test_true_pfm: 3320.265002153538 sim_pfm: 298.9745964999699
episode: 52 training return: tensor(287.7812, device='cuda:0')
episode: 53 training return: tensor(290.9340, device='cuda:0')
episode: 54 training return: tensor(277.3914, device='cuda:0')
episode: 55 training return: tensor(254.9091, device='cuda:0')
epoch: 14 test_true_pfm: 3084.638621975921 sim_pfm: 294.2151965156663
episode: 56 training return: tensor(-134.1099, device='cuda:0')
episode: 57 training return: tensor(246.4318, device='cuda:0')
episode: 58 training return: tensor(298.4922, device='cuda:0')
episode: 59 training return: tensor(-159.8035, device='cuda:0')
epoch: 15 test_true_pfm: 3246.4287495473313 sim_pfm: 174.6467244781476
episode: 60 training return: tensor(-104.0775, device='cuda:0')
episode: 61 training return: tensor(341.2839, device='cuda:0')
episode: 62 training return: tensor(350.2668, device='cuda:0')
episode: 63 training return: tensor(53.5038, device='cuda:0')
epoch: 16 test_true_pfm: 3212.1184723261563 sim_pfm: 167.41543478998938
episode: 64 training return: tensor(242.1554, device='cuda:0')
episode: 65 training return: tensor(393.6415, device='cuda:0')
episode: 66 training return: tensor(-81.6332, device='cuda:0')
episode: 67 training return: tensor(303.2010, device='cuda:0')
epoch: 17 test_true_pfm: 3258.0262124290916 sim_pfm: 159.31776802516347
episode: 68 training return: tensor(54.5275, device='cuda:0')
episode: 69 training return: tensor(321.6935, device='cuda:0')
episode: 70 training return: tensor(326.8955, device='cuda:0')
episode: 71 training return: tensor(211.8811, device='cuda:0')
epoch: 18 test_true_pfm: 3108.2893502167476 sim_pfm: 272.38276564330835
episode: 72 training return: tensor(287.5602, device='cuda:0')
episode: 73 training return: tensor(68.3395, device='cuda:0')
episode: 74 training return: tensor(266.2639, device='cuda:0')
episode: 75 training return: tensor(320.8606, device='cuda:0')
epoch: 19 test_true_pfm: 3379.7753939003937 sim_pfm: 268.6852725125306
episode: 76 training return: tensor(-312.5304, device='cuda:0')
episode: 77 training return: tensor(108.6595, device='cuda:0')
episode: 78 training return: tensor(256.8308, device='cuda:0')
episode: 79 training return: tensor(291.9051, device='cuda:0')
epoch: 20 test_true_pfm: 3238.0729450949825 sim_pfm: 292.5121812656483
episode: 80 training return: tensor(314.1262, device='cuda:0')
episode: 81 training return: tensor(308.2302, device='cuda:0')
episode: 82 training return: tensor(295.9529, device='cuda:0')
episode: 83 training return: tensor(288.4486, device='cuda:0')
epoch: 21 test_true_pfm: 3412.8519983054543 sim_pfm: 299.21073184447596
episode: 84 training return: tensor(382.0698, device='cuda:0')
episode: 85 training return: tensor(319.8460, device='cuda:0')
episode: 86 training return: tensor(326.7698, device='cuda:0')
episode: 87 training return: tensor(206.2119, device='cuda:0')
epoch: 22 test_true_pfm: 3119.870604700835 sim_pfm: 221.44048063957598
episode: 88 training return: tensor(244.4902, device='cuda:0')
episode: 89 training return: tensor(12.3251, device='cuda:0')
episode: 90 training return: tensor(349.9280, device='cuda:0')
episode: 91 training return: tensor(144.0145, device='cuda:0')
epoch: 23 test_true_pfm: 3377.248982266872 sim_pfm: 281.2394926943816
episode: 92 training return: tensor(253.9460, device='cuda:0')
episode: 93 training return: tensor(381.2448, device='cuda:0')
episode: 94 training return: tensor(337.6815, device='cuda:0')
episode: 95 training return: tensor(304.5330, device='cuda:0')
epoch: 24 test_true_pfm: 3044.4742890515245 sim_pfm: 202.44186575895097
episode: 96 training return: tensor(-3.0025, device='cuda:0')
episode: 97 training return: tensor(280.1888, device='cuda:0')
episode: 98 training return: tensor(323.1756, device='cuda:0')
episode: 99 training return: tensor(319.8983, device='cuda:0')
epoch: 25 test_true_pfm: 3352.214646501632 sim_pfm: 287.53719153361936
episode: 100 training return: tensor(290.0301, device='cuda:0')
episode: 101 training return: tensor(280.4355, device='cuda:0')
episode: 102 training return: tensor(291.0779, device='cuda:0')
episode: 103 training return: tensor(358.1901, device='cuda:0')
epoch: 26 test_true_pfm: 3404.368931699099 sim_pfm: 299.60053859187366
episode: 104 training return: tensor(124.4472, device='cuda:0')
episode: 105 training return: tensor(246.0416, device='cuda:0')
episode: 106 training return: tensor(259.0313, device='cuda:0')
episode: 107 training return: tensor(369.5431, device='cuda:0')
epoch: 27 test_true_pfm: 3410.2969138999492 sim_pfm: 276.74543791403994
episode: 108 training return: tensor(303.8255, device='cuda:0')
episode: 109 training return: tensor(308.7158, device='cuda:0')
episode: 110 training return: tensor(319.1070, device='cuda:0')
episode: 111 training return: tensor(302.3951, device='cuda:0')
epoch: 28 test_true_pfm: 3368.333232516617 sim_pfm: 341.4663194071812
episode: 112 training return: tensor(276.4904, device='cuda:0')
episode: 113 training return: tensor(-59.5056, device='cuda:0')
episode: 114 training return: tensor(239.3820, device='cuda:0')
episode: 115 training return: tensor(300.6373, device='cuda:0')
epoch: 29 test_true_pfm: 3126.487869103102 sim_pfm: 362.9251925012213
episode: 116 training return: tensor(344.0005, device='cuda:0')
episode: 117 training return: tensor(370.3998, device='cuda:0')
episode: 118 training return: tensor(360.5063, device='cuda:0')
episode: 119 training return: tensor(351.5383, device='cuda:0')
epoch: 30 test_true_pfm: 2898.05713731958 sim_pfm: 307.3976848014281
episode: 120 training return: tensor(302.8366, device='cuda:0')
episode: 121 training return: tensor(386.4597, device='cuda:0')
episode: 122 training return: tensor(317.1040, device='cuda:0')
episode: 123 training return: tensor(378.6568, device='cuda:0')
epoch: 31 test_true_pfm: 3456.450717110507 sim_pfm: 352.55687923100777
episode: 124 training return: tensor(65.6882, device='cuda:0')
episode: 125 training return: tensor(2.0040, device='cuda:0')
episode: 126 training return: tensor(383.5422, device='cuda:0')
episode: 127 training return: tensor(-74.7733, device='cuda:0')
epoch: 32 test_true_pfm: 3420.2400794334885 sim_pfm: 277.0489563618515
episode: 128 training return: tensor(258.9546, device='cuda:0')
episode: 129 training return: tensor(231.4229, device='cuda:0')
episode: 130 training return: tensor(276.9590, device='cuda:0')
episode: 131 training return: tensor(321.2009, device='cuda:0')
epoch: 33 test_true_pfm: 3386.677067960909 sim_pfm: 299.627878299468
episode: 132 training return: tensor(308.0226, device='cuda:0')
episode: 133 training return: tensor(343.2731, device='cuda:0')
episode: 134 training return: tensor(289.8483, device='cuda:0')
episode: 135 training return: tensor(329.4644, device='cuda:0')
epoch: 34 test_true_pfm: 3030.378097657244 sim_pfm: 223.33185526684005
episode: 136 training return: tensor(346.0154, device='cuda:0')
episode: 137 training return: tensor(323.9521, device='cuda:0')
episode: 138 training return: tensor(326.4007, device='cuda:0')
episode: 139 training return: tensor(339.8087, device='cuda:0')
epoch: 35 test_true_pfm: 3445.0017838831977 sim_pfm: 325.8571876020869
episode: 140 training return: tensor(333.0687, device='cuda:0')
episode: 141 training return: tensor(301.7862, device='cuda:0')
episode: 142 training return: tensor(286.2727, device='cuda:0')
episode: 143 training return: tensor(311.7133, device='cuda:0')
epoch: 36 test_true_pfm: 3378.7711504938948 sim_pfm: 345.46620132653817
episode: 144 training return: tensor(361.2057, device='cuda:0')
episode: 145 training return: tensor(304.9963, device='cuda:0')
episode: 146 training return: tensor(369.4286, device='cuda:0')
episode: 147 training return: tensor(319.1897, device='cuda:0')
epoch: 37 test_true_pfm: 3384.7854886325695 sim_pfm: 341.71508479925495
episode: 148 training return: tensor(321.6955, device='cuda:0')
episode: 149 training return: tensor(264.1573, device='cuda:0')
episode: 150 training return: tensor(230.7931, device='cuda:0')
episode: 151 training return: tensor(346.6029, device='cuda:0')
epoch: 38 test_true_pfm: 3445.6142987159506 sim_pfm: 334.24816591552616
episode: 152 training return: tensor(103.5207, device='cuda:0')
episode: 153 training return: tensor(380.9987, device='cuda:0')
episode: 154 training return: tensor(322.9291, device='cuda:0')
episode: 155 training return: tensor(340.7562, device='cuda:0')
epoch: 39 test_true_pfm: 3438.975555749848 sim_pfm: 309.7469380891028
episode: 156 training return: tensor(119.5756, device='cuda:0')
episode: 157 training return: tensor(287.4482, device='cuda:0')
episode: 158 training return: tensor(-124.7895, device='cuda:0')
episode: 159 training return: tensor(295.1777, device='cuda:0')
epoch: 40 test_true_pfm: 3444.03715426583 sim_pfm: 370.66754264447565
episode: 160 training return: tensor(323.0714, device='cuda:0')
episode: 161 training return: tensor(273.1857, device='cuda:0')
episode: 162 training return: tensor(361.8448, device='cuda:0')
episode: 163 training return: tensor(308.5738, device='cuda:0')
epoch: 41 test_true_pfm: 3417.10155887661 sim_pfm: 353.2459075320512
episode: 164 training return: tensor(121.7422, device='cuda:0')
episode: 165 training return: tensor(330.3737, device='cuda:0')
episode: 166 training return: tensor(312.6571, device='cuda:0')
episode: 167 training return: tensor(382.2584, device='cuda:0')
epoch: 42 test_true_pfm: 3407.866587101587 sim_pfm: 305.19109028911527
episode: 168 training return: tensor(293.6514, device='cuda:0')
episode: 169 training return: tensor(298.4742, device='cuda:0')
episode: 170 training return: tensor(310.2635, device='cuda:0')
episode: 171 training return: tensor(262.7399, device='cuda:0')
epoch: 43 test_true_pfm: 3499.1929790683553 sim_pfm: 331.3586348272317
episode: 172 training return: tensor(304.2303, device='cuda:0')
episode: 173 training return: tensor(282.2700, device='cuda:0')
episode: 174 training return: tensor(320.4636, device='cuda:0')
episode: 175 training return: tensor(318.0580, device='cuda:0')
epoch: 44 test_true_pfm: 3422.1327498729465 sim_pfm: 326.19635136879515
episode: 176 training return: tensor(309.0229, device='cuda:0')
episode: 177 training return: tensor(300.7477, device='cuda:0')
episode: 178 training return: tensor(348.5471, device='cuda:0')
episode: 179 training return: tensor(362.5547, device='cuda:0')
epoch: 45 test_true_pfm: 3022.867991196356 sim_pfm: 316.1366766691014
episode: 180 training return: tensor(9.2780, device='cuda:0')
episode: 181 training return: tensor(332.7754, device='cuda:0')
episode: 182 training return: tensor(339.1816, device='cuda:0')
episode: 183 training return: tensor(62.6456, device='cuda:0')
epoch: 46 test_true_pfm: 3479.0089556081307 sim_pfm: 347.64022895437665
episode: 184 training return: tensor(297.4128, device='cuda:0')
episode: 185 training return: tensor(344.7298, device='cuda:0')
episode: 186 training return: tensor(384.4715, device='cuda:0')
episode: 187 training return: tensor(257.4083, device='cuda:0')
epoch: 47 test_true_pfm: 3420.320458290194 sim_pfm: 276.30733411217807
episode: 188 training return: tensor(353.6550, device='cuda:0')
episode: 189 training return: tensor(341.3607, device='cuda:0')
episode: 190 training return: tensor(396.4050, device='cuda:0')
episode: 191 training return: tensor(333.0226, device='cuda:0')
epoch: 48 test_true_pfm: 3350.4703005538454 sim_pfm: 282.6220059439559
episode: 192 training return: tensor(332.3157, device='cuda:0')
episode: 193 training return: tensor(348.7384, device='cuda:0')
episode: 194 training return: tensor(273.1542, device='cuda:0')
episode: 195 training return: tensor(287.2920, device='cuda:0')
epoch: 49 test_true_pfm: 3386.384654128731 sim_pfm: 327.81052046323504
episode: 196 training return: tensor(410.3119, device='cuda:0')
episode: 197 training return: tensor(349.3159, device='cuda:0')
episode: 198 training return: tensor(315.9010, device='cuda:0')
episode: 199 training return: tensor(289.9023, device='cuda:0')
epoch: 50 test_true_pfm: 3393.9034301322317 sim_pfm: 323.91181160009
episode: 200 training return: tensor(339.0173, device='cuda:0')
episode: 201 training return: tensor(344.5295, device='cuda:0')
episode: 202 training return: tensor(267.9112, device='cuda:0')
episode: 203 training return: tensor(341.7343, device='cuda:0')
epoch: 51 test_true_pfm: 3377.366882605818 sim_pfm: 316.4860219777038
episode: 204 training return: tensor(264.4582, device='cuda:0')
episode: 205 training return: tensor(53.8221, device='cuda:0')
episode: 206 training return: tensor(361.7869, device='cuda:0')
episode: 207 training return: tensor(379.8726, device='cuda:0')
epoch: 52 test_true_pfm: 3384.8535947615987 sim_pfm: 346.6504185847977
episode: 208 training return: tensor(326.6578, device='cuda:0')
episode: 209 training return: tensor(342.6229, device='cuda:0')
episode: 210 training return: tensor(67.9597, device='cuda:0')
episode: 211 training return: tensor(-183.7585, device='cuda:0')
epoch: 53 test_true_pfm: 3396.668791612495 sim_pfm: 296.4705971703418
episode: 212 training return: tensor(290.5843, device='cuda:0')
episode: 213 training return: tensor(291.6361, device='cuda:0')
episode: 214 training return: tensor(269.9389, device='cuda:0')
episode: 215 training return: tensor(312.8005, device='cuda:0')
epoch: 54 test_true_pfm: 3429.2102912147416 sim_pfm: 309.5571389703546
episode: 216 training return: tensor(354.2612, device='cuda:0')
episode: 217 training return: tensor(328.1247, device='cuda:0')
episode: 218 training return: tensor(347.8419, device='cuda:0')
episode: 219 training return: tensor(274.3921, device='cuda:0')
epoch: 55 test_true_pfm: 3356.981259029155 sim_pfm: 305.56205958590726
episode: 220 training return: tensor(301.8694, device='cuda:0')
episode: 221 training return: tensor(333.1786, device='cuda:0')
episode: 222 training return: tensor(304.5123, device='cuda:0')
episode: 223 training return: tensor(330.6053, device='cuda:0')
epoch: 56 test_true_pfm: 3178.652286481802 sim_pfm: 223.72601020468088
episode: 224 training return: tensor(309.6891, device='cuda:0')
episode: 225 training return: tensor(128.9833, device='cuda:0')
episode: 226 training return: tensor(305.4130, device='cuda:0')
episode: 227 training return: tensor(296.0305, device='cuda:0')
epoch: 57 test_true_pfm: 3402.7992994512583 sim_pfm: 324.55869911189075
episode: 228 training return: tensor(353.5221, device='cuda:0')
episode: 229 training return: tensor(-51.5213, device='cuda:0')
episode: 230 training return: tensor(73.4769, device='cuda:0')
episode: 231 training return: tensor(326.5279, device='cuda:0')
epoch: 58 test_true_pfm: 3476.071585288426 sim_pfm: 264.97679565810057
episode: 232 training return: tensor(305.3091, device='cuda:0')
episode: 233 training return: tensor(309.3636, device='cuda:0')
episode: 234 training return: tensor(314.2200, device='cuda:0')
episode: 235 training return: tensor(79.2762, device='cuda:0')
epoch: 59 test_true_pfm: 3361.8430658518337 sim_pfm: 286.65007137076464
episode: 236 training return: tensor(328.1861, device='cuda:0')
episode: 237 training return: tensor(304.5404, device='cuda:0')
episode: 238 training return: tensor(393.4204, device='cuda:0')
episode: 239 training return: tensor(390.1279, device='cuda:0')
epoch: 60 test_true_pfm: 3335.062346171902 sim_pfm: 350.57942448505975
episode: 240 training return: tensor(331.4720, device='cuda:0')
episode: 241 training return: tensor(277.5259, device='cuda:0')
episode: 242 training return: tensor(297.2863, device='cuda:0')
episode: 243 training return: tensor(383.3976, device='cuda:0')
epoch: 61 test_true_pfm: 3328.247239986511 sim_pfm: 326.4314620633668
episode: 244 training return: tensor(311.4558, device='cuda:0')
episode: 245 training return: tensor(307.1721, device='cuda:0')
episode: 246 training return: tensor(308.7753, device='cuda:0')
episode: 247 training return: tensor(248.0295, device='cuda:0')
epoch: 62 test_true_pfm: 3485.4037454074164 sim_pfm: 342.92583813450375
episode: 248 training return: tensor(307.0178, device='cuda:0')
episode: 249 training return: tensor(342.8988, device='cuda:0')
episode: 250 training return: tensor(329.1852, device='cuda:0')
episode: 251 training return: tensor(270.2993, device='cuda:0')
epoch: 63 test_true_pfm: 3450.397651839698 sim_pfm: 301.8438498370718
episode: 252 training return: tensor(289.4847, device='cuda:0')
episode: 253 training return: tensor(328.7444, device='cuda:0')
episode: 254 training return: tensor(297.3815, device='cuda:0')
episode: 255 training return: tensor(310.7153, device='cuda:0')
epoch: 64 test_true_pfm: 3407.0323212023723 sim_pfm: 322.84066765968845
episode: 256 training return: tensor(290.2141, device='cuda:0')
episode: 257 training return: tensor(169.0655, device='cuda:0')
episode: 258 training return: tensor(319.1086, device='cuda:0')
episode: 259 training return: tensor(342.6132, device='cuda:0')
epoch: 65 test_true_pfm: 3099.9609828153357 sim_pfm: 319.09893233942176
episode: 260 training return: tensor(355.1223, device='cuda:0')
episode: 261 training return: tensor(249.6298, device='cuda:0')
episode: 262 training return: tensor(314.1636, device='cuda:0')
episode: 263 training return: tensor(315.8560, device='cuda:0')
epoch: 66 test_true_pfm: 3413.9348942770425 sim_pfm: 313.16511950220837
episode: 264 training return: tensor(306.8929, device='cuda:0')
episode: 265 training return: tensor(233.3563, device='cuda:0')
episode: 266 training return: tensor(267.7790, device='cuda:0')
episode: 267 training return: tensor(298.4066, device='cuda:0')
epoch: 67 test_true_pfm: 3373.3585806999295 sim_pfm: 102.69313595024869
episode: 268 training return: tensor(290.6529, device='cuda:0')
episode: 269 training return: tensor(328.6295, device='cuda:0')
episode: 270 training return: tensor(345.6423, device='cuda:0')
episode: 271 training return: tensor(290.8506, device='cuda:0')
epoch: 68 test_true_pfm: 3491.504671734227 sim_pfm: 325.67652067373274
episode: 272 training return: tensor(-77.8522, device='cuda:0')
episode: 273 training return: tensor(353.3193, device='cuda:0')
episode: 274 training return: tensor(379.3185, device='cuda:0')
episode: 275 training return: tensor(325.9514, device='cuda:0')
epoch: 69 test_true_pfm: 3441.6943456984313 sim_pfm: 298.26959604069515
episode: 276 training return: tensor(279.8398, device='cuda:0')
episode: 277 training return: tensor(354.5126, device='cuda:0')
episode: 278 training return: tensor(282.2003, device='cuda:0')
episode: 279 training return: tensor(319.0573, device='cuda:0')
epoch: 70 test_true_pfm: 3387.52704024866 sim_pfm: 331.2155778206264
episode: 280 training return: tensor(336.2162, device='cuda:0')
episode: 281 training return: tensor(-82.0748, device='cuda:0')
episode: 282 training return: tensor(257.6458, device='cuda:0')
episode: 283 training return: tensor(280.2587, device='cuda:0')
epoch: 71 test_true_pfm: 3399.4706223680623 sim_pfm: 330.24614423852955
episode: 284 training return: tensor(354.6530, device='cuda:0')
episode: 285 training return: tensor(312.1248, device='cuda:0')
episode: 286 training return: tensor(281.9015, device='cuda:0')
episode: 287 training return: tensor(282.8790, device='cuda:0')
epoch: 72 test_true_pfm: 3294.7554193992596 sim_pfm: 324.775688698069
episode: 288 training return: tensor(369.1965, device='cuda:0')
episode: 289 training return: tensor(353.1953, device='cuda:0')
episode: 290 training return: tensor(303.3982, device='cuda:0')
episode: 291 training return: tensor(240.2005, device='cuda:0')
epoch: 73 test_true_pfm: 3336.0844114101947 sim_pfm: 245.8976849980439
episode: 292 training return: tensor(70.0041, device='cuda:0')
episode: 293 training return: tensor(279.9165, device='cuda:0')
episode: 294 training return: tensor(-383.1530, device='cuda:0')
episode: 295 training return: tensor(312.8074, device='cuda:0')
epoch: 74 test_true_pfm: 3429.4882915083817 sim_pfm: 175.81097323657013
episode: 296 training return: tensor(324.0221, device='cuda:0')
episode: 297 training return: tensor(321.4079, device='cuda:0')
episode: 298 training return: tensor(313.9913, device='cuda:0')
episode: 299 training return: tensor(250.0763, device='cuda:0')
epoch: 75 test_true_pfm: 3366.420311909135 sim_pfm: 298.47484357087524
episode: 300 training return: tensor(313.2852, device='cuda:0')
episode: 301 training return: tensor(308.3712, device='cuda:0')
episode: 302 training return: tensor(283.2949, device='cuda:0')
episode: 303 training return: tensor(277.7218, device='cuda:0')
epoch: 76 test_true_pfm: 3435.3391928473807 sim_pfm: 261.44350464326754
episode: 304 training return: tensor(352.1542, device='cuda:0')
episode: 305 training return: tensor(305.3649, device='cuda:0')
episode: 306 training return: tensor(299.5850, device='cuda:0')
episode: 307 training return: tensor(305.9988, device='cuda:0')
epoch: 77 test_true_pfm: 3393.212405741477 sim_pfm: 309.16516234356095
episode: 308 training return: tensor(332.3089, device='cuda:0')
episode: 309 training return: tensor(330.8723, device='cuda:0')
episode: 310 training return: tensor(295.6649, device='cuda:0')
episode: 311 training return: tensor(364.2869, device='cuda:0')
epoch: 78 test_true_pfm: 3422.8972625888805 sim_pfm: 304.2500901055755
episode: 312 training return: tensor(297.8789, device='cuda:0')
episode: 313 training return: tensor(322.2552, device='cuda:0')
episode: 314 training return: tensor(297.7153, device='cuda:0')
episode: 315 training return: tensor(351.5229, device='cuda:0')
epoch: 79 test_true_pfm: 3470.6876795543344 sim_pfm: 324.35758230646024
episode: 316 training return: tensor(356.1162, device='cuda:0')
episode: 317 training return: tensor(358.3385, device='cuda:0')
episode: 318 training return: tensor(314.0495, device='cuda:0')
episode: 319 training return: tensor(298.6909, device='cuda:0')
epoch: 80 test_true_pfm: 3419.6749611342493 sim_pfm: 333.90568289146177
episode: 320 training return: tensor(295.3529, device='cuda:0')
episode: 321 training return: tensor(316.1856, device='cuda:0')
episode: 322 training return: tensor(255.7585, device='cuda:0')
episode: 323 training return: tensor(311.9020, device='cuda:0')
epoch: 81 test_true_pfm: 3368.06696241974 sim_pfm: 286.08753864178044
episode: 324 training return: tensor(298.6431, device='cuda:0')
episode: 325 training return: tensor(-22.1787, device='cuda:0')
episode: 326 training return: tensor(-54.7085, device='cuda:0')
episode: 327 training return: tensor(318.2428, device='cuda:0')
epoch: 82 test_true_pfm: 3481.4407381159094 sim_pfm: 307.2928444823289
episode: 328 training return: tensor(308.5939, device='cuda:0')
episode: 329 training return: tensor(348.2609, device='cuda:0')
episode: 330 training return: tensor(329.9576, device='cuda:0')
episode: 331 training return: tensor(281.4895, device='cuda:0')
epoch: 83 test_true_pfm: 3422.6826452589394 sim_pfm: 345.162541101318
episode: 332 training return: tensor(341.5168, device='cuda:0')
episode: 333 training return: tensor(396.2531, device='cuda:0')
episode: 334 training return: tensor(369.8133, device='cuda:0')
episode: 335 training return: tensor(-428.4383, device='cuda:0')
epoch: 84 test_true_pfm: 3378.443600255432 sim_pfm: 365.2296548113033
episode: 336 training return: tensor(20.5503, device='cuda:0')
episode: 337 training return: tensor(276.4933, device='cuda:0')
episode: 338 training return: tensor(366.7391, device='cuda:0')
episode: 339 training return: tensor(348.3586, device='cuda:0')
epoch: 85 test_true_pfm: 3319.8954050582056 sim_pfm: 340.6972981110448
episode: 340 training return: tensor(222.5994, device='cuda:0')
episode: 341 training return: tensor(287.7324, device='cuda:0')
episode: 342 training return: tensor(309.1078, device='cuda:0')
episode: 343 training return: tensor(202.7289, device='cuda:0')
epoch: 86 test_true_pfm: 3379.692677830853 sim_pfm: 342.19432958507485
episode: 344 training return: tensor(303.7791, device='cuda:0')
episode: 345 training return: tensor(326.1601, device='cuda:0')
episode: 346 training return: tensor(341.9265, device='cuda:0')
episode: 347 training return: tensor(361.7746, device='cuda:0')
epoch: 87 test_true_pfm: 3411.53405811385 sim_pfm: 322.3978058978876
episode: 348 training return: tensor(336.7588, device='cuda:0')
episode: 349 training return: tensor(-99.0168, device='cuda:0')
episode: 350 training return: tensor(295.9256, device='cuda:0')
episode: 351 training return: tensor(286.4368, device='cuda:0')
epoch: 88 test_true_pfm: 3418.1885441289105 sim_pfm: 339.45062227074715
episode: 352 training return: tensor(20.2164, device='cuda:0')
episode: 353 training return: tensor(256.7082, device='cuda:0')
episode: 354 training return: tensor(273.5778, device='cuda:0')
episode: 355 training return: tensor(318.7935, device='cuda:0')
epoch: 89 test_true_pfm: 3461.284783011402 sim_pfm: 267.98889513887116
episode: 356 training return: tensor(221.2874, device='cuda:0')
episode: 357 training return: tensor(351.2623, device='cuda:0')
episode: 358 training return: tensor(329.6740, device='cuda:0')
episode: 359 training return: tensor(326.6779, device='cuda:0')
epoch: 90 test_true_pfm: 3402.560428892128 sim_pfm: 299.31068014978274
episode: 360 training return: tensor(206.2582, device='cuda:0')
episode: 361 training return: tensor(343.5153, device='cuda:0')
episode: 362 training return: tensor(311.8703, device='cuda:0')
episode: 363 training return: tensor(247.1403, device='cuda:0')
epoch: 91 test_true_pfm: 3430.3846259765855 sim_pfm: 308.3208245615048
episode: 364 training return: tensor(389.5651, device='cuda:0')
episode: 365 training return: tensor(352.3316, device='cuda:0')
episode: 366 training return: tensor(374.4485, device='cuda:0')
episode: 367 training return: tensor(322.7650, device='cuda:0')
epoch: 92 test_true_pfm: 3402.5429408593495 sim_pfm: 397.92692360301345
episode: 368 training return: tensor(357.7812, device='cuda:0')
episode: 369 training return: tensor(334.3924, device='cuda:0')
episode: 370 training return: tensor(187.0918, device='cuda:0')
episode: 371 training return: tensor(363.1856, device='cuda:0')
epoch: 93 test_true_pfm: 3405.2394260218803 sim_pfm: 290.3998304938238
episode: 372 training return: tensor(283.2727, device='cuda:0')
episode: 373 training return: tensor(331.5063, device='cuda:0')
episode: 374 training return: tensor(316.0305, device='cuda:0')
episode: 375 training return: tensor(200.4171, device='cuda:0')
epoch: 94 test_true_pfm: 3373.966577143421 sim_pfm: 290.4723167270325
episode: 376 training return: tensor(304.4846, device='cuda:0')
episode: 377 training return: tensor(364.5092, device='cuda:0')
episode: 378 training return: tensor(269.2508, device='cuda:0')
episode: 379 training return: tensor(337.9035, device='cuda:0')
epoch: 95 test_true_pfm: 3392.3466863877934 sim_pfm: 333.6152954810823
episode: 380 training return: tensor(314.1713, device='cuda:0')
episode: 381 training return: tensor(344.0752, device='cuda:0')
episode: 382 training return: tensor(312.1227, device='cuda:0')
episode: 383 training return: tensor(23.2783, device='cuda:0')
epoch: 96 test_true_pfm: 3446.9131217643935 sim_pfm: 326.2970035325464
episode: 384 training return: tensor(316.6318, device='cuda:0')
episode: 385 training return: tensor(287.8490, device='cuda:0')
episode: 386 training return: tensor(328.0418, device='cuda:0')
episode: 387 training return: tensor(327.7710, device='cuda:0')
epoch: 97 test_true_pfm: 3407.640228805131 sim_pfm: 285.76664011068834
episode: 388 training return: tensor(299.5754, device='cuda:0')
episode: 389 training return: tensor(384.0709, device='cuda:0')
episode: 390 training return: tensor(304.0268, device='cuda:0')
episode: 391 training return: tensor(330.5916, device='cuda:0')
epoch: 98 test_true_pfm: 3417.1042729721735 sim_pfm: 306.8763243802823
episode: 392 training return: tensor(359.1289, device='cuda:0')
episode: 393 training return: tensor(339.5447, device='cuda:0')
episode: 394 training return: tensor(272.0148, device='cuda:0')
episode: 395 training return: tensor(303.7179, device='cuda:0')
epoch: 99 test_true_pfm: 3422.3960921550483 sim_pfm: 323.9365337005681
episode: 396 training return: tensor(362.8703, device='cuda:0')
episode: 397 training return: tensor(381.1285, device='cuda:0')
episode: 398 training return: tensor(303.3539, device='cuda:0')
episode: 399 training return: tensor(370.2921, device='cuda:0')
epoch: 100 test_true_pfm: 3428.0151035244594 sim_pfm: 329.4452641737492
episode: 400 training return: tensor(288.7872, device='cuda:0')
episode: 401 training return: tensor(277.2273, device='cuda:0')
episode: 402 training return: tensor(313.3363, device='cuda:0')
episode: 403 training return: tensor(333.4373, device='cuda:0')
epoch: 101 test_true_pfm: 3031.052206023826 sim_pfm: 348.455837218217
episode: 404 training return: tensor(333.9949, device='cuda:0')
episode: 405 training return: tensor(316.7114, device='cuda:0')
episode: 406 training return: tensor(303.0320, device='cuda:0')
episode: 407 training return: tensor(314.6851, device='cuda:0')
epoch: 102 test_true_pfm: 3449.345341899363 sim_pfm: 324.59162411217886
episode: 408 training return: tensor(280.1050, device='cuda:0')
episode: 409 training return: tensor(358.8359, device='cuda:0')
episode: 410 training return: tensor(293.9486, device='cuda:0')
episode: 411 training return: tensor(-19.2359, device='cuda:0')
epoch: 103 test_true_pfm: 3080.866433330744 sim_pfm: 335.84091090525425
episode: 412 training return: tensor(277.6935, device='cuda:0')
episode: 413 training return: tensor(243.9770, device='cuda:0')
episode: 414 training return: tensor(392.9486, device='cuda:0')
episode: 415 training return: tensor(385.1865, device='cuda:0')
epoch: 104 test_true_pfm: 3419.227394442605 sim_pfm: 322.8761747783671
episode: 416 training return: tensor(349.1832, device='cuda:0')
episode: 417 training return: tensor(316.8663, device='cuda:0')
episode: 418 training return: tensor(301.9366, device='cuda:0')
episode: 419 training return: tensor(355.5720, device='cuda:0')
epoch: 105 test_true_pfm: 3338.207518707828 sim_pfm: 306.3418453478953
episode: 420 training return: tensor(350.4688, device='cuda:0')
episode: 421 training return: tensor(337.1518, device='cuda:0')
episode: 422 training return: tensor(6.6903, device='cuda:0')
episode: 423 training return: tensor(342.9868, device='cuda:0')
epoch: 106 test_true_pfm: 3441.845797269005 sim_pfm: 280.05856723464484
episode: 424 training return: tensor(248.8290, device='cuda:0')
episode: 425 training return: tensor(304.2547, device='cuda:0')
episode: 426 training return: tensor(330.1397, device='cuda:0')
episode: 427 training return: tensor(-92.8483, device='cuda:0')
epoch: 107 test_true_pfm: 3439.2631084496147 sim_pfm: 311.53453396906843
episode: 428 training return: tensor(325.3354, device='cuda:0')
episode: 429 training return: tensor(352.1317, device='cuda:0')
episode: 430 training return: tensor(354.3200, device='cuda:0')
episode: 431 training return: tensor(326.9843, device='cuda:0')
epoch: 108 test_true_pfm: 3418.4868803514287 sim_pfm: 209.5295089746845
episode: 432 training return: tensor(-7.1912, device='cuda:0')
episode: 433 training return: tensor(370.0623, device='cuda:0')
episode: 434 training return: tensor(334.8390, device='cuda:0')
episode: 435 training return: tensor(376.6700, device='cuda:0')
epoch: 109 test_true_pfm: 3376.949246461665 sim_pfm: 384.10472798200016
episode: 436 training return: tensor(316.0777, device='cuda:0')
episode: 437 training return: tensor(346.0124, device='cuda:0')
episode: 438 training return: tensor(300.0941, device='cuda:0')
episode: 439 training return: tensor(313.6313, device='cuda:0')
epoch: 110 test_true_pfm: 3382.540327206551 sim_pfm: 310.4159127419504
episode: 440 training return: tensor(262.4914, device='cuda:0')
episode: 441 training return: tensor(329.0798, device='cuda:0')
episode: 442 training return: tensor(313.1892, device='cuda:0')
episode: 443 training return: tensor(370.2132, device='cuda:0')
epoch: 111 test_true_pfm: 3403.655838699066 sim_pfm: 320.5043937607746
episode: 444 training return: tensor(-23.6283, device='cuda:0')
episode: 445 training return: tensor(301.7398, device='cuda:0')
episode: 446 training return: tensor(381.3551, device='cuda:0')
episode: 447 training return: tensor(310.5101, device='cuda:0')
epoch: 112 test_true_pfm: 3381.0209330542825 sim_pfm: 328.85306701863493
episode: 448 training return: tensor(305.8364, device='cuda:0')
episode: 449 training return: tensor(294.8617, device='cuda:0')
episode: 450 training return: tensor(344.1617, device='cuda:0')
episode: 451 training return: tensor(305.1293, device='cuda:0')
epoch: 113 test_true_pfm: 3440.5046509723466 sim_pfm: 342.3748671626963
episode: 452 training return: tensor(-247.6601, device='cuda:0')
episode: 453 training return: tensor(342.7237, device='cuda:0')
episode: 454 training return: tensor(332.2475, device='cuda:0')
episode: 455 training return: tensor(261.4878, device='cuda:0')
epoch: 114 test_true_pfm: 3466.619546715506 sim_pfm: 363.1594216397013
episode: 456 training return: tensor(318.2984, device='cuda:0')
episode: 457 training return: tensor(276.0164, device='cuda:0')
episode: 458 training return: tensor(289.4578, device='cuda:0')
episode: 459 training return: tensor(343.2181, device='cuda:0')
epoch: 115 test_true_pfm: 3449.1967347989917 sim_pfm: 353.0305939970228
episode: 460 training return: tensor(355.7065, device='cuda:0')
episode: 461 training return: tensor(295.6831, device='cuda:0')
episode: 462 training return: tensor(335.1882, device='cuda:0')
episode: 463 training return: tensor(344.8745, device='cuda:0')
epoch: 116 test_true_pfm: 3439.571993490213 sim_pfm: 325.32661763286643
episode: 464 training return: tensor(327.7883, device='cuda:0')
episode: 465 training return: tensor(349.2780, device='cuda:0')
episode: 466 training return: tensor(-60.0093, device='cuda:0')
episode: 467 training return: tensor(303.4772, device='cuda:0')
epoch: 117 test_true_pfm: 3492.5480111372326 sim_pfm: 384.64289534006576
episode: 468 training return: tensor(326.0504, device='cuda:0')
episode: 469 training return: tensor(311.1973, device='cuda:0')
episode: 470 training return: tensor(351.7329, device='cuda:0')
episode: 471 training return: tensor(341.6584, device='cuda:0')
epoch: 118 test_true_pfm: 3397.5835207832656 sim_pfm: 294.6054571871937
episode: 472 training return: tensor(328.6756, device='cuda:0')
episode: 473 training return: tensor(313.0790, device='cuda:0')
episode: 474 training return: tensor(426.3795, device='cuda:0')
episode: 475 training return: tensor(285.7151, device='cuda:0')
epoch: 119 test_true_pfm: 3380.6505135818775 sim_pfm: 310.7481168424517
episode: 476 training return: tensor(349.9222, device='cuda:0')
episode: 477 training return: tensor(301.1122, device='cuda:0')
episode: 478 training return: tensor(332.4829, device='cuda:0')
episode: 479 training return: tensor(313.8898, device='cuda:0')
epoch: 120 test_true_pfm: 3440.310686137838 sim_pfm: 340.5332636182041
episode: 480 training return: tensor(304.9534, device='cuda:0')
episode: 481 training return: tensor(301.4363, device='cuda:0')
episode: 482 training return: tensor(364.4920, device='cuda:0')
episode: 483 training return: tensor(313.6071, device='cuda:0')
epoch: 121 test_true_pfm: 3463.696561252966 sim_pfm: 371.68756484552677
episode: 484 training return: tensor(300.3565, device='cuda:0')
episode: 485 training return: tensor(-7.6964, device='cuda:0')
episode: 486 training return: tensor(341.7919, device='cuda:0')
episode: 487 training return: tensor(386.1980, device='cuda:0')
epoch: 122 test_true_pfm: 3407.2097375562917 sim_pfm: 326.2353038743992
episode: 488 training return: tensor(299.9664, device='cuda:0')
episode: 489 training return: tensor(-30.1465, device='cuda:0')
episode: 490 training return: tensor(354.0392, device='cuda:0')
episode: 491 training return: tensor(337.1981, device='cuda:0')
epoch: 123 test_true_pfm: 3423.042137133462 sim_pfm: 296.6807160353346
episode: 492 training return: tensor(295.0254, device='cuda:0')
episode: 493 training return: tensor(312.0238, device='cuda:0')
episode: 494 training return: tensor(251.3337, device='cuda:0')
episode: 495 training return: tensor(352.4641, device='cuda:0')
epoch: 124 test_true_pfm: 3477.748128330592 sim_pfm: 306.8503602443573
episode: 496 training return: tensor(348.1714, device='cuda:0')
episode: 497 training return: tensor(264.9777, device='cuda:0')
episode: 498 training return: tensor(334.9917, device='cuda:0')
episode: 499 training return: tensor(351.4897, device='cuda:0')
epoch: 125 test_true_pfm: 3436.007164603481 sim_pfm: 304.09789430969005
episode: 500 training return: tensor(10.1518, device='cuda:0')
episode: 501 training return: tensor(301.6623, device='cuda:0')
episode: 502 training return: tensor(342.1758, device='cuda:0')
episode: 503 training return: tensor(306.6307, device='cuda:0')
epoch: 126 test_true_pfm: 3426.8008715088877 sim_pfm: 292.8361761603737
episode: 504 training return: tensor(365.8434, device='cuda:0')
episode: 505 training return: tensor(308.3115, device='cuda:0')
episode: 506 training return: tensor(329.8750, device='cuda:0')
episode: 507 training return: tensor(359.7728, device='cuda:0')
epoch: 127 test_true_pfm: 3365.19641665426 sim_pfm: 305.45106479426613
episode: 508 training return: tensor(266.3499, device='cuda:0')
episode: 509 training return: tensor(301.4011, device='cuda:0')
episode: 510 training return: tensor(348.2640, device='cuda:0')
episode: 511 training return: tensor(315.9528, device='cuda:0')
epoch: 128 test_true_pfm: 3376.4238948831626 sim_pfm: 322.41257015413913
episode: 512 training return: tensor(246.5080, device='cuda:0')
episode: 513 training return: tensor(252.9393, device='cuda:0')
episode: 514 training return: tensor(383.8638, device='cuda:0')
episode: 515 training return: tensor(315.0007, device='cuda:0')
epoch: 129 test_true_pfm: 3252.128940749866 sim_pfm: 342.7613813446078
episode: 516 training return: tensor(-13.8047, device='cuda:0')
episode: 517 training return: tensor(314.6296, device='cuda:0')
episode: 518 training return: tensor(320.9537, device='cuda:0')
episode: 519 training return: tensor(353.9963, device='cuda:0')
epoch: 130 test_true_pfm: 3491.1034687867727 sim_pfm: 348.40851978806313
episode: 520 training return: tensor(360.6547, device='cuda:0')
episode: 521 training return: tensor(335.8011, device='cuda:0')
episode: 522 training return: tensor(318.2411, device='cuda:0')
episode: 523 training return: tensor(307.1042, device='cuda:0')
epoch: 131 test_true_pfm: 3432.078040241516 sim_pfm: 340.86991794149316
episode: 524 training return: tensor(366.3857, device='cuda:0')
episode: 525 training return: tensor(289.8622, device='cuda:0')
episode: 526 training return: tensor(343.6785, device='cuda:0')
episode: 527 training return: tensor(329.1863, device='cuda:0')
epoch: 132 test_true_pfm: 3465.28373563539 sim_pfm: 359.64207726791693
episode: 528 training return: tensor(306.0172, device='cuda:0')
episode: 529 training return: tensor(319.5742, device='cuda:0')
episode: 530 training return: tensor(266.8841, device='cuda:0')
episode: 531 training return: tensor(349.2343, device='cuda:0')
epoch: 133 test_true_pfm: 3451.029938124137 sim_pfm: 354.83809858000797
episode: 532 training return: tensor(363.0065, device='cuda:0')
episode: 533 training return: tensor(326.7642, device='cuda:0')
episode: 534 training return: tensor(295.6037, device='cuda:0')
episode: 535 training return: tensor(326.1233, device='cuda:0')
epoch: 134 test_true_pfm: 3396.530553277977 sim_pfm: 335.6964746654655
episode: 536 training return: tensor(388.1501, device='cuda:0')
episode: 537 training return: tensor(322.9812, device='cuda:0')
episode: 538 training return: tensor(303.7397, device='cuda:0')
episode: 539 training return: tensor(300.8377, device='cuda:0')
epoch: 135 test_true_pfm: 3172.7930173110867 sim_pfm: 240.92520956334192
episode: 540 training return: tensor(329.2065, device='cuda:0')
episode: 541 training return: tensor(330.0314, device='cuda:0')
episode: 542 training return: tensor(363.9992, device='cuda:0')
episode: 543 training return: tensor(289.5688, device='cuda:0')
epoch: 136 test_true_pfm: 3497.3211673050605 sim_pfm: 278.19892556979903
episode: 544 training return: tensor(326.8370, device='cuda:0')
episode: 545 training return: tensor(316.7163, device='cuda:0')
episode: 546 training return: tensor(346.2765, device='cuda:0')
episode: 547 training return: tensor(301.7733, device='cuda:0')
epoch: 137 test_true_pfm: 3499.0802000824847 sim_pfm: 398.1315784448525
episode: 548 training return: tensor(301.7281, device='cuda:0')
episode: 549 training return: tensor(322.9702, device='cuda:0')
episode: 550 training return: tensor(-21.5202, device='cuda:0')
episode: 551 training return: tensor(357.4586, device='cuda:0')
epoch: 138 test_true_pfm: 3413.1960239568725 sim_pfm: 314.6848704493993
episode: 552 training return: tensor(355.2504, device='cuda:0')
episode: 553 training return: tensor(350.6245, device='cuda:0')
episode: 554 training return: tensor(376.6812, device='cuda:0')
episode: 555 training return: tensor(324.2943, device='cuda:0')
epoch: 139 test_true_pfm: 3398.3789615502405 sim_pfm: 353.44821928449284
episode: 556 training return: tensor(305.4670, device='cuda:0')
episode: 557 training return: tensor(290.6679, device='cuda:0')
episode: 558 training return: tensor(402.0093, device='cuda:0')
episode: 559 training return: tensor(295.0870, device='cuda:0')
epoch: 140 test_true_pfm: 3247.84221658995 sim_pfm: 341.2561884239258
episode: 560 training return: tensor(-121.4793, device='cuda:0')
episode: 561 training return: tensor(294.4133, device='cuda:0')
episode: 562 training return: tensor(307.1217, device='cuda:0')
episode: 563 training return: tensor(366.3820, device='cuda:0')
epoch: 141 test_true_pfm: 3475.91249341185 sim_pfm: 329.5413003706587
episode: 564 training return: tensor(331.9711, device='cuda:0')
episode: 565 training return: tensor(326.5554, device='cuda:0')
episode: 566 training return: tensor(297.5973, device='cuda:0')
episode: 567 training return: tensor(324.3125, device='cuda:0')
epoch: 142 test_true_pfm: 3456.731881430542 sim_pfm: 322.6488884848077
episode: 568 training return: tensor(358.7528, device='cuda:0')
episode: 569 training return: tensor(344.0544, device='cuda:0')
episode: 570 training return: tensor(190.2203, device='cuda:0')
episode: 571 training return: tensor(377.1543, device='cuda:0')
epoch: 143 test_true_pfm: 3393.481156965439 sim_pfm: 320.44327538794215
episode: 572 training return: tensor(362.3703, device='cuda:0')
episode: 573 training return: tensor(319.3496, device='cuda:0')
episode: 574 training return: tensor(372.7313, device='cuda:0')
episode: 575 training return: tensor(340.6243, device='cuda:0')
epoch: 144 test_true_pfm: 3470.0189783052956 sim_pfm: 363.018742664591
episode: 576 training return: tensor(216.1515, device='cuda:0')
episode: 577 training return: tensor(325.4117, device='cuda:0')
episode: 578 training return: tensor(340.5282, device='cuda:0')
episode: 579 training return: tensor(343.2067, device='cuda:0')
epoch: 145 test_true_pfm: 3423.643265946357 sim_pfm: 350.49999459959025
episode: 580 training return: tensor(303.7789, device='cuda:0')
episode: 581 training return: tensor(386.8401, device='cuda:0')
episode: 582 training return: tensor(300.0257, device='cuda:0')
episode: 583 training return: tensor(316.1111, device='cuda:0')
epoch: 146 test_true_pfm: 3442.810114088578 sim_pfm: 316.02581795781344
episode: 584 training return: tensor(285.0264, device='cuda:0')
episode: 585 training return: tensor(340.2375, device='cuda:0')
episode: 586 training return: tensor(310.4193, device='cuda:0')
episode: 587 training return: tensor(359.7118, device='cuda:0')
epoch: 147 test_true_pfm: 3476.0203893517278 sim_pfm: 355.0005850423283
episode: 588 training return: tensor(-21.1383, device='cuda:0')
episode: 589 training return: tensor(336.0077, device='cuda:0')
episode: 590 training return: tensor(351.6650, device='cuda:0')
episode: 591 training return: tensor(358.7525, device='cuda:0')
epoch: 148 test_true_pfm: 3460.6418081270926 sim_pfm: 336.11608979006024
episode: 592 training return: tensor(349.1991, device='cuda:0')
episode: 593 training return: tensor(350.4797, device='cuda:0')
episode: 594 training return: tensor(277.3403, device='cuda:0')
episode: 595 training return: tensor(208.3964, device='cuda:0')
epoch: 149 test_true_pfm: 3415.8000076654685 sim_pfm: 310.10049949755194
episode: 596 training return: tensor(341.0979, device='cuda:0')
episode: 597 training return: tensor(-136.4897, device='cuda:0')
episode: 598 training return: tensor(273.0253, device='cuda:0')
episode: 599 training return: tensor(311.3765, device='cuda:0')
epoch: 150 test_true_pfm: 3468.1865840324936 sim_pfm: 334.66702733107377
