['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '30000', '--sub']
epoch: 0 training_loss 0.2562980401515961 test_loss: 0.19275300502777098
epoch: 1 training_loss 0.16391753651201724 test_loss: 0.17723821401596068
epoch: 2 training_loss 0.13548423271626234 test_loss: 0.12111209630966187
epoch: 3 training_loss 0.12610247446224093 test_loss: 0.13005452156066893
epoch: 4 training_loss 0.1233852837793529 test_loss: 0.11831154823303222
epoch: 5 training_loss 0.11904461864382028 test_loss: 0.14152854681015015
epoch: 6 training_loss 0.11466610364615917 test_loss: 0.10791918039321899
epoch: 7 training_loss 0.110527551099658 test_loss: 0.11635912656784057
epoch: 8 training_loss 0.10494050309062004 test_loss: 0.1189117431640625
epoch: 9 training_loss 0.10163968512788416 test_loss: 0.12963409423828126
epoch: 10 training_loss 0.10146304100751877 test_loss: 0.11394635438919068
epoch: 11 training_loss 0.10264375448226928 test_loss: 0.10293495655059814
epoch: 12 training_loss 0.10098505757749081 test_loss: 0.1124117612838745
epoch: 13 training_loss 0.10754314988851547 test_loss: 0.11070082187652588
epoch: 14 training_loss 0.10233197869732975 test_loss: 0.09835752248764038
epoch: 15 training_loss 0.1020976947247982 test_loss: 0.10628135204315185
epoch: 16 training_loss 0.09882192760705948 test_loss: 0.11358715295791626
epoch: 17 training_loss 0.09873340860009193 test_loss: 0.10311577320098878
epoch: 18 training_loss 0.09990683045238256 test_loss: 0.11368674039840698
epoch: 19 training_loss 0.09607451800256968 test_loss: 0.11958837509155273
epoch: 20 training_loss 0.09569701235741376 test_loss: 0.09834090471267701
epoch: 21 training_loss 0.09796867571771145 test_loss: 0.12852569818496704
epoch: 22 training_loss 0.09435409987345338 test_loss: 0.09443469643592835
epoch: 23 training_loss 0.09679486572742463 test_loss: 0.10233857631683349
epoch: 24 training_loss 0.09242056619375943 test_loss: 0.10285642147064208
epoch: 25 training_loss 0.09276049874722958 test_loss: 0.1081240177154541
epoch: 26 training_loss 0.09745987232774496 test_loss: 0.11781916618347169
epoch: 27 training_loss 0.10180846698582173 test_loss: 0.09926803708076477
epoch: 28 training_loss 0.09482897974550725 test_loss: 0.09882893562316894
epoch: 29 training_loss 0.09186801306903362 test_loss: 0.09934311509132385
epoch: 30 training_loss 0.0960030135512352 test_loss: 0.1041983723640442
epoch: 31 training_loss 0.09961340695619583 test_loss: 0.09048335552215576
epoch: 32 training_loss 0.09155717857182026 test_loss: 0.10234782695770264
epoch: 33 training_loss 0.08526783544570207 test_loss: 0.09313459396362304
epoch: 34 training_loss 0.09212076511234045 test_loss: 0.08739368915557862
epoch: 35 training_loss 0.09174290789291263 test_loss: 0.10623515844345092
epoch: 36 training_loss 0.09199045211076737 test_loss: 0.11797473430633545
epoch: 37 training_loss 0.0917358085513115 test_loss: 0.10024485588073731
epoch: 38 training_loss 0.08698259418830276 test_loss: 0.09592795372009277
epoch: 39 training_loss 0.08137249397113919 test_loss: 0.11197148561477661
epoch: 40 training_loss 0.09188948914408684 test_loss: 0.09149336218833923
epoch: 41 training_loss 0.09354253619909286 test_loss: 0.10341241359710693
epoch: 42 training_loss 0.08772836241871118 test_loss: 0.10494872331619262
epoch: 43 training_loss 0.08733749393373728 test_loss: 0.08690721392631531
epoch: 44 training_loss 0.08590621192008258 test_loss: 0.09743955731391907
epoch: 45 training_loss 0.09651939118281007 test_loss: 0.1027783989906311
epoch: 46 training_loss 0.08796734960749746 test_loss: 0.10081721544265747
epoch: 47 training_loss 0.08613746397197247 test_loss: 0.10116788148880004
epoch: 48 training_loss 0.09224907616153359 test_loss: 0.09730969071388244
epoch: 49 training_loss 0.0811817012913525 test_loss: 0.09852204322814942
epoch: 50 training_loss 0.09076888320967555 test_loss: 0.10495818853378296
epoch: 51 training_loss 0.09126782432198524 test_loss: 0.10362706184387208
epoch: 52 training_loss 0.0921049245633185 test_loss: 0.10131982564926148
epoch: 53 training_loss 0.08769289653748275 test_loss: 0.09874452948570252
epoch: 54 training_loss 0.0847061268799007 test_loss: 0.10855240821838379
epoch: 55 training_loss 0.08652973787859082 test_loss: 0.09597092270851135
epoch: 56 training_loss 0.08445310439914465 test_loss: 0.09533045887947082
epoch: 57 training_loss 0.08952713292092085 test_loss: 0.0938210904598236
epoch: 58 training_loss 0.09217205572873353 test_loss: 0.09047754406929016
epoch: 59 training_loss 0.08615047080442309 test_loss: 0.0900178849697113
epoch: 60 training_loss 0.08219798477366566 test_loss: 0.10317796468734741
epoch: 61 training_loss 0.09098189894109965 test_loss: 0.09408417940139771
epoch: 62 training_loss 0.08230565908364952 test_loss: 0.10588963031768799
epoch: 63 training_loss 0.07536302033811808 test_loss: 0.10558656454086304
epoch: 64 training_loss 0.08753976717591286 test_loss: 0.11881027221679688
epoch: 65 training_loss 0.09339220801368356 test_loss: 0.09618622064590454
epoch: 66 training_loss 0.07969774869270622 test_loss: 0.10480976104736328
epoch: 67 training_loss 0.08497886158525944 test_loss: 0.11664608716964722
epoch: 68 training_loss 0.08831559339538217 test_loss: 0.09333856701850891
epoch: 69 training_loss 0.09452457953244447 test_loss: 0.08337197303771973
epoch: 70 training_loss 0.09176432613283396 test_loss: 0.09559909701347351
epoch: 71 training_loss 0.08449725542217493 test_loss: 0.09850274920463561
epoch: 72 training_loss 0.0861157595552504 test_loss: 0.09505930542945862
epoch: 73 training_loss 0.08336032394319773 test_loss: 0.10393803119659424
epoch: 74 training_loss 0.08478466771543026 test_loss: 0.08523184657096863
epoch: 75 training_loss 0.09209055203944444 test_loss: 0.12188481092453003
epoch: 76 training_loss 0.08895327480509878 test_loss: 0.09538472890853882
epoch: 77 training_loss 0.0766954566910863 test_loss: 0.10873932838439941
epoch: 78 training_loss 0.0831065028347075 test_loss: 0.11353440284729004
epoch: 79 training_loss 0.08676686150953174 test_loss: 0.10085711479187012
epoch: 80 training_loss 0.08589406003244221 test_loss: 0.09911232590675353
epoch: 81 training_loss 0.07747295135632157 test_loss: 0.10326817035675048
epoch: 82 training_loss 0.08534528383985161 test_loss: 0.10379829406738281
epoch: 83 training_loss 0.08740876780822873 test_loss: 0.11068030595779418
epoch: 84 training_loss 0.07360804229974746 test_loss: 0.09043070673942566
epoch: 85 training_loss 0.08274204667657614 test_loss: 0.10621075630187989
epoch: 86 training_loss 0.08144220799207687 test_loss: 0.10123523473739623
epoch: 87 training_loss 0.09053174125030637 test_loss: 0.09831352233886718
epoch: 88 training_loss 0.08055824507027864 test_loss: 0.11177414655685425
epoch: 89 training_loss 0.08103875927627087 test_loss: 0.10722206830978394
epoch: 90 training_loss 0.09236520204693079 test_loss: 0.10523929595947265
epoch: 91 training_loss 0.08713529834523798 test_loss: 0.10631881952285767
epoch: 92 training_loss 0.08547719903290271 test_loss: 0.09849106669425964
epoch: 93 training_loss 0.08447115380316973 test_loss: 0.1057349681854248
epoch: 94 training_loss 0.07913510026410223 test_loss: 0.11448774337768555
epoch: 95 training_loss 0.08166282070800662 test_loss: 0.1011350154876709
epoch: 96 training_loss 0.08825077252462506 test_loss: 0.10095748901367188
epoch: 97 training_loss 0.08544822867959738 test_loss: 0.09931935667991638
epoch: 98 training_loss 0.08262260902673006 test_loss: 0.09421997666358947
epoch: 99 training_loss 0.08424067180603742 test_loss: 0.10534827709197998
epoch: 100 training_loss 0.08908511106856168 test_loss: 0.09453888535499573
epoch: 101 training_loss 0.08881979554891586 test_loss: 0.10006074905395508
epoch: 102 training_loss 0.08440384382382035 test_loss: 0.1011279582977295
epoch: 103 training_loss 0.08506012184545397 test_loss: 0.09593988060951233
epoch: 104 training_loss 0.0845506783388555 test_loss: 0.08045246601104736
epoch: 105 training_loss 0.08466030234470963 test_loss: 0.09799467325210572
epoch: 106 training_loss 0.08486980883404613 test_loss: 0.10783805847167968
epoch: 107 training_loss 0.08973442986607552 test_loss: 0.10449283123016358
epoch: 108 training_loss 0.08308518767356872 test_loss: 0.10252761840820312
epoch: 109 training_loss 0.08614625642076135 test_loss: 0.12019127607345581
epoch: 110 training_loss 0.07874391261488199 test_loss: 0.09744270443916321
epoch: 111 training_loss 0.08181917425245047 test_loss: 0.08989012241363525
epoch: 112 training_loss 0.08419035555794835 test_loss: 0.10023820400238037
epoch: 113 training_loss 0.07916015369817614 test_loss: 0.10229437351226807
epoch: 114 training_loss 0.08544461024925112 test_loss: 0.0923276424407959
epoch: 115 training_loss 0.07927947942167521 test_loss: 0.11540727615356446
epoch: 116 training_loss 0.08143430151045322 test_loss: 0.103553307056427
epoch: 117 training_loss 0.07923453008756041 test_loss: 0.10491952896118165
epoch: 118 training_loss 0.08254051106050611 test_loss: 0.10891568660736084
epoch: 119 training_loss 0.0811197017878294 test_loss: 0.08969894647598267
epoch: 120 training_loss 0.08193556165322662 test_loss: 0.09654159545898437
epoch: 121 training_loss 0.08002187162637711 test_loss: 0.10539504289627075
epoch: 122 training_loss 0.08718665059655904 test_loss: 0.10655753612518311
epoch: 123 training_loss 0.08216776803135872 test_loss: 0.09367173314094543
epoch: 124 training_loss 0.08143258927389979 test_loss: 0.09096860885620117
epoch: 125 training_loss 0.08180150259286165 test_loss: 0.10753246545791625
epoch: 126 training_loss 0.07997924416325987 test_loss: 0.10452214479446412
epoch: 127 training_loss 0.08107430456206202 test_loss: 0.09646154642105102
epoch: 128 training_loss 0.08080218365415931 test_loss: 0.10087891817092895
epoch: 129 training_loss 0.07932443328201771 test_loss: 0.11783828735351562
epoch: 130 training_loss 0.07928102903068066 test_loss: 0.09834855794906616
epoch: 131 training_loss 0.07972181651741267 test_loss: 0.09257071614265441
epoch: 132 training_loss 0.08004663214087486 test_loss: 0.09203737378120422
epoch: 133 training_loss 0.0788312990963459 test_loss: 0.1111291527748108
epoch: 134 training_loss 0.08022613614797593 test_loss: 0.10675187110900879
epoch: 135 training_loss 0.08787057576701045 test_loss: 0.09763222932815552
epoch: 136 training_loss 0.07453134502284228 test_loss: 0.09880992770195007
epoch: 137 training_loss 0.08891811879351735 test_loss: 0.10344411134719848
epoch: 138 training_loss 0.0825844301097095 test_loss: 0.11009613275527955
epoch: 139 training_loss 0.08370965527370572 test_loss: 0.10710511207580567
epoch: 140 training_loss 0.0791183929052204 test_loss: 0.09610254764556884
epoch: 141 training_loss 0.08081239307299257 test_loss: 0.10616813898086548
epoch: 142 training_loss 0.08205671330913901 test_loss: 0.09582273960113526
epoch: 143 training_loss 0.07899381764233113 test_loss: 0.10347872972488403
epoch: 144 training_loss 0.08286481773480774 test_loss: 0.09887418150901794
epoch: 145 training_loss 0.07657337170094251 test_loss: 0.08962292671203613
epoch: 146 training_loss 0.08248412465676665 test_loss: 0.11264944076538086
epoch: 147 training_loss 0.07824446059763432 test_loss: 0.08968530893325806
epoch: 148 training_loss 0.07322166819125414 test_loss: 0.11322554349899291
epoch: 149 training_loss 0.07961896743625402 test_loss: 0.09704758524894715
epoch: 0 training_loss 39.102650833129886 test_loss: 20.88341369628906
epoch: 1 training_loss 17.241848077774048 test_loss: 14.712847900390624
epoch: 2 training_loss 12.77708553314209 test_loss: 11.484551239013673
epoch: 3 training_loss 10.633974485397339 test_loss: 10.0883056640625
epoch: 4 training_loss 9.63078113079071 test_loss: 8.967623138427735
epoch: 5 training_loss 8.549914445877075 test_loss: 8.191958618164062
epoch: 6 training_loss 7.760223817825318 test_loss: 7.544805908203125
epoch: 7 training_loss 7.30272403717041 test_loss: 6.830731964111328
epoch: 8 training_loss 6.906008625030518 test_loss: 6.8395851135253904
epoch: 9 training_loss 6.365302815437317 test_loss: 6.620024108886719
epoch: 10 training_loss 6.276962251663208 test_loss: 6.193159484863282
epoch: 11 training_loss 5.9964144945144655 test_loss: 5.771002578735351
epoch: 12 training_loss 5.554033703804016 test_loss: 5.6049346923828125
epoch: 13 training_loss 5.460801422595978 test_loss: 5.53956298828125
epoch: 14 training_loss 5.376713948249817 test_loss: 5.4387256622314455
epoch: 15 training_loss 5.041376466751099 test_loss: 5.312019729614258
epoch: 16 training_loss 4.969000999927521 test_loss: 5.112250137329101
epoch: 17 training_loss 4.741333332061767 test_loss: 4.831113815307617
epoch: 18 training_loss 4.724558312892913 test_loss: 4.7743385314941404
epoch: 19 training_loss 4.63161416053772 test_loss: 4.813915252685547
epoch: 20 training_loss 4.421153328418732 test_loss: 4.562959289550781
epoch: 21 training_loss 4.426639029979706 test_loss: 4.638983917236328
epoch: 22 training_loss 4.294977388381958 test_loss: 4.2853271484375
epoch: 23 training_loss 4.1700674915313725 test_loss: 4.17693862915039
epoch: 24 training_loss 4.20247296333313 test_loss: 4.125133514404297
epoch: 25 training_loss 4.019634432792664 test_loss: 4.191911315917968
epoch: 26 training_loss 4.0008305025100706 test_loss: 3.9387676239013674
epoch: 27 training_loss 3.9245170903205873 test_loss: 3.901809310913086
epoch: 28 training_loss 3.761434359550476 test_loss: 4.177215957641602
epoch: 29 training_loss 3.8285617518424986 test_loss: 3.7863800048828127
epoch: 30 training_loss 3.797363362312317 test_loss: 3.823370361328125
epoch: 31 training_loss 3.6971812653541565 test_loss: 3.7621883392333983
epoch: 32 training_loss 3.7013221740722657 test_loss: 3.7862289428710936
epoch: 33 training_loss 3.703376178741455 test_loss: 3.4953983306884764
epoch: 34 training_loss 3.550915744304657 test_loss: 3.7568904876708986
epoch: 35 training_loss 3.492695159912109 test_loss: 3.714844512939453
epoch: 36 training_loss 3.497552354335785 test_loss: 3.5334938049316404
epoch: 37 training_loss 3.4989239597320556 test_loss: 3.5084487915039064
epoch: 38 training_loss 3.372007040977478 test_loss: 3.4731483459472656
epoch: 39 training_loss 3.3378197264671328 test_loss: 3.4348129272460937
epoch: 40 training_loss 3.2902577328681946 test_loss: 3.363175964355469
epoch: 41 training_loss 3.3045321488380432 test_loss: 3.395395278930664
epoch: 42 training_loss 3.2789514446258545 test_loss: 3.4814876556396483
epoch: 43 training_loss 3.283354313373566 test_loss: 3.348489761352539
epoch: 44 training_loss 3.1410048389434815 test_loss: 3.234312057495117
epoch: 45 training_loss 3.176041877269745 test_loss: 3.259550857543945
epoch: 46 training_loss 3.1916068959236146 test_loss: 3.299036407470703
epoch: 47 training_loss 3.1759955883026123 test_loss: 3.149150085449219
epoch: 48 training_loss 3.1020599722862245 test_loss: 3.1443166732788086
epoch: 49 training_loss 3.034220025539398 test_loss: 3.268690490722656
epoch: 50 training_loss 3.1007346224784853 test_loss: 3.043213653564453
epoch: 51 training_loss 3.070267927646637 test_loss: 3.1006999969482423
epoch: 52 training_loss 3.004442722797394 test_loss: 3.016293525695801
epoch: 53 training_loss 3.037920587062836 test_loss: 3.2232070922851563
epoch: 54 training_loss 2.992547218799591 test_loss: 3.108108139038086
epoch: 55 training_loss 2.975319685935974 test_loss: 3.132024383544922
epoch: 56 training_loss 2.9583225226402283 test_loss: 3.0429906845092773
epoch: 57 training_loss 2.973318910598755 test_loss: 2.9615942001342774
epoch: 58 training_loss 2.9116580724716186 test_loss: 2.887811279296875
epoch: 59 training_loss 2.8608990335464477 test_loss: 3.0436080932617187
epoch: 60 training_loss 2.8692740106582644 test_loss: 2.9637325286865233
epoch: 61 training_loss 2.8930306744575502 test_loss: 3.0234909057617188
epoch: 62 training_loss 2.8275225067138674 test_loss: 2.9840085983276365
epoch: 63 training_loss 2.823235201835632 test_loss: 2.9996721267700197
epoch: 64 training_loss 2.80643609046936 test_loss: 2.9306350708007813
epoch: 65 training_loss 2.800532958507538 test_loss: 3.010576629638672
epoch: 66 training_loss 2.8247962832450866 test_loss: 2.8444480895996094
epoch: 67 training_loss 2.7856884956359864 test_loss: 2.794805908203125
epoch: 68 training_loss 2.7576615619659424 test_loss: 2.834341049194336
epoch: 69 training_loss 2.7961517858505247 test_loss: 2.819981002807617
epoch: 70 training_loss 2.748074154853821 test_loss: 2.8323360443115235
epoch: 71 training_loss 2.7404230999946595 test_loss: 2.632440376281738
epoch: 72 training_loss 2.72362633228302 test_loss: 2.859360122680664
epoch: 73 training_loss 2.7066297578811644 test_loss: 2.8011358261108397
epoch: 74 training_loss 2.757277021408081 test_loss: 2.871243476867676
epoch: 75 training_loss 2.6960539960861207 test_loss: 2.7784906387329102
epoch: 76 training_loss 2.6624333333969115 test_loss: 2.8472766876220703
epoch: 77 training_loss 2.7298883175849915 test_loss: 2.762744140625
epoch: 78 training_loss 2.6060067677497862 test_loss: 2.6483030319213867
epoch: 79 training_loss 2.698110818862915 test_loss: 2.786542510986328
epoch: 80 training_loss 2.660739612579346 test_loss: 2.868012237548828
epoch: 81 training_loss 2.6701817440986635 test_loss: 2.6231237411499024
epoch: 82 training_loss 2.6043304443359374 test_loss: 2.663400650024414
epoch: 83 training_loss 2.6546384489536283 test_loss: 2.7624271392822264
epoch: 84 training_loss 2.5874193000793455 test_loss: 2.667869758605957
epoch: 85 training_loss 2.642666701078415 test_loss: 2.6595964431762695
epoch: 86 training_loss 2.585348615646362 test_loss: 2.653269386291504
epoch: 87 training_loss 2.6174329900741578 test_loss: 2.726950454711914
epoch: 88 training_loss 2.536527680158615 test_loss: 2.7116796493530275
epoch: 89 training_loss 2.594151734113693 test_loss: 2.723356819152832
epoch: 90 training_loss 2.4875655806064607 test_loss: 2.6928432464599608
epoch: 91 training_loss 2.5287511312961577 test_loss: 2.6116916656494142
epoch: 92 training_loss 2.536113029718399 test_loss: 2.670706939697266
epoch: 93 training_loss 2.5757114720344543 test_loss: 2.684947204589844
epoch: 94 training_loss 2.503207379579544 test_loss: 2.695268440246582
epoch: 95 training_loss 2.5608914518356323 test_loss: 2.6572093963623047
epoch: 96 training_loss 2.4644900906085967 test_loss: 2.6068117141723635
epoch: 97 training_loss 2.5073315286636353 test_loss: 2.6119579315185546
epoch: 98 training_loss 2.566135308742523 test_loss: 2.5714576721191404
epoch: 99 training_loss 2.5029880738258363 test_loss: 2.562398910522461
epoch: 100 training_loss 2.5177507960796355 test_loss: 2.641637992858887
epoch: 101 training_loss 2.5053128921985626 test_loss: 2.488704299926758
epoch: 102 training_loss 2.457175784111023 test_loss: 2.611277198791504
epoch: 103 training_loss 2.42816025018692 test_loss: 2.5993356704711914
epoch: 104 training_loss 2.425458812713623 test_loss: 2.5688461303710937
epoch: 105 training_loss 2.4380009508132936 test_loss: 2.620384407043457
epoch: 106 training_loss 2.390950957536697 test_loss: 2.491312789916992
epoch: 107 training_loss 2.4111953461170197 test_loss: 2.4754364013671877
epoch: 108 training_loss 2.4579047000408174 test_loss: 2.635350799560547
epoch: 109 training_loss 2.47975933432579 test_loss: 2.442672538757324
epoch: 110 training_loss 2.4721822011470795 test_loss: 2.4854698181152344
epoch: 111 training_loss 2.4149821162223817 test_loss: 2.5476131439208984
epoch: 112 training_loss 2.4361928367614745 test_loss: 2.475760078430176
epoch: 113 training_loss 2.42689041018486 test_loss: 2.5243568420410156
epoch: 114 training_loss 2.362561239004135 test_loss: 2.500807189941406
epoch: 115 training_loss 2.4521868515014646 test_loss: 2.497381401062012
epoch: 116 training_loss 2.411658970117569 test_loss: 2.366377067565918
epoch: 117 training_loss 2.3777244699001314 test_loss: 2.4544198989868162
epoch: 118 training_loss 2.4369504976272585 test_loss: 2.526803398132324
epoch: 119 training_loss 2.394109333753586 test_loss: 2.536570167541504
epoch: 120 training_loss 2.3972121822834014 test_loss: 2.5343181610107424
epoch: 121 training_loss 2.35301074385643 test_loss: 2.4928682327270506
epoch: 122 training_loss 2.3131077635288237 test_loss: 2.4326290130615233
epoch: 123 training_loss 2.26776175737381 test_loss: 2.4654394149780274
epoch: 124 training_loss 2.372693886756897 test_loss: 2.451397705078125
epoch: 125 training_loss 2.3457712030410764 test_loss: 2.508680534362793
epoch: 126 training_loss 2.3013650465011595 test_loss: 2.3863594055175783
epoch: 127 training_loss 2.339547424316406 test_loss: 2.405401039123535
epoch: 128 training_loss 2.37626473903656 test_loss: 2.440414619445801
epoch: 129 training_loss 2.3416923415660857 test_loss: 2.4530126571655275
epoch: 130 training_loss 2.3485765659809115 test_loss: 2.3779802322387695
epoch: 131 training_loss 2.353854433298111 test_loss: 2.5056835174560548
epoch: 132 training_loss 2.2785082197189332 test_loss: 2.387385368347168
epoch: 133 training_loss 2.3017586207389833 test_loss: 2.434933662414551
epoch: 134 training_loss 2.3406493985652923 test_loss: 2.340272903442383
epoch: 135 training_loss 2.295787789821625 test_loss: 2.4280315399169923
epoch: 136 training_loss 2.326186876296997 test_loss: 2.4281246185302736
epoch: 137 training_loss 2.3156348145008088 test_loss: 2.500962257385254
epoch: 138 training_loss 2.3055381536483766 test_loss: 2.4555133819580077
epoch: 139 training_loss 2.25130707859993 test_loss: 2.463037872314453
epoch: 140 training_loss 2.253435080051422 test_loss: 2.366764259338379
epoch: 141 training_loss 2.295493146181107 test_loss: 2.3503284454345703
epoch: 142 training_loss 2.2756688630580904 test_loss: 2.3939224243164063
epoch: 143 training_loss 2.3255225133895876 test_loss: 2.28029842376709
epoch: 144 training_loss 2.226733503341675 test_loss: 2.4048334121704102
epoch: 145 training_loss 2.243871592283249 test_loss: 2.41656494140625
epoch: 146 training_loss 2.2783126866817476 test_loss: 2.3861343383789064
epoch: 147 training_loss 2.2354364931583404 test_loss: 2.298534393310547
epoch: 148 training_loss 2.2530304908752443 test_loss: 2.181156349182129
epoch: 149 training_loss 2.26593847990036 test_loss: 2.3828557968139648
2794.9789739779953
episode: 0 training return: tensor(321.3806, device='cuda:0')
episode: 1 training return: tensor(280.5189, device='cuda:0')
episode: 2 training return: tensor(310.1575, device='cuda:0')
episode: 3 training return: tensor(-39.4255, device='cuda:0')
epoch: 1 test_true_pfm: 3316.8313651925387 sim_pfm: 235.53350174141815
episode: 4 training return: tensor(-18.8166, device='cuda:0')
episode: 5 training return: tensor(261.6782, device='cuda:0')
episode: 6 training return: tensor(-122.5599, device='cuda:0')
episode: 7 training return: tensor(306.7730, device='cuda:0')
epoch: 2 test_true_pfm: 3346.526996357246 sim_pfm: 306.049711229658
episode: 8 training return: tensor(-31.4200, device='cuda:0')
episode: 9 training return: tensor(342.0069, device='cuda:0')
episode: 10 training return: tensor(305.2697, device='cuda:0')
episode: 11 training return: tensor(309.2671, device='cuda:0')
epoch: 3 test_true_pfm: 2948.233949247911 sim_pfm: 325.1166423613128
episode: 12 training return: tensor(312.4736, device='cuda:0')
episode: 13 training return: tensor(93.3623, device='cuda:0')
episode: 14 training return: tensor(327.7691, device='cuda:0')
episode: 15 training return: tensor(248.1658, device='cuda:0')
epoch: 4 test_true_pfm: 3219.6237449263754 sim_pfm: 266.2043001417284
episode: 16 training return: tensor(177.0077, device='cuda:0')
episode: 17 training return: tensor(-206.9000, device='cuda:0')
episode: 18 training return: tensor(340.6392, device='cuda:0')
episode: 19 training return: tensor(232.7178, device='cuda:0')
epoch: 5 test_true_pfm: 3181.5334965902216 sim_pfm: 150.68141735576015
episode: 20 training return: tensor(-119.9983, device='cuda:0')
episode: 21 training return: tensor(-66.5189, device='cuda:0')
episode: 22 training return: tensor(410.1414, device='cuda:0')
episode: 23 training return: tensor(246.1574, device='cuda:0')
epoch: 6 test_true_pfm: 3245.3718669263167 sim_pfm: 180.12578249982712
episode: 24 training return: tensor(352.0385, device='cuda:0')
episode: 25 training return: tensor(177.4844, device='cuda:0')
episode: 26 training return: tensor(77.5465, device='cuda:0')
episode: 27 training return: tensor(231.2513, device='cuda:0')
epoch: 7 test_true_pfm: 3148.581176670734 sim_pfm: 320.39937794645084
episode: 28 training return: tensor(297.5116, device='cuda:0')
episode: 29 training return: tensor(77.2586, device='cuda:0')
episode: 30 training return: tensor(169.5097, device='cuda:0')
episode: 31 training return: tensor(284.0084, device='cuda:0')
epoch: 8 test_true_pfm: 3342.740216485505 sim_pfm: 139.75232002109988
episode: 32 training return: tensor(345.8902, device='cuda:0')
episode: 33 training return: tensor(445.3829, device='cuda:0')
episode: 34 training return: tensor(398.6952, device='cuda:0')
episode: 35 training return: tensor(283.7087, device='cuda:0')
epoch: 9 test_true_pfm: 3304.160956274964 sim_pfm: 318.4632059598128
episode: 36 training return: tensor(-251.3152, device='cuda:0')
episode: 37 training return: tensor(296.0713, device='cuda:0')
episode: 38 training return: tensor(310.7693, device='cuda:0')
episode: 39 training return: tensor(105.6604, device='cuda:0')
epoch: 10 test_true_pfm: 3311.664453187755 sim_pfm: 288.59010055143153
episode: 40 training return: tensor(349.3846, device='cuda:0')
episode: 41 training return: tensor(298.2281, device='cuda:0')
episode: 42 training return: tensor(145.8170, device='cuda:0')
episode: 43 training return: tensor(344.4820, device='cuda:0')
epoch: 11 test_true_pfm: 3370.802933995135 sim_pfm: 204.69478081469424
episode: 44 training return: tensor(127.6857, device='cuda:0')
episode: 45 training return: tensor(198.6335, device='cuda:0')
episode: 46 training return: tensor(217.4326, device='cuda:0')
episode: 47 training return: tensor(307.2090, device='cuda:0')
epoch: 12 test_true_pfm: 3126.4351396690054 sim_pfm: 277.1835925919586
episode: 48 training return: tensor(329.7708, device='cuda:0')
episode: 49 training return: tensor(-114.4840, device='cuda:0')
episode: 50 training return: tensor(373.1556, device='cuda:0')
episode: 51 training return: tensor(306.8570, device='cuda:0')
epoch: 13 test_true_pfm: 3057.291961562401 sim_pfm: 302.044279360193
episode: 52 training return: tensor(318.2752, device='cuda:0')
episode: 53 training return: tensor(243.5864, device='cuda:0')
episode: 54 training return: tensor(296.5022, device='cuda:0')
episode: 55 training return: tensor(335.3638, device='cuda:0')
epoch: 14 test_true_pfm: 3316.6949101465557 sim_pfm: 138.3633123766631
episode: 56 training return: tensor(239.0846, device='cuda:0')
episode: 57 training return: tensor(334.4153, device='cuda:0')
episode: 58 training return: tensor(258.6912, device='cuda:0')
episode: 59 training return: tensor(403.8673, device='cuda:0')
epoch: 15 test_true_pfm: 3121.1062675552907 sim_pfm: 293.11063720258727
episode: 60 training return: tensor(338.2931, device='cuda:0')
episode: 61 training return: tensor(288.6730, device='cuda:0')
episode: 62 training return: tensor(286.8152, device='cuda:0')
episode: 63 training return: tensor(305.0821, device='cuda:0')
epoch: 16 test_true_pfm: 3217.2863222120395 sim_pfm: 156.68317835836206
episode: 64 training return: tensor(261.4530, device='cuda:0')
episode: 65 training return: tensor(290.1411, device='cuda:0')
episode: 66 training return: tensor(349.2696, device='cuda:0')
episode: 67 training return: tensor(362.6685, device='cuda:0')
epoch: 17 test_true_pfm: 3265.706719082287 sim_pfm: 145.53499746974558
episode: 68 training return: tensor(301.3291, device='cuda:0')
episode: 69 training return: tensor(328.4831, device='cuda:0')
episode: 70 training return: tensor(253.5738, device='cuda:0')
episode: 71 training return: tensor(311.4913, device='cuda:0')
epoch: 18 test_true_pfm: 3275.477703041477 sim_pfm: 304.37319757176255
episode: 72 training return: tensor(121.1397, device='cuda:0')
episode: 73 training return: tensor(350.1187, device='cuda:0')
episode: 74 training return: tensor(302.2785, device='cuda:0')
episode: 75 training return: tensor(67.1197, device='cuda:0')
epoch: 19 test_true_pfm: 3346.529989172195 sim_pfm: 278.22966491508606
episode: 76 training return: tensor(240.7219, device='cuda:0')
episode: 77 training return: tensor(395.6143, device='cuda:0')
episode: 78 training return: tensor(283.0720, device='cuda:0')
episode: 79 training return: tensor(275.0110, device='cuda:0')
epoch: 20 test_true_pfm: 3382.726590537391 sim_pfm: 263.93172415807686
episode: 80 training return: tensor(-211.3857, device='cuda:0')
episode: 81 training return: tensor(389.4497, device='cuda:0')
episode: 82 training return: tensor(324.7266, device='cuda:0')
episode: 83 training return: tensor(234.7479, device='cuda:0')
epoch: 21 test_true_pfm: 3129.873407018909 sim_pfm: 292.14730908710044
episode: 84 training return: tensor(386.2923, device='cuda:0')
episode: 85 training return: tensor(261.6985, device='cuda:0')
episode: 86 training return: tensor(304.2062, device='cuda:0')
episode: 87 training return: tensor(373.8758, device='cuda:0')
epoch: 22 test_true_pfm: 3354.7481874653276 sim_pfm: 306.8544086420249
episode: 88 training return: tensor(315.2326, device='cuda:0')
episode: 89 training return: tensor(336.5193, device='cuda:0')
episode: 90 training return: tensor(384.0625, device='cuda:0')
episode: 91 training return: tensor(266.5139, device='cuda:0')
epoch: 23 test_true_pfm: 3369.8344023218237 sim_pfm: 337.87775105664815
episode: 92 training return: tensor(365.0007, device='cuda:0')
episode: 93 training return: tensor(271.1101, device='cuda:0')
episode: 94 training return: tensor(375.9749, device='cuda:0')
episode: 95 training return: tensor(342.6520, device='cuda:0')
epoch: 24 test_true_pfm: 3374.2701507022266 sim_pfm: 326.8729184550854
episode: 96 training return: tensor(304.8055, device='cuda:0')
episode: 97 training return: tensor(288.7222, device='cuda:0')
episode: 98 training return: tensor(356.3333, device='cuda:0')
episode: 99 training return: tensor(406.0365, device='cuda:0')
epoch: 25 test_true_pfm: 3451.1158536112107 sim_pfm: 369.4751926443423
episode: 100 training return: tensor(171.6175, device='cuda:0')
episode: 101 training return: tensor(272.5751, device='cuda:0')
episode: 102 training return: tensor(324.2849, device='cuda:0')
episode: 103 training return: tensor(199.8402, device='cuda:0')
epoch: 26 test_true_pfm: 3130.424258412406 sim_pfm: 286.6828003480526
episode: 104 training return: tensor(365.9885, device='cuda:0')
episode: 105 training return: tensor(307.6246, device='cuda:0')
episode: 106 training return: tensor(285.9717, device='cuda:0')
episode: 107 training return: tensor(361.8842, device='cuda:0')
epoch: 27 test_true_pfm: 3282.5779642720954 sim_pfm: 302.94003394482814
episode: 108 training return: tensor(359.2996, device='cuda:0')
episode: 109 training return: tensor(128.4845, device='cuda:0')
episode: 110 training return: tensor(305.5849, device='cuda:0')
episode: 111 training return: tensor(321.0378, device='cuda:0')
epoch: 28 test_true_pfm: 3336.602593329393 sim_pfm: 159.85089939493142
episode: 112 training return: tensor(391.7654, device='cuda:0')
episode: 113 training return: tensor(301.8999, device='cuda:0')
episode: 114 training return: tensor(283.9188, device='cuda:0')
episode: 115 training return: tensor(262.4580, device='cuda:0')
epoch: 29 test_true_pfm: 3259.601358353832 sim_pfm: 247.00746139263114
episode: 116 training return: tensor(323.0500, device='cuda:0')
episode: 117 training return: tensor(272.5139, device='cuda:0')
episode: 118 training return: tensor(122.8768, device='cuda:0')
episode: 119 training return: tensor(306.2309, device='cuda:0')
epoch: 30 test_true_pfm: 2867.6532031940083 sim_pfm: 286.5334000025371
episode: 120 training return: tensor(336.2870, device='cuda:0')
episode: 121 training return: tensor(390.8563, device='cuda:0')
episode: 122 training return: tensor(285.0243, device='cuda:0')
episode: 123 training return: tensor(402.5974, device='cuda:0')
epoch: 31 test_true_pfm: 2789.1549400121553 sim_pfm: 195.7007816567881
episode: 124 training return: tensor(268.9396, device='cuda:0')
episode: 125 training return: tensor(-113.6095, device='cuda:0')
episode: 126 training return: tensor(238.2052, device='cuda:0')
episode: 127 training return: tensor(287.5901, device='cuda:0')
epoch: 32 test_true_pfm: 3325.564107883136 sim_pfm: 153.92286651651375
episode: 128 training return: tensor(363.4418, device='cuda:0')
episode: 129 training return: tensor(284.9873, device='cuda:0')
episode: 130 training return: tensor(349.9084, device='cuda:0')
episode: 131 training return: tensor(337.6289, device='cuda:0')
epoch: 33 test_true_pfm: 3414.2830294950777 sim_pfm: 215.46195297157587
episode: 132 training return: tensor(341.5786, device='cuda:0')
episode: 133 training return: tensor(330.3416, device='cuda:0')
episode: 134 training return: tensor(411.3546, device='cuda:0')
episode: 135 training return: tensor(238.9193, device='cuda:0')
epoch: 34 test_true_pfm: 3095.3517525257507 sim_pfm: 283.81291724620195
episode: 136 training return: tensor(234.6895, device='cuda:0')
episode: 137 training return: tensor(419.5349, device='cuda:0')
episode: 138 training return: tensor(383.7379, device='cuda:0')
episode: 139 training return: tensor(46.4310, device='cuda:0')
epoch: 35 test_true_pfm: 3319.4499868803614 sim_pfm: 337.6813348366025
episode: 140 training return: tensor(266.3291, device='cuda:0')
episode: 141 training return: tensor(350.1241, device='cuda:0')
episode: 142 training return: tensor(405.9204, device='cuda:0')
episode: 143 training return: tensor(322.4305, device='cuda:0')
epoch: 36 test_true_pfm: 2937.1612632200126 sim_pfm: 323.81510919249075
episode: 144 training return: tensor(282.0843, device='cuda:0')
episode: 145 training return: tensor(290.8974, device='cuda:0')
episode: 146 training return: tensor(241.9792, device='cuda:0')
episode: 147 training return: tensor(276.9637, device='cuda:0')
epoch: 37 test_true_pfm: 2719.524452949696 sim_pfm: 311.4160895509801
episode: 148 training return: tensor(224.2709, device='cuda:0')
episode: 149 training return: tensor(251.2551, device='cuda:0')
episode: 150 training return: tensor(307.3633, device='cuda:0')
episode: 151 training return: tensor(324.3328, device='cuda:0')
epoch: 38 test_true_pfm: 3345.428838910941 sim_pfm: 313.66831877651083
episode: 152 training return: tensor(343.0692, device='cuda:0')
episode: 153 training return: tensor(327.2046, device='cuda:0')
episode: 154 training return: tensor(-126.0876, device='cuda:0')
episode: 155 training return: tensor(329.7313, device='cuda:0')
epoch: 39 test_true_pfm: 3377.431648291309 sim_pfm: 377.70999074636103
episode: 156 training return: tensor(345.0597, device='cuda:0')
episode: 157 training return: tensor(297.9644, device='cuda:0')
episode: 158 training return: tensor(325.2318, device='cuda:0')
episode: 159 training return: tensor(166.6536, device='cuda:0')
epoch: 40 test_true_pfm: 2666.522406500609 sim_pfm: 330.71722564060474
episode: 160 training return: tensor(379.7817, device='cuda:0')
episode: 161 training return: tensor(317.2121, device='cuda:0')
episode: 162 training return: tensor(391.3319, device='cuda:0')
episode: 163 training return: tensor(354.5417, device='cuda:0')
epoch: 41 test_true_pfm: 3115.1811925672155 sim_pfm: 197.7910151988132
episode: 164 training return: tensor(352.0972, device='cuda:0')
episode: 165 training return: tensor(337.6462, device='cuda:0')
episode: 166 training return: tensor(400.1576, device='cuda:0')
episode: 167 training return: tensor(344.4771, device='cuda:0')
epoch: 42 test_true_pfm: 2873.3020606442874 sim_pfm: 266.6776910404442
episode: 168 training return: tensor(401.5484, device='cuda:0')
episode: 169 training return: tensor(344.2525, device='cuda:0')
episode: 170 training return: tensor(272.8045, device='cuda:0')
episode: 171 training return: tensor(256.9135, device='cuda:0')
epoch: 43 test_true_pfm: 3184.434182117995 sim_pfm: 318.6822158564367
episode: 172 training return: tensor(254.0976, device='cuda:0')
episode: 173 training return: tensor(-25.1829, device='cuda:0')
episode: 174 training return: tensor(294.2879, device='cuda:0')
episode: 175 training return: tensor(358.1743, device='cuda:0')
epoch: 44 test_true_pfm: 3253.478100069253 sim_pfm: 372.48706213746726
episode: 176 training return: tensor(310.3257, device='cuda:0')
episode: 177 training return: tensor(332.6935, device='cuda:0')
episode: 178 training return: tensor(386.3412, device='cuda:0')
episode: 179 training return: tensor(180.5660, device='cuda:0')
epoch: 45 test_true_pfm: 3361.303167465015 sim_pfm: 327.02294786983595
episode: 180 training return: tensor(352.1503, device='cuda:0')
episode: 181 training return: tensor(373.4886, device='cuda:0')
episode: 182 training return: tensor(395.3953, device='cuda:0')
episode: 183 training return: tensor(375.6453, device='cuda:0')
epoch: 46 test_true_pfm: 3309.2682894841123 sim_pfm: 338.38893293527263
episode: 184 training return: tensor(347.2041, device='cuda:0')
episode: 185 training return: tensor(247.0253, device='cuda:0')
episode: 186 training return: tensor(-209.6849, device='cuda:0')
episode: 187 training return: tensor(198.3357, device='cuda:0')
epoch: 47 test_true_pfm: 3361.7510832993435 sim_pfm: 336.33165118158405
episode: 188 training return: tensor(375.6292, device='cuda:0')
episode: 189 training return: tensor(178.9363, device='cuda:0')
episode: 190 training return: tensor(330.1676, device='cuda:0')
episode: 191 training return: tensor(353.1524, device='cuda:0')
epoch: 48 test_true_pfm: 2456.5776054128514 sim_pfm: 340.2917115424643
episode: 192 training return: tensor(399.3518, device='cuda:0')
episode: 193 training return: tensor(376.4038, device='cuda:0')
episode: 194 training return: tensor(294.6145, device='cuda:0')
episode: 195 training return: tensor(346.4049, device='cuda:0')
epoch: 49 test_true_pfm: 3408.6486631303687 sim_pfm: 378.2525785624263
episode: 196 training return: tensor(321.9546, device='cuda:0')
episode: 197 training return: tensor(370.1787, device='cuda:0')
episode: 198 training return: tensor(323.3888, device='cuda:0')
episode: 199 training return: tensor(325.1364, device='cuda:0')
epoch: 50 test_true_pfm: 3339.193871133982 sim_pfm: 322.4157536220737
episode: 200 training return: tensor(362.1807, device='cuda:0')
episode: 201 training return: tensor(326.0848, device='cuda:0')
episode: 202 training return: tensor(-128.4658, device='cuda:0')
episode: 203 training return: tensor(422.9484, device='cuda:0')
epoch: 51 test_true_pfm: 3347.6443277608055 sim_pfm: 366.95466534017277
episode: 204 training return: tensor(264.8818, device='cuda:0')
episode: 205 training return: tensor(336.8929, device='cuda:0')
episode: 206 training return: tensor(276.8806, device='cuda:0')
episode: 207 training return: tensor(354.6354, device='cuda:0')
epoch: 52 test_true_pfm: 3377.095273203628 sim_pfm: 326.7883785708691
episode: 208 training return: tensor(350.7429, device='cuda:0')
episode: 209 training return: tensor(410.0018, device='cuda:0')
episode: 210 training return: tensor(335.2810, device='cuda:0')
episode: 211 training return: tensor(387.5958, device='cuda:0')
epoch: 53 test_true_pfm: 3358.6611130038928 sim_pfm: 193.08927603326933
episode: 212 training return: tensor(366.3111, device='cuda:0')
episode: 213 training return: tensor(282.1320, device='cuda:0')
episode: 214 training return: tensor(332.5013, device='cuda:0')
episode: 215 training return: tensor(425.0212, device='cuda:0')
epoch: 54 test_true_pfm: 3316.3500516044464 sim_pfm: 297.73845273711294
episode: 216 training return: tensor(376.2344, device='cuda:0')
episode: 217 training return: tensor(362.1395, device='cuda:0')
episode: 218 training return: tensor(353.0938, device='cuda:0')
episode: 219 training return: tensor(296.7872, device='cuda:0')
epoch: 55 test_true_pfm: 3345.824532766393 sim_pfm: 348.4954335372604
episode: 220 training return: tensor(328.0918, device='cuda:0')
episode: 221 training return: tensor(363.4345, device='cuda:0')
episode: 222 training return: tensor(254.9724, device='cuda:0')
episode: 223 training return: tensor(410.3455, device='cuda:0')
epoch: 56 test_true_pfm: 3408.6486954023626 sim_pfm: 234.754026744505
episode: 224 training return: tensor(281.2096, device='cuda:0')
episode: 225 training return: tensor(336.2604, device='cuda:0')
episode: 226 training return: tensor(329.2722, device='cuda:0')
episode: 227 training return: tensor(391.9011, device='cuda:0')
epoch: 57 test_true_pfm: 3368.3218519998227 sim_pfm: 361.88769039364223
episode: 228 training return: tensor(363.4509, device='cuda:0')
episode: 229 training return: tensor(349.7325, device='cuda:0')
episode: 230 training return: tensor(334.2677, device='cuda:0')
episode: 231 training return: tensor(391.4647, device='cuda:0')
epoch: 58 test_true_pfm: 3438.2322745049382 sim_pfm: 424.35438100733637
episode: 232 training return: tensor(286.3458, device='cuda:0')
episode: 233 training return: tensor(464.5692, device='cuda:0')
episode: 234 training return: tensor(-127.8363, device='cuda:0')
episode: 235 training return: tensor(362.8740, device='cuda:0')
epoch: 59 test_true_pfm: 2969.251719958447 sim_pfm: 377.93847193349694
episode: 236 training return: tensor(352.4259, device='cuda:0')
episode: 237 training return: tensor(347.1063, device='cuda:0')
episode: 238 training return: tensor(245.5089, device='cuda:0')
episode: 239 training return: tensor(331.5344, device='cuda:0')
epoch: 60 test_true_pfm: 3407.376056943436 sim_pfm: 153.89080104523842
episode: 240 training return: tensor(319.6347, device='cuda:0')
episode: 241 training return: tensor(355.1377, device='cuda:0')
episode: 242 training return: tensor(317.6953, device='cuda:0')
episode: 243 training return: tensor(395.9660, device='cuda:0')
epoch: 61 test_true_pfm: 3361.1188838537396 sim_pfm: 241.04756228104816
episode: 244 training return: tensor(403.3034, device='cuda:0')
episode: 245 training return: tensor(296.9505, device='cuda:0')
episode: 246 training return: tensor(258.2085, device='cuda:0')
episode: 247 training return: tensor(275.8918, device='cuda:0')
epoch: 62 test_true_pfm: 3429.469695328318 sim_pfm: 353.5730887497775
episode: 248 training return: tensor(302.1746, device='cuda:0')
episode: 249 training return: tensor(340.4614, device='cuda:0')
episode: 250 training return: tensor(360.3783, device='cuda:0')
episode: 251 training return: tensor(364.4500, device='cuda:0')
epoch: 63 test_true_pfm: 3465.3985447107043 sim_pfm: 410.21442241221666
episode: 252 training return: tensor(289.6028, device='cuda:0')
episode: 253 training return: tensor(401.2168, device='cuda:0')
episode: 254 training return: tensor(365.2103, device='cuda:0')
episode: 255 training return: tensor(323.6981, device='cuda:0')
epoch: 64 test_true_pfm: 3384.8823858352553 sim_pfm: 172.96792554284912
episode: 256 training return: tensor(330.6482, device='cuda:0')
episode: 257 training return: tensor(344.0904, device='cuda:0')
episode: 258 training return: tensor(306.5560, device='cuda:0')
episode: 259 training return: tensor(350.0706, device='cuda:0')
epoch: 65 test_true_pfm: 3186.685937339999 sim_pfm: 269.7334977902162
episode: 260 training return: tensor(352.9590, device='cuda:0')
episode: 261 training return: tensor(330.5119, device='cuda:0')
episode: 262 training return: tensor(289.7698, device='cuda:0')
episode: 263 training return: tensor(352.8545, device='cuda:0')
epoch: 66 test_true_pfm: 3275.7371681621134 sim_pfm: 227.4463033648984
episode: 264 training return: tensor(338.8168, device='cuda:0')
episode: 265 training return: tensor(337.8235, device='cuda:0')
episode: 266 training return: tensor(396.4515, device='cuda:0')
episode: 267 training return: tensor(325.5617, device='cuda:0')
epoch: 67 test_true_pfm: 2945.1531321226 sim_pfm: 302.3937698248192
episode: 268 training return: tensor(248.1612, device='cuda:0')
episode: 269 training return: tensor(297.5092, device='cuda:0')
episode: 270 training return: tensor(269.9552, device='cuda:0')
episode: 271 training return: tensor(349.8479, device='cuda:0')
epoch: 68 test_true_pfm: 3155.557507119682 sim_pfm: 253.02612887697373
episode: 272 training return: tensor(419.8113, device='cuda:0')
episode: 273 training return: tensor(423.2014, device='cuda:0')
episode: 274 training return: tensor(347.1140, device='cuda:0')
episode: 275 training return: tensor(321.5196, device='cuda:0')
epoch: 69 test_true_pfm: 3418.962067169472 sim_pfm: 323.7694971622841
episode: 276 training return: tensor(323.0047, device='cuda:0')
episode: 277 training return: tensor(320.9055, device='cuda:0')
episode: 278 training return: tensor(371.8251, device='cuda:0')
episode: 279 training return: tensor(360.1463, device='cuda:0')
epoch: 70 test_true_pfm: 3081.307196674445 sim_pfm: 318.0649783024758
episode: 280 training return: tensor(369.5917, device='cuda:0')
episode: 281 training return: tensor(351.8015, device='cuda:0')
episode: 282 training return: tensor(405.5329, device='cuda:0')
episode: 283 training return: tensor(320.0097, device='cuda:0')
epoch: 71 test_true_pfm: 3025.67077321595 sim_pfm: 308.8930216251756
episode: 284 training return: tensor(348.3862, device='cuda:0')
episode: 285 training return: tensor(322.9986, device='cuda:0')
episode: 286 training return: tensor(357.6274, device='cuda:0')
episode: 287 training return: tensor(358.3802, device='cuda:0')
epoch: 72 test_true_pfm: 3397.133606333325 sim_pfm: 334.5615815144556
episode: 288 training return: tensor(361.7043, device='cuda:0')
episode: 289 training return: tensor(406.6341, device='cuda:0')
episode: 290 training return: tensor(438.9808, device='cuda:0')
episode: 291 training return: tensor(331.8238, device='cuda:0')
epoch: 73 test_true_pfm: 3250.622697812543 sim_pfm: 211.16508692300218
episode: 292 training return: tensor(342.3613, device='cuda:0')
episode: 293 training return: tensor(338.3265, device='cuda:0')
episode: 294 training return: tensor(394.7502, device='cuda:0')
episode: 295 training return: tensor(232.9591, device='cuda:0')
epoch: 74 test_true_pfm: 3395.8956834625765 sim_pfm: 366.11556781593634
episode: 296 training return: tensor(365.6168, device='cuda:0')
episode: 297 training return: tensor(369.1342, device='cuda:0')
episode: 298 training return: tensor(330.4195, device='cuda:0')
episode: 299 training return: tensor(291.4897, device='cuda:0')
epoch: 75 test_true_pfm: 3381.913424426726 sim_pfm: 316.5426680040352
episode: 300 training return: tensor(379.4265, device='cuda:0')
episode: 301 training return: tensor(395.3328, device='cuda:0')
episode: 302 training return: tensor(355.3798, device='cuda:0')
episode: 303 training return: tensor(343.5753, device='cuda:0')
epoch: 76 test_true_pfm: 3229.7712780898037 sim_pfm: 230.84258128035194
episode: 304 training return: tensor(351.4613, device='cuda:0')
episode: 305 training return: tensor(412.7170, device='cuda:0')
episode: 306 training return: tensor(319.1920, device='cuda:0')
episode: 307 training return: tensor(366.1975, device='cuda:0')
epoch: 77 test_true_pfm: 3486.553722994031 sim_pfm: 195.65398266360475
episode: 308 training return: tensor(392.7283, device='cuda:0')
episode: 309 training return: tensor(337.4487, device='cuda:0')
episode: 310 training return: tensor(362.5712, device='cuda:0')
episode: 311 training return: tensor(320.9220, device='cuda:0')
epoch: 78 test_true_pfm: 3400.8961522723353 sim_pfm: 356.4639511175337
episode: 312 training return: tensor(126.0874, device='cuda:0')
episode: 313 training return: tensor(296.9257, device='cuda:0')
episode: 314 training return: tensor(394.9337, device='cuda:0')
episode: 315 training return: tensor(354.7590, device='cuda:0')
epoch: 79 test_true_pfm: 2896.270991929565 sim_pfm: 376.5249738069251
episode: 316 training return: tensor(330.8947, device='cuda:0')
episode: 317 training return: tensor(224.0432, device='cuda:0')
episode: 318 training return: tensor(327.0609, device='cuda:0')
episode: 319 training return: tensor(385.5194, device='cuda:0')
epoch: 80 test_true_pfm: 3364.2242074564897 sim_pfm: 363.72810513935593
episode: 320 training return: tensor(307.9756, device='cuda:0')
episode: 321 training return: tensor(325.0301, device='cuda:0')
episode: 322 training return: tensor(374.6221, device='cuda:0')
episode: 323 training return: tensor(323.5822, device='cuda:0')
epoch: 81 test_true_pfm: 3183.6640021972653 sim_pfm: 370.6007807214143
episode: 324 training return: tensor(293.7168, device='cuda:0')
episode: 325 training return: tensor(282.2661, device='cuda:0')
episode: 326 training return: tensor(298.7292, device='cuda:0')
episode: 327 training return: tensor(396.6372, device='cuda:0')
epoch: 82 test_true_pfm: 3284.333587326526 sim_pfm: 284.96892071852926
episode: 328 training return: tensor(331.5843, device='cuda:0')
episode: 329 training return: tensor(372.5342, device='cuda:0')
episode: 330 training return: tensor(394.5941, device='cuda:0')
episode: 331 training return: tensor(387.5067, device='cuda:0')
epoch: 83 test_true_pfm: 3006.276036763029 sim_pfm: 318.04214837418596
episode: 332 training return: tensor(371.8764, device='cuda:0')
episode: 333 training return: tensor(302.2362, device='cuda:0')
episode: 334 training return: tensor(349.3930, device='cuda:0')
episode: 335 training return: tensor(359.7219, device='cuda:0')
epoch: 84 test_true_pfm: 3495.6035114999117 sim_pfm: 297.8753917887322
episode: 336 training return: tensor(368.8583, device='cuda:0')
episode: 337 training return: tensor(268.3365, device='cuda:0')
episode: 338 training return: tensor(238.1632, device='cuda:0')
episode: 339 training return: tensor(381.0272, device='cuda:0')
epoch: 85 test_true_pfm: 3441.82033409633 sim_pfm: 159.26963448081128
episode: 340 training return: tensor(155.4360, device='cuda:0')
episode: 341 training return: tensor(327.4787, device='cuda:0')
episode: 342 training return: tensor(358.0005, device='cuda:0')
episode: 343 training return: tensor(366.7488, device='cuda:0')
epoch: 86 test_true_pfm: 3380.7703978722548 sim_pfm: 356.3330222433821
episode: 344 training return: tensor(352.4540, device='cuda:0')
episode: 345 training return: tensor(384.4946, device='cuda:0')
episode: 346 training return: tensor(296.0014, device='cuda:0')
episode: 347 training return: tensor(399.3281, device='cuda:0')
epoch: 87 test_true_pfm: 3225.504198305429 sim_pfm: 387.1875228053735
episode: 348 training return: tensor(324.6866, device='cuda:0')
episode: 349 training return: tensor(393.5701, device='cuda:0')
episode: 350 training return: tensor(284.6459, device='cuda:0')
episode: 351 training return: tensor(362.1791, device='cuda:0')
epoch: 88 test_true_pfm: 3475.3419102651874 sim_pfm: 311.8344564031965
episode: 352 training return: tensor(315.2496, device='cuda:0')
episode: 353 training return: tensor(310.3051, device='cuda:0')
episode: 354 training return: tensor(287.0933, device='cuda:0')
episode: 355 training return: tensor(338.8958, device='cuda:0')
epoch: 89 test_true_pfm: 3400.790652993716 sim_pfm: 394.1092973189079
episode: 356 training return: tensor(351.6275, device='cuda:0')
episode: 357 training return: tensor(368.2057, device='cuda:0')
episode: 358 training return: tensor(377.0673, device='cuda:0')
episode: 359 training return: tensor(304.9151, device='cuda:0')
epoch: 90 test_true_pfm: 3334.8504770699874 sim_pfm: 264.31353139699786
episode: 360 training return: tensor(302.9593, device='cuda:0')
episode: 361 training return: tensor(366.4531, device='cuda:0')
episode: 362 training return: tensor(294.3464, device='cuda:0')
episode: 363 training return: tensor(334.9581, device='cuda:0')
epoch: 91 test_true_pfm: 3142.987462792789 sim_pfm: 311.61532607121626
episode: 364 training return: tensor(321.6379, device='cuda:0')
episode: 365 training return: tensor(394.1380, device='cuda:0')
episode: 366 training return: tensor(254.2649, device='cuda:0')
episode: 367 training return: tensor(394.6900, device='cuda:0')
epoch: 92 test_true_pfm: 3406.633212626139 sim_pfm: 314.29169698420446
episode: 368 training return: tensor(364.9078, device='cuda:0')
episode: 369 training return: tensor(-99.9774, device='cuda:0')
episode: 370 training return: tensor(327.0827, device='cuda:0')
episode: 371 training return: tensor(282.6541, device='cuda:0')
epoch: 93 test_true_pfm: 3339.1174787662435 sim_pfm: 200.59213914945335
episode: 372 training return: tensor(389.6060, device='cuda:0')
episode: 373 training return: tensor(249.5130, device='cuda:0')
episode: 374 training return: tensor(324.6672, device='cuda:0')
episode: 375 training return: tensor(249.1530, device='cuda:0')
epoch: 94 test_true_pfm: 3304.5713139090526 sim_pfm: 409.04577948137495
episode: 376 training return: tensor(348.4040, device='cuda:0')
episode: 377 training return: tensor(336.6721, device='cuda:0')
episode: 378 training return: tensor(449.7422, device='cuda:0')
episode: 379 training return: tensor(424.2920, device='cuda:0')
epoch: 95 test_true_pfm: 3342.5273635811386 sim_pfm: 334.57943319860107
episode: 380 training return: tensor(290.8498, device='cuda:0')
episode: 381 training return: tensor(415.3481, device='cuda:0')
episode: 382 training return: tensor(364.7372, device='cuda:0')
episode: 383 training return: tensor(330.7634, device='cuda:0')
epoch: 96 test_true_pfm: 3467.9553977922237 sim_pfm: 351.2446803989781
episode: 384 training return: tensor(362.2422, device='cuda:0')
episode: 385 training return: tensor(334.2726, device='cuda:0')
episode: 386 training return: tensor(341.7428, device='cuda:0')
episode: 387 training return: tensor(339.7182, device='cuda:0')
epoch: 97 test_true_pfm: 3419.7995690038447 sim_pfm: 329.5163909604501
episode: 388 training return: tensor(402.6374, device='cuda:0')
episode: 389 training return: tensor(380.6640, device='cuda:0')
episode: 390 training return: tensor(340.2758, device='cuda:0')
episode: 391 training return: tensor(381.4253, device='cuda:0')
epoch: 98 test_true_pfm: 2901.1669622448303 sim_pfm: 337.2029165048152
episode: 392 training return: tensor(141.7177, device='cuda:0')
episode: 393 training return: tensor(373.9731, device='cuda:0')
episode: 394 training return: tensor(343.2427, device='cuda:0')
episode: 395 training return: tensor(328.8884, device='cuda:0')
epoch: 99 test_true_pfm: 3504.855478126299 sim_pfm: 336.639899343194
episode: 396 training return: tensor(441.5691, device='cuda:0')
episode: 397 training return: tensor(456.8026, device='cuda:0')
episode: 398 training return: tensor(348.5617, device='cuda:0')
episode: 399 training return: tensor(415.2189, device='cuda:0')
epoch: 100 test_true_pfm: 3369.9711541912857 sim_pfm: 382.9511177893534
episode: 400 training return: tensor(323.1719, device='cuda:0')
episode: 401 training return: tensor(327.1891, device='cuda:0')
episode: 402 training return: tensor(332.6798, device='cuda:0')
episode: 403 training return: tensor(398.5632, device='cuda:0')
epoch: 101 test_true_pfm: 3429.7661080614894 sim_pfm: 359.9093379327096
episode: 404 training return: tensor(314.2652, device='cuda:0')
episode: 405 training return: tensor(364.2587, device='cuda:0')
episode: 406 training return: tensor(341.9829, device='cuda:0')
episode: 407 training return: tensor(354.7999, device='cuda:0')
epoch: 102 test_true_pfm: 3411.486716733005 sim_pfm: 392.44993163765565
episode: 408 training return: tensor(381.9459, device='cuda:0')
episode: 409 training return: tensor(379.8155, device='cuda:0')
episode: 410 training return: tensor(375.8376, device='cuda:0')
episode: 411 training return: tensor(281.9110, device='cuda:0')
epoch: 103 test_true_pfm: 3485.8965718607665 sim_pfm: 406.3263101458239
episode: 412 training return: tensor(371.3842, device='cuda:0')
episode: 413 training return: tensor(345.2390, device='cuda:0')
episode: 414 training return: tensor(319.4381, device='cuda:0')
episode: 415 training return: tensor(454.2527, device='cuda:0')
epoch: 104 test_true_pfm: 3408.7720469744095 sim_pfm: 247.56061771329647
episode: 416 training return: tensor(438.8606, device='cuda:0')
episode: 417 training return: tensor(320.9402, device='cuda:0')
episode: 418 training return: tensor(383.7235, device='cuda:0')
episode: 419 training return: tensor(279.0177, device='cuda:0')
epoch: 105 test_true_pfm: 3339.9189348466957 sim_pfm: 424.91120247991057
episode: 420 training return: tensor(281.8144, device='cuda:0')
episode: 421 training return: tensor(368.9636, device='cuda:0')
episode: 422 training return: tensor(308.8762, device='cuda:0')
episode: 423 training return: tensor(379.6870, device='cuda:0')
epoch: 106 test_true_pfm: 3133.228768102617 sim_pfm: 375.1591192249325
episode: 424 training return: tensor(474.9876, device='cuda:0')
episode: 425 training return: tensor(432.9908, device='cuda:0')
episode: 426 training return: tensor(333.9625, device='cuda:0')
episode: 427 training return: tensor(337.8220, device='cuda:0')
epoch: 107 test_true_pfm: 3073.7245602751887 sim_pfm: 423.3386723306806
episode: 428 training return: tensor(350.4339, device='cuda:0')
episode: 429 training return: tensor(409.4978, device='cuda:0')
episode: 430 training return: tensor(447.2182, device='cuda:0')
episode: 431 training return: tensor(376.4420, device='cuda:0')
epoch: 108 test_true_pfm: 3170.8757188923028 sim_pfm: 405.3732184575638
episode: 432 training return: tensor(430.6320, device='cuda:0')
episode: 433 training return: tensor(360.1462, device='cuda:0')
episode: 434 training return: tensor(382.6614, device='cuda:0')
episode: 435 training return: tensor(348.9637, device='cuda:0')
epoch: 109 test_true_pfm: 3361.1561914007502 sim_pfm: 227.99538126975918
episode: 436 training return: tensor(313.7061, device='cuda:0')
episode: 437 training return: tensor(363.3652, device='cuda:0')
episode: 438 training return: tensor(370.0137, device='cuda:0')
episode: 439 training return: tensor(314.6359, device='cuda:0')
epoch: 110 test_true_pfm: 3321.5281579492453 sim_pfm: 361.26421848664177
episode: 440 training return: tensor(137.7925, device='cuda:0')
episode: 441 training return: tensor(294.6116, device='cuda:0')
episode: 442 training return: tensor(333.6326, device='cuda:0')
episode: 443 training return: tensor(366.4651, device='cuda:0')
epoch: 111 test_true_pfm: 3362.9249289486 sim_pfm: 371.4149068535965
episode: 444 training return: tensor(301.2926, device='cuda:0')
episode: 445 training return: tensor(373.3009, device='cuda:0')
episode: 446 training return: tensor(410.0359, device='cuda:0')
episode: 447 training return: tensor(386.1167, device='cuda:0')
epoch: 112 test_true_pfm: 3106.178556204457 sim_pfm: 368.6341507906618
episode: 448 training return: tensor(183.6399, device='cuda:0')
episode: 449 training return: tensor(360.0311, device='cuda:0')
episode: 450 training return: tensor(373.9704, device='cuda:0')
episode: 451 training return: tensor(341.1798, device='cuda:0')
epoch: 113 test_true_pfm: 3365.0627440355165 sim_pfm: 397.9081848144997
episode: 452 training return: tensor(326.3759, device='cuda:0')
episode: 453 training return: tensor(189.5650, device='cuda:0')
episode: 454 training return: tensor(351.8438, device='cuda:0')
episode: 455 training return: tensor(387.9369, device='cuda:0')
epoch: 114 test_true_pfm: 3018.7418038330366 sim_pfm: 393.54620020253543
episode: 456 training return: tensor(375.7095, device='cuda:0')
episode: 457 training return: tensor(360.6349, device='cuda:0')
episode: 458 training return: tensor(377.3600, device='cuda:0')
episode: 459 training return: tensor(321.4727, device='cuda:0')
epoch: 115 test_true_pfm: 2406.506720437563 sim_pfm: 398.82377574977
episode: 460 training return: tensor(415.6799, device='cuda:0')
episode: 461 training return: tensor(-147.2563, device='cuda:0')
episode: 462 training return: tensor(353.9821, device='cuda:0')
episode: 463 training return: tensor(318.9354, device='cuda:0')
epoch: 116 test_true_pfm: 3083.361055215522 sim_pfm: 341.3344203719559
episode: 464 training return: tensor(345.6302, device='cuda:0')
episode: 465 training return: tensor(62.6237, device='cuda:0')
episode: 466 training return: tensor(382.9742, device='cuda:0')
episode: 467 training return: tensor(327.5695, device='cuda:0')
epoch: 117 test_true_pfm: 3456.1519842152447 sim_pfm: 335.01017384251463
episode: 468 training return: tensor(275.7986, device='cuda:0')
episode: 469 training return: tensor(372.5235, device='cuda:0')
episode: 470 training return: tensor(262.2000, device='cuda:0')
episode: 471 training return: tensor(339.6193, device='cuda:0')
epoch: 118 test_true_pfm: 3443.5298360922848 sim_pfm: 418.5167677730108
episode: 472 training return: tensor(369.5087, device='cuda:0')
episode: 473 training return: tensor(415.0888, device='cuda:0')
episode: 474 training return: tensor(338.4062, device='cuda:0')
episode: 475 training return: tensor(358.0270, device='cuda:0')
epoch: 119 test_true_pfm: 3475.1930831758636 sim_pfm: 399.1410083178974
episode: 476 training return: tensor(317.1104, device='cuda:0')
episode: 477 training return: tensor(360.7022, device='cuda:0')
episode: 478 training return: tensor(334.8740, device='cuda:0')
episode: 479 training return: tensor(15.0864, device='cuda:0')
epoch: 120 test_true_pfm: 3366.2896053280633 sim_pfm: 377.893679411102
episode: 480 training return: tensor(344.4388, device='cuda:0')
episode: 481 training return: tensor(411.9249, device='cuda:0')
episode: 482 training return: tensor(350.2257, device='cuda:0')
episode: 483 training return: tensor(197.8336, device='cuda:0')
epoch: 121 test_true_pfm: 3017.499561729688 sim_pfm: 373.28812420614605
episode: 484 training return: tensor(396.8084, device='cuda:0')
episode: 485 training return: tensor(343.8658, device='cuda:0')
episode: 486 training return: tensor(335.7573, device='cuda:0')
episode: 487 training return: tensor(315.3947, device='cuda:0')
epoch: 122 test_true_pfm: 3440.465333489026 sim_pfm: 152.06748216971755
episode: 488 training return: tensor(351.4325, device='cuda:0')
episode: 489 training return: tensor(376.7789, device='cuda:0')
episode: 490 training return: tensor(356.9691, device='cuda:0')
episode: 491 training return: tensor(426.0610, device='cuda:0')
epoch: 123 test_true_pfm: 3368.60771302883 sim_pfm: 326.63718274221173
episode: 492 training return: tensor(389.4369, device='cuda:0')
episode: 493 training return: tensor(301.2112, device='cuda:0')
episode: 494 training return: tensor(347.5905, device='cuda:0')
episode: 495 training return: tensor(251.6997, device='cuda:0')
epoch: 124 test_true_pfm: 3489.021923272507 sim_pfm: 185.82220805102648
episode: 496 training return: tensor(349.0710, device='cuda:0')
episode: 497 training return: tensor(381.0309, device='cuda:0')
episode: 498 training return: tensor(286.6151, device='cuda:0')
episode: 499 training return: tensor(331.2809, device='cuda:0')
epoch: 125 test_true_pfm: 3360.748671577474 sim_pfm: 384.81467052628676
episode: 500 training return: tensor(371.6074, device='cuda:0')
episode: 501 training return: tensor(366.3018, device='cuda:0')
episode: 502 training return: tensor(318.1987, device='cuda:0')
episode: 503 training return: tensor(291.9806, device='cuda:0')
epoch: 126 test_true_pfm: 3388.131509019496 sim_pfm: 372.76569266089547
episode: 504 training return: tensor(349.7390, device='cuda:0')
episode: 505 training return: tensor(342.1253, device='cuda:0')
episode: 506 training return: tensor(352.1083, device='cuda:0')
episode: 507 training return: tensor(363.9760, device='cuda:0')
epoch: 127 test_true_pfm: 3472.9145554772736 sim_pfm: 284.9361607509297
episode: 508 training return: tensor(327.4561, device='cuda:0')
episode: 509 training return: tensor(360.8979, device='cuda:0')
episode: 510 training return: tensor(393.4507, device='cuda:0')
episode: 511 training return: tensor(2.7025, device='cuda:0')
epoch: 128 test_true_pfm: 3422.7435796145855 sim_pfm: 363.5622790423222
episode: 512 training return: tensor(396.9115, device='cuda:0')
episode: 513 training return: tensor(367.1735, device='cuda:0')
episode: 514 training return: tensor(411.9047, device='cuda:0')
episode: 515 training return: tensor(318.9511, device='cuda:0')
epoch: 129 test_true_pfm: 3411.627712075601 sim_pfm: 440.0906779957101
episode: 516 training return: tensor(319.2720, device='cuda:0')
episode: 517 training return: tensor(346.3564, device='cuda:0')
episode: 518 training return: tensor(335.4105, device='cuda:0')
episode: 519 training return: tensor(349.7939, device='cuda:0')
epoch: 130 test_true_pfm: 3452.2879879266425 sim_pfm: 319.67528573216987
episode: 520 training return: tensor(371.9331, device='cuda:0')
episode: 521 training return: tensor(363.8802, device='cuda:0')
episode: 522 training return: tensor(337.1992, device='cuda:0')
episode: 523 training return: tensor(384.0330, device='cuda:0')
epoch: 131 test_true_pfm: 3534.0330241661286 sim_pfm: 432.1358925686606
episode: 524 training return: tensor(381.8838, device='cuda:0')
episode: 525 training return: tensor(411.8021, device='cuda:0')
episode: 526 training return: tensor(390.3100, device='cuda:0')
episode: 527 training return: tensor(302.4745, device='cuda:0')
epoch: 132 test_true_pfm: 3412.099595273925 sim_pfm: 382.1240401025473
episode: 528 training return: tensor(240.0811, device='cuda:0')
episode: 529 training return: tensor(423.0609, device='cuda:0')
episode: 530 training return: tensor(364.2672, device='cuda:0')
episode: 531 training return: tensor(337.5921, device='cuda:0')
epoch: 133 test_true_pfm: 3486.0307437745973 sim_pfm: 410.3106600632018
episode: 532 training return: tensor(310.6234, device='cuda:0')
episode: 533 training return: tensor(410.7398, device='cuda:0')
episode: 534 training return: tensor(328.4702, device='cuda:0')
episode: 535 training return: tensor(316.0844, device='cuda:0')
epoch: 134 test_true_pfm: 3488.5384637856146 sim_pfm: 398.48603381581296
episode: 536 training return: tensor(195.7350, device='cuda:0')
episode: 537 training return: tensor(329.0282, device='cuda:0')
episode: 538 training return: tensor(354.5050, device='cuda:0')
episode: 539 training return: tensor(388.7068, device='cuda:0')
epoch: 135 test_true_pfm: 3357.1633733463423 sim_pfm: 225.0508570036618
episode: 540 training return: tensor(277.1768, device='cuda:0')
episode: 541 training return: tensor(349.4405, device='cuda:0')
episode: 542 training return: tensor(-321.6856, device='cuda:0')
episode: 543 training return: tensor(350.4897, device='cuda:0')
epoch: 136 test_true_pfm: 3483.5401486109863 sim_pfm: 352.6387821136935
episode: 544 training return: tensor(377.1503, device='cuda:0')
episode: 545 training return: tensor(336.2244, device='cuda:0')
episode: 546 training return: tensor(407.1471, device='cuda:0')
episode: 547 training return: tensor(362.1567, device='cuda:0')
epoch: 137 test_true_pfm: 3406.4906597102286 sim_pfm: 385.1279350321274
episode: 548 training return: tensor(247.8206, device='cuda:0')
episode: 549 training return: tensor(396.5809, device='cuda:0')
episode: 550 training return: tensor(320.2777, device='cuda:0')
episode: 551 training return: tensor(378.2057, device='cuda:0')
epoch: 138 test_true_pfm: 3411.504312084055 sim_pfm: 364.57104144253145
episode: 552 training return: tensor(332.5194, device='cuda:0')
episode: 553 training return: tensor(319.0879, device='cuda:0')
episode: 554 training return: tensor(362.6974, device='cuda:0')
episode: 555 training return: tensor(286.1420, device='cuda:0')
epoch: 139 test_true_pfm: 3438.3377371782626 sim_pfm: 392.4476228361891
episode: 556 training return: tensor(212.3353, device='cuda:0')
episode: 557 training return: tensor(358.5901, device='cuda:0')
episode: 558 training return: tensor(349.1499, device='cuda:0')
episode: 559 training return: tensor(242.3869, device='cuda:0')
epoch: 140 test_true_pfm: 3432.1971255856806 sim_pfm: 340.56661929298815
episode: 560 training return: tensor(344.1552, device='cuda:0')
episode: 561 training return: tensor(332.6938, device='cuda:0')
episode: 562 training return: tensor(391.2770, device='cuda:0')
episode: 563 training return: tensor(358.8171, device='cuda:0')
epoch: 141 test_true_pfm: 3448.5134448956956 sim_pfm: 362.12901821288204
episode: 564 training return: tensor(374.6045, device='cuda:0')
episode: 565 training return: tensor(388.8105, device='cuda:0')
episode: 566 training return: tensor(405.2964, device='cuda:0')
episode: 567 training return: tensor(357.1781, device='cuda:0')
epoch: 142 test_true_pfm: 3339.4935314167105 sim_pfm: 318.1516469163665
episode: 568 training return: tensor(437.6453, device='cuda:0')
episode: 569 training return: tensor(394.0852, device='cuda:0')
episode: 570 training return: tensor(409.2657, device='cuda:0')
episode: 571 training return: tensor(349.0333, device='cuda:0')
epoch: 143 test_true_pfm: 3461.179114807966 sim_pfm: 370.77387446234934
episode: 572 training return: tensor(306.4615, device='cuda:0')
episode: 573 training return: tensor(358.6781, device='cuda:0')
episode: 574 training return: tensor(387.4714, device='cuda:0')
episode: 575 training return: tensor(270.4518, device='cuda:0')
epoch: 144 test_true_pfm: 3477.8858903067908 sim_pfm: 387.25545507339604
episode: 576 training return: tensor(361.4290, device='cuda:0')
episode: 577 training return: tensor(443.4748, device='cuda:0')
episode: 578 training return: tensor(339.1376, device='cuda:0')
episode: 579 training return: tensor(298.9026, device='cuda:0')
epoch: 145 test_true_pfm: 3416.454782761442 sim_pfm: 381.8185653678859
episode: 580 training return: tensor(307.1550, device='cuda:0')
episode: 581 training return: tensor(303.8975, device='cuda:0')
episode: 582 training return: tensor(325.3808, device='cuda:0')
episode: 583 training return: tensor(331.1706, device='cuda:0')
epoch: 146 test_true_pfm: 3409.3396412779002 sim_pfm: 375.72528707368957
episode: 584 training return: tensor(318.9362, device='cuda:0')
episode: 585 training return: tensor(306.9310, device='cuda:0')
episode: 586 training return: tensor(359.3048, device='cuda:0')
episode: 587 training return: tensor(382.0275, device='cuda:0')
epoch: 147 test_true_pfm: 3458.724616595958 sim_pfm: 367.8813037402385
episode: 588 training return: tensor(344.0956, device='cuda:0')
episode: 589 training return: tensor(327.7188, device='cuda:0')
episode: 590 training return: tensor(323.2513, device='cuda:0')
episode: 591 training return: tensor(378.2727, device='cuda:0')
epoch: 148 test_true_pfm: 3194.1560088805786 sim_pfm: 351.0522123766714
episode: 592 training return: tensor(338.5495, device='cuda:0')
episode: 593 training return: tensor(378.1690, device='cuda:0')
episode: 594 training return: tensor(352.1754, device='cuda:0')
episode: 595 training return: tensor(295.5693, device='cuda:0')
epoch: 149 test_true_pfm: 2872.793778681442 sim_pfm: 367.93008963697747
episode: 596 training return: tensor(353.3927, device='cuda:0')
episode: 597 training return: tensor(357.7030, device='cuda:0')
episode: 598 training return: tensor(307.4429, device='cuda:0')
episode: 599 training return: tensor(317.6360, device='cuda:0')
epoch: 150 test_true_pfm: 3268.333020880556 sim_pfm: 314.3318774622555
