['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2']
epoch: 0 training_loss 0.22653718411922455 test_loss: 0.15318940877914428
epoch: 1 training_loss 0.1496736357361078 test_loss: 0.13621305227279662
epoch: 2 training_loss 0.14027644485235213 test_loss: 0.13104215860366822
epoch: 3 training_loss 0.11690079063177108 test_loss: 0.13383933305740356
epoch: 4 training_loss 0.12415229458361864 test_loss: 0.1248063325881958
epoch: 5 training_loss 0.11934687916189432 test_loss: 0.1286209225654602
epoch: 6 training_loss 0.11094046480953694 test_loss: 0.12047454118728637
epoch: 7 training_loss 0.10831921119242907 test_loss: 0.11419148445129394
epoch: 8 training_loss 0.10676362777128816 test_loss: 0.11562409400939941
epoch: 9 training_loss 0.10655530635267496 test_loss: 0.10981162786483764
epoch: 10 training_loss 0.09720599804073572 test_loss: 0.12288120985031128
epoch: 11 training_loss 0.10341580538079143 test_loss: 0.09806171655654908
epoch: 12 training_loss 0.10711553853005171 test_loss: 0.1055000901222229
epoch: 13 training_loss 0.10900589998811483 test_loss: 0.11010113954544068
epoch: 14 training_loss 0.0964355306327343 test_loss: 0.11145360469818115
epoch: 15 training_loss 0.10057151388376952 test_loss: 0.09472256302833557
epoch: 16 training_loss 0.10052628152072429 test_loss: 0.11008172035217285
epoch: 17 training_loss 0.10308050733059644 test_loss: 0.1067996621131897
epoch: 18 training_loss 0.10146482199430466 test_loss: 0.10486671924591065
epoch: 19 training_loss 0.09783559618517756 test_loss: 0.11250321865081787
epoch: 20 training_loss 0.10360608167946339 test_loss: 0.13255268335342407
epoch: 21 training_loss 0.1053204714693129 test_loss: 0.10311163663864135
epoch: 22 training_loss 0.09878295576199889 test_loss: 0.10842431783676147
epoch: 23 training_loss 0.09905080379918217 test_loss: 0.09165716767311097
epoch: 24 training_loss 0.09448908999562264 test_loss: 0.09516640901565551
epoch: 25 training_loss 0.09772194374352694 test_loss: 0.09756438732147217
epoch: 26 training_loss 0.1038418335095048 test_loss: 0.09826158285140991
epoch: 27 training_loss 0.09585163185372948 test_loss: 0.10963990688323974
epoch: 28 training_loss 0.09429744772613048 test_loss: 0.10536168813705445
epoch: 29 training_loss 0.09518490878865123 test_loss: 0.09235091805458069
epoch: 30 training_loss 0.09914600413292646 test_loss: 0.0913703441619873
epoch: 31 training_loss 0.09408140245825053 test_loss: 0.09924579858779907
epoch: 32 training_loss 0.09188059486448764 test_loss: 0.09877467155456543
epoch: 33 training_loss 0.09408859677612781 test_loss: 0.08347652554512024
epoch: 34 training_loss 0.10011657012626529 test_loss: 0.09185659289360046
epoch: 35 training_loss 0.09327573232352733 test_loss: 0.09771186113357544
epoch: 36 training_loss 0.10183232087641954 test_loss: 0.0977925956249237
epoch: 37 training_loss 0.09937239350751043 test_loss: 0.09950479865074158
epoch: 38 training_loss 0.0942335930094123 test_loss: 0.09592841863632202
epoch: 39 training_loss 0.0936649139970541 test_loss: 0.08334308862686157
epoch: 40 training_loss 0.09223837910220027 test_loss: 0.0959922730922699
epoch: 41 training_loss 0.09823290895670653 test_loss: 0.10707473754882812
epoch: 42 training_loss 0.09451086014509201 test_loss: 0.1007469654083252
epoch: 43 training_loss 0.08956820188090205 test_loss: 0.08744359612464905
epoch: 44 training_loss 0.0921761111356318 test_loss: 0.11088699102401733
epoch: 45 training_loss 0.09675655771046877 test_loss: 0.09405922293663024
epoch: 46 training_loss 0.09616358330473304 test_loss: 0.08789937496185303
epoch: 47 training_loss 0.09466089194640517 test_loss: 0.10205036401748657
epoch: 48 training_loss 0.08684405785053968 test_loss: 0.095077645778656
epoch: 49 training_loss 0.09294640764594078 test_loss: 0.09868963360786438
epoch: 50 training_loss 0.08558154135942458 test_loss: 0.09472286701202393
epoch: 51 training_loss 0.09331535287201405 test_loss: 0.08890708088874817
epoch: 52 training_loss 0.09574690736830234 test_loss: 0.09745817184448242
epoch: 53 training_loss 0.09152436841279268 test_loss: 0.10343660116195678
epoch: 54 training_loss 0.09148698415607213 test_loss: 0.09436730146408082
epoch: 55 training_loss 0.10363908670842648 test_loss: 0.09283823370933533
epoch: 56 training_loss 0.09246072448790073 test_loss: 0.0968985140323639
epoch: 57 training_loss 0.09134294172748923 test_loss: 0.09024863243103028
epoch: 58 training_loss 0.08932728428393602 test_loss: 0.0933027446269989
epoch: 59 training_loss 0.09795830616727472 test_loss: 0.09445896744728088
epoch: 60 training_loss 0.09549890965223312 test_loss: 0.09042248725891114
epoch: 61 training_loss 0.0984167986921966 test_loss: 0.10086761713027954
epoch: 62 training_loss 0.09106797839514912 test_loss: 0.08787777423858642
epoch: 63 training_loss 0.09317707143723965 test_loss: 0.09558442234992981
epoch: 64 training_loss 0.09525079783052207 test_loss: 0.10126616954803466
epoch: 65 training_loss 0.08795933432877064 test_loss: 0.09909485578536988
epoch: 66 training_loss 0.08570709962397814 test_loss: 0.10701795816421508
epoch: 67 training_loss 0.08802920285612345 test_loss: 0.11446245908737182
epoch: 68 training_loss 0.09354987625032664 test_loss: 0.08217718601226806
epoch: 69 training_loss 0.08895121242851019 test_loss: 0.10222901105880737
epoch: 70 training_loss 0.08573138305917383 test_loss: 0.09163285493850708
epoch: 71 training_loss 0.08822286464273929 test_loss: 0.08829957246780396
epoch: 72 training_loss 0.09559638090431691 test_loss: 0.08312878012657166
epoch: 73 training_loss 0.08979294091463089 test_loss: 0.0975903868675232
epoch: 74 training_loss 0.08836378438398242 test_loss: 0.09703094363212586
epoch: 75 training_loss 0.09790412835776806 test_loss: 0.10654960870742798
epoch: 76 training_loss 0.08441325329244137 test_loss: 0.0929956316947937
epoch: 77 training_loss 0.08714449763298035 test_loss: 0.09912187457084656
epoch: 78 training_loss 0.08602154349908232 test_loss: 0.09622047543525696
epoch: 79 training_loss 0.09518530378118158 test_loss: 0.0930864930152893
epoch: 80 training_loss 0.09280196491628885 test_loss: 0.11316581964492797
epoch: 81 training_loss 0.0908095109835267 test_loss: 0.09270250797271729
epoch: 82 training_loss 0.09073457099497319 test_loss: 0.0942188024520874
epoch: 83 training_loss 0.09160401914268732 test_loss: 0.09165153503417969
epoch: 84 training_loss 0.09251449666917325 test_loss: 0.0849030077457428
epoch: 85 training_loss 0.091493269354105 test_loss: 0.08123064637184144
epoch: 86 training_loss 0.09727360744029284 test_loss: 0.08186588883399963
epoch: 87 training_loss 0.09104714520275593 test_loss: 0.09078602194786071
epoch: 88 training_loss 0.09404255480505526 test_loss: 0.08453803658485412
epoch: 89 training_loss 0.09695038713514804 test_loss: 0.09581970572471618
epoch: 90 training_loss 0.09437979931011796 test_loss: 0.08298860788345337
epoch: 91 training_loss 0.08875288136303425 test_loss: 0.08500015139579772
epoch: 92 training_loss 0.08949594551697374 test_loss: 0.10596370697021484
epoch: 93 training_loss 0.08497715130448341 test_loss: 0.1013482928276062
epoch: 94 training_loss 0.08395718785002827 test_loss: 0.08685927391052246
epoch: 95 training_loss 0.09032263725996018 test_loss: 0.08920100331306458
epoch: 96 training_loss 0.08937575645744801 test_loss: 0.09153717756271362
epoch: 97 training_loss 0.08358877930790186 test_loss: 0.09548840522766114
epoch: 98 training_loss 0.09089818898588418 test_loss: 0.07802889943122863
epoch: 99 training_loss 0.09351321004331112 test_loss: 0.10167092084884644
epoch: 100 training_loss 0.0880719895940274 test_loss: 0.0903558611869812
epoch: 101 training_loss 0.08852908250875771 test_loss: 0.08828571438789368
epoch: 102 training_loss 0.09143488792702556 test_loss: 0.0910334050655365
epoch: 103 training_loss 0.08697565611451864 test_loss: 0.07905729413032532
epoch: 104 training_loss 0.08779491875320673 test_loss: 0.09437318444252014
epoch: 105 training_loss 0.09035658089444042 test_loss: 0.0884540855884552
epoch: 106 training_loss 0.09158582830801607 test_loss: 0.09435535073280335
epoch: 107 training_loss 0.0839319172129035 test_loss: 0.08451930284500123
epoch: 108 training_loss 0.09378079447895288 test_loss: 0.07034754753112793
epoch: 109 training_loss 0.08835484825074673 test_loss: 0.08906393647193908
epoch: 110 training_loss 0.08837395561859011 test_loss: 0.09995937943458558
epoch: 111 training_loss 0.08815645549446344 test_loss: 0.08110324144363404
epoch: 112 training_loss 0.08380327744409442 test_loss: 0.08603776693344116
epoch: 113 training_loss 0.09074633227661252 test_loss: 0.09472966194152832
epoch: 114 training_loss 0.08784742772579193 test_loss: 0.09032060503959656
epoch: 115 training_loss 0.08717236649245023 test_loss: 0.08685298562049866
epoch: 116 training_loss 0.09228515507653355 test_loss: 0.08664410710334777
epoch: 117 training_loss 0.08849994810298085 test_loss: 0.0989428162574768
epoch: 118 training_loss 0.09124559752643108 test_loss: 0.07658776640892029
epoch: 119 training_loss 0.09076024640351534 test_loss: 0.09285383820533752
epoch: 120 training_loss 0.09180814353749156 test_loss: 0.08532496094703675
epoch: 121 training_loss 0.08289154963567853 test_loss: 0.10718710422515869
epoch: 122 training_loss 0.08966472851112485 test_loss: 0.10448378324508667
epoch: 123 training_loss 0.09638476001098752 test_loss: 0.08285269141197205
epoch: 124 training_loss 0.08471987657248974 test_loss: 0.10812690258026122
epoch: 125 training_loss 0.08677789647132159 test_loss: 0.08535023331642151
epoch: 126 training_loss 0.08075177373364567 test_loss: 0.08132127523422242
epoch: 127 training_loss 0.08204723006114363 test_loss: 0.11452146768569946
epoch: 128 training_loss 0.09569887032732367 test_loss: 0.08861003518104553
epoch: 129 training_loss 0.08490508854389191 test_loss: 0.11049505472183227
epoch: 130 training_loss 0.08754987847059965 test_loss: 0.07742411494255066
epoch: 131 training_loss 0.08175484595820308 test_loss: 0.09655433297157287
epoch: 132 training_loss 0.09354380324482918 test_loss: 0.09088478088378907
epoch: 133 training_loss 0.09423549801111221 test_loss: 0.08617007732391357
epoch: 134 training_loss 0.08512301046401262 test_loss: 0.08194365501403808
epoch: 135 training_loss 0.08735680781304836 test_loss: 0.10190724134445191
epoch: 136 training_loss 0.08604466868564487 test_loss: 0.09695901870727539
epoch: 137 training_loss 0.08400793122127652 test_loss: 0.09708290696144103
epoch: 138 training_loss 0.08254104129038752 test_loss: 0.08412818908691407
epoch: 139 training_loss 0.08352330539375544 test_loss: 0.09408233165740967
epoch: 140 training_loss 0.08793922796845437 test_loss: 0.08475208282470703
epoch: 141 training_loss 0.09003416231833399 test_loss: 0.09005001783370972
epoch: 142 training_loss 0.0816925343312323 test_loss: 0.08197546601295472
epoch: 143 training_loss 0.09719891538843513 test_loss: 0.09376397728919983
epoch: 144 training_loss 0.08141670387238265 test_loss: 0.11800569295883179
epoch: 145 training_loss 0.08788029557093978 test_loss: 0.0863196611404419
epoch: 146 training_loss 0.09083984959870577 test_loss: 0.09454341530799866
epoch: 147 training_loss 0.0807512036152184 test_loss: 0.095097416639328
epoch: 148 training_loss 0.08182475382462144 test_loss: 0.10068343877792359
epoch: 149 training_loss 0.08964573414996266 test_loss: 0.09923414587974548
epoch: 0 training_loss 39.80247673034668 test_loss: 21.27648468017578
epoch: 1 training_loss 17.03068416595459 test_loss: 14.384051513671874
epoch: 2 training_loss 12.456125173568726 test_loss: 11.006007385253906
epoch: 3 training_loss 10.092662687301635 test_loss: 9.549995422363281
epoch: 4 training_loss 8.930449857711793 test_loss: 8.653441619873046
epoch: 5 training_loss 8.06624445438385 test_loss: 7.622755432128907
epoch: 6 training_loss 7.625646624565125 test_loss: 7.252490997314453
epoch: 7 training_loss 7.169736332893372 test_loss: 7.010687255859375
epoch: 8 training_loss 6.690422463417053 test_loss: 6.550169372558594
epoch: 9 training_loss 6.399151420593261 test_loss: 6.164447021484375
epoch: 10 training_loss 6.093755006790161 test_loss: 6.068202972412109
epoch: 11 training_loss 5.9487656354904175 test_loss: 5.910533905029297
epoch: 12 training_loss 5.558696575164795 test_loss: 5.619822311401367
epoch: 13 training_loss 5.3964863824844365 test_loss: 5.352140808105469
epoch: 14 training_loss 5.171229953765869 test_loss: 4.948015594482422
epoch: 15 training_loss 5.063840777873993 test_loss: 5.050466156005859
epoch: 16 training_loss 4.907409510612488 test_loss: 4.558657455444336
epoch: 17 training_loss 4.7211777377128605 test_loss: 4.67989501953125
epoch: 18 training_loss 4.576701028347015 test_loss: 4.486748123168946
epoch: 19 training_loss 4.639491751194 test_loss: 4.260040283203125
epoch: 20 training_loss 4.412476129531861 test_loss: 4.594461822509766
epoch: 21 training_loss 4.372790718078614 test_loss: 4.290658569335937
epoch: 22 training_loss 4.209431917667389 test_loss: 4.222476959228516
epoch: 23 training_loss 4.150204074382782 test_loss: 4.0105335235595705
epoch: 24 training_loss 4.094348428249359 test_loss: 4.128007125854492
epoch: 25 training_loss 3.9937439155578613 test_loss: 3.998810958862305
epoch: 26 training_loss 4.025407493114471 test_loss: 4.220438385009766
epoch: 27 training_loss 3.889748673439026 test_loss: 3.6173152923583984
epoch: 28 training_loss 3.888240213394165 test_loss: 3.8169822692871094
epoch: 29 training_loss 3.915041997432709 test_loss: 3.9462459564208983
epoch: 30 training_loss 3.734691865444183 test_loss: 3.868824768066406
epoch: 31 training_loss 3.7786692667007444 test_loss: 3.736484146118164
epoch: 32 training_loss 3.6931245636940004 test_loss: 3.5386314392089844
epoch: 33 training_loss 3.5686488246917722 test_loss: 3.6898818969726563
epoch: 34 training_loss 3.570579071044922 test_loss: 3.4460254669189454
epoch: 35 training_loss 3.4968536901474 test_loss: 3.5785282135009764
epoch: 36 training_loss 3.50458544254303 test_loss: 3.5208133697509765
epoch: 37 training_loss 3.408449099063873 test_loss: 3.457627868652344
epoch: 38 training_loss 3.4824374437332155 test_loss: 3.372030258178711
epoch: 39 training_loss 3.405336332321167 test_loss: 3.5206424713134767
epoch: 40 training_loss 3.3782748794555664 test_loss: 3.2699050903320312
epoch: 41 training_loss 3.3443089532852173 test_loss: 3.3662391662597657
epoch: 42 training_loss 3.3488003611564636 test_loss: 3.506125640869141
epoch: 43 training_loss 3.2737955617904664 test_loss: 3.312760925292969
epoch: 44 training_loss 3.190433886051178 test_loss: 3.392988586425781
epoch: 45 training_loss 3.2376092219352723 test_loss: 3.25546875
epoch: 46 training_loss 3.297862389087677 test_loss: 3.222249221801758
epoch: 47 training_loss 3.1755376982688905 test_loss: 3.2025814056396484
epoch: 48 training_loss 3.1594840812683107 test_loss: 3.2688831329345702
epoch: 49 training_loss 3.0579054713249207 test_loss: 3.156528091430664
epoch: 50 training_loss 3.038016378879547 test_loss: 3.067880058288574
epoch: 51 training_loss 3.103847498893738 test_loss: 3.237640380859375
epoch: 52 training_loss 3.0733803510665894 test_loss: 3.0449243545532227
epoch: 53 training_loss 3.069079864025116 test_loss: 3.1368690490722657
epoch: 54 training_loss 3.0322084832191467 test_loss: 2.979588508605957
epoch: 55 training_loss 3.0940642309188844 test_loss: 2.942945671081543
epoch: 56 training_loss 2.9931584000587463 test_loss: 2.955795097351074
epoch: 57 training_loss 3.0251475858688353 test_loss: 2.9918922424316405
epoch: 58 training_loss 2.969372663497925 test_loss: 2.9996126174926756
epoch: 59 training_loss 2.969305925369263 test_loss: 2.948452949523926
epoch: 60 training_loss 2.9296904492378233 test_loss: 2.8614084243774416
epoch: 61 training_loss 2.845438849925995 test_loss: 2.9924957275390627
epoch: 62 training_loss 2.9164479184150696 test_loss: 2.917095184326172
epoch: 63 training_loss 2.8555818581581116 test_loss: 2.887151908874512
epoch: 64 training_loss 2.8840337204933166 test_loss: 2.7888252258300783
epoch: 65 training_loss 2.8726083278656005 test_loss: 2.868117904663086
epoch: 66 training_loss 2.894009509086609 test_loss: 2.908950424194336
epoch: 67 training_loss 2.8872355461120605 test_loss: 2.695318603515625
epoch: 68 training_loss 2.704520103931427 test_loss: 2.727705955505371
epoch: 69 training_loss 2.86480406999588 test_loss: 2.8038381576538085
epoch: 70 training_loss 2.746841969490051 test_loss: 2.8805932998657227
epoch: 71 training_loss 2.805419192314148 test_loss: 2.8799907684326174
epoch: 72 training_loss 2.7300253963470458 test_loss: 2.7483129501342773
epoch: 73 training_loss 2.696616413593292 test_loss: 3.040253829956055
epoch: 74 training_loss 2.735458724498749 test_loss: 2.717218208312988
epoch: 75 training_loss 2.6830094695091247 test_loss: 2.7639705657958986
epoch: 76 training_loss 2.7459195828437806 test_loss: 2.8000486373901365
epoch: 77 training_loss 2.7122619915008546 test_loss: 2.606399154663086
epoch: 78 training_loss 2.6253736925125124 test_loss: 2.6986923217773438
epoch: 79 training_loss 2.5992178988456724 test_loss: 2.6923553466796877
epoch: 80 training_loss 2.753576420545578 test_loss: 2.8659072875976563
epoch: 81 training_loss 2.6764452862739563 test_loss: 2.777584266662598
epoch: 82 training_loss 2.6271701836586 test_loss: 2.577381896972656
epoch: 83 training_loss 2.634998289346695 test_loss: 2.44381103515625
epoch: 84 training_loss 2.6554710602760316 test_loss: 2.759930229187012
epoch: 85 training_loss 2.6560997760295866 test_loss: 2.5578283309936523
epoch: 86 training_loss 2.5734377348423005 test_loss: 2.607086181640625
epoch: 87 training_loss 2.581286829710007 test_loss: 2.5451848983764647
epoch: 88 training_loss 2.5847077882289886 test_loss: 2.4987016677856446
epoch: 89 training_loss 2.593942332267761 test_loss: 2.6834318161010744
epoch: 90 training_loss 2.590363975763321 test_loss: 2.5721462249755858
epoch: 91 training_loss 2.556663222312927 test_loss: 2.5176435470581056
epoch: 92 training_loss 2.530260636806488 test_loss: 2.5551389694213866
epoch: 93 training_loss 2.5661815857887267 test_loss: 2.5413694381713867
epoch: 94 training_loss 2.5709245562553407 test_loss: 2.5231618881225586
epoch: 95 training_loss 2.557398874759674 test_loss: 2.5489330291748047
epoch: 96 training_loss 2.5408439087867736 test_loss: 2.6026172637939453
epoch: 97 training_loss 2.5144064021110535 test_loss: 2.6649328231811524
epoch: 98 training_loss 2.503614091873169 test_loss: 2.5629262924194336
epoch: 99 training_loss 2.4813510620594026 test_loss: 2.5983753204345703
epoch: 100 training_loss 2.4748211026191713 test_loss: 2.5662519454956056
epoch: 101 training_loss 2.4553419053554535 test_loss: 2.4267093658447267
epoch: 102 training_loss 2.484398446083069 test_loss: 2.4435192108154298
epoch: 103 training_loss 2.472446925640106 test_loss: 2.5995460510253907
epoch: 104 training_loss 2.475197262763977 test_loss: 2.4548450469970704
epoch: 105 training_loss 2.518913834095001 test_loss: 2.457902717590332
epoch: 106 training_loss 2.517810899019241 test_loss: 2.450157356262207
epoch: 107 training_loss 2.518203320503235 test_loss: 2.4929269790649413
epoch: 108 training_loss 2.4429905903339386 test_loss: 2.535420227050781
epoch: 109 training_loss 2.543317835330963 test_loss: 2.5510793685913087
epoch: 110 training_loss 2.386561598777771 test_loss: 2.4797794342041017
epoch: 111 training_loss 2.393775988817215 test_loss: 2.5335569381713867
epoch: 112 training_loss 2.4146649050712585 test_loss: 2.4902250289916994
epoch: 113 training_loss 2.325586929321289 test_loss: 2.4630674362182616
epoch: 114 training_loss 2.4087632846832276 test_loss: 2.519108200073242
epoch: 115 training_loss 2.4568859446048736 test_loss: 2.407326316833496
epoch: 116 training_loss 2.3903529715538023 test_loss: 2.487172317504883
epoch: 117 training_loss 2.4104316091537474 test_loss: 2.332558441162109
epoch: 118 training_loss 2.460137062072754 test_loss: 2.2879262924194337
epoch: 119 training_loss 2.350906324386597 test_loss: 2.3939882278442384
epoch: 120 training_loss 2.3781033754348755 test_loss: 2.507946014404297
epoch: 121 training_loss 2.399552503824234 test_loss: 2.4332300186157227
epoch: 122 training_loss 2.399026063680649 test_loss: 2.319449806213379
epoch: 123 training_loss 2.41508770108223 test_loss: 2.379741668701172
epoch: 124 training_loss 2.291170675754547 test_loss: 2.4454105377197264
epoch: 125 training_loss 2.399065432548523 test_loss: 2.3870849609375
epoch: 126 training_loss 2.3384751784801483 test_loss: 2.4394296646118163
epoch: 127 training_loss 2.3466243422031403 test_loss: 2.244899940490723
epoch: 128 training_loss 2.308815016746521 test_loss: 2.3096954345703127
epoch: 129 training_loss 2.3813157331943513 test_loss: 2.380611801147461
epoch: 130 training_loss 2.388754566907883 test_loss: 2.230533790588379
epoch: 131 training_loss 2.352928729057312 test_loss: 2.411715507507324
epoch: 132 training_loss 2.326317398548126 test_loss: 2.3071781158447267
epoch: 133 training_loss 2.3143301463127135 test_loss: 2.514389419555664
epoch: 134 training_loss 2.3613884973526003 test_loss: 2.2531837463378905
epoch: 135 training_loss 2.3076947283744813 test_loss: 2.418068695068359
epoch: 136 training_loss 2.3018330323696135 test_loss: 2.3942844390869142
epoch: 137 training_loss 2.271916811466217 test_loss: 2.506704330444336
epoch: 138 training_loss 2.3838428926467894 test_loss: 2.3452003479003904
epoch: 139 training_loss 2.333207930326462 test_loss: 2.4628334045410156
epoch: 140 training_loss 2.327800302505493 test_loss: 2.2282054901123045
epoch: 141 training_loss 2.3162471437454224 test_loss: 2.2771305084228515
epoch: 142 training_loss 2.2757525885105134 test_loss: 2.4011005401611327
epoch: 143 training_loss 2.287637948989868 test_loss: 2.2183780670166016
epoch: 144 training_loss 2.269150787591934 test_loss: 2.340720367431641
epoch: 145 training_loss 2.245462353229523 test_loss: 2.211936187744141
epoch: 146 training_loss 2.2584066247940062 test_loss: 2.273075294494629
epoch: 147 training_loss 2.321181296110153 test_loss: 2.2334142684936524
epoch: 148 training_loss 2.223047171831131 test_loss: 2.347031593322754
epoch: 149 training_loss 2.2489004147052767 test_loss: 2.193665885925293
2615.286838316904
episode: 0 training return: tensor(197.5497, device='cuda:0')
episode: 1 training return: tensor(205.4902, device='cuda:0')
episode: 2 training return: tensor(116.6375, device='cuda:0')
episode: 3 training return: tensor(242.1492, device='cuda:0')
epoch: 1 test_true_pfm: 1932.8568930257525 sim_pfm: 126.573976436009
episode: 4 training return: tensor(-239.0757, device='cuda:0')
episode: 5 training return: tensor(241.0429, device='cuda:0')
episode: 6 training return: tensor(256.7155, device='cuda:0')
episode: 7 training return: tensor(-260.2518, device='cuda:0')
epoch: 2 test_true_pfm: 2263.189977911181 sim_pfm: 22.665734952364193
episode: 8 training return: tensor(217.7915, device='cuda:0')
episode: 9 training return: tensor(-150.4301, device='cuda:0')
episode: 10 training return: tensor(267.2415, device='cuda:0')
episode: 11 training return: tensor(156.1600, device='cuda:0')
epoch: 3 test_true_pfm: 2078.461187429732 sim_pfm: 240.5424054771623
episode: 12 training return: tensor(241.7366, device='cuda:0')
episode: 13 training return: tensor(-150.6317, device='cuda:0')
episode: 14 training return: tensor(275.2831, device='cuda:0')
episode: 15 training return: tensor(226.0129, device='cuda:0')
epoch: 4 test_true_pfm: 2954.8114141536257 sim_pfm: 287.3828986515
episode: 16 training return: tensor(292.5885, device='cuda:0')
episode: 17 training return: tensor(214.3589, device='cuda:0')
episode: 18 training return: tensor(173.2041, device='cuda:0')
episode: 19 training return: tensor(212.2985, device='cuda:0')
epoch: 5 test_true_pfm: 2855.627411962783 sim_pfm: 85.92075324480538
episode: 20 training return: tensor(246.9109, device='cuda:0')
episode: 21 training return: tensor(286.5935, device='cuda:0')
episode: 22 training return: tensor(-385.2500, device='cuda:0')
episode: 23 training return: tensor(206.8051, device='cuda:0')
epoch: 6 test_true_pfm: 2016.561580699842 sim_pfm: -17.21058291043543
episode: 24 training return: tensor(-235.2446, device='cuda:0')
episode: 25 training return: tensor(186.8428, device='cuda:0')
episode: 26 training return: tensor(-47.6843, device='cuda:0')
episode: 27 training return: tensor(246.4725, device='cuda:0')
epoch: 7 test_true_pfm: 3140.449207648098 sim_pfm: 98.27732835168717
episode: 28 training return: tensor(198.8371, device='cuda:0')
episode: 29 training return: tensor(-327.6845, device='cuda:0')
episode: 30 training return: tensor(267.7564, device='cuda:0')
episode: 31 training return: tensor(287.6463, device='cuda:0')
epoch: 8 test_true_pfm: 2013.4531710249541 sim_pfm: 30.48081192432437
episode: 32 training return: tensor(50.6733, device='cuda:0')
episode: 33 training return: tensor(-1.2364, device='cuda:0')
episode: 34 training return: tensor(248.0999, device='cuda:0')
episode: 35 training return: tensor(-394.4880, device='cuda:0')
epoch: 9 test_true_pfm: 1969.8295986804333 sim_pfm: 36.72417190410973
episode: 36 training return: tensor(-405.4586, device='cuda:0')
episode: 37 training return: tensor(288.0373, device='cuda:0')
episode: 38 training return: tensor(-421.3465, device='cuda:0')
episode: 39 training return: tensor(62.9741, device='cuda:0')
epoch: 10 test_true_pfm: 2859.2556642033946 sim_pfm: 217.53237178033064
episode: 40 training return: tensor(36.0627, device='cuda:0')
episode: 41 training return: tensor(-211.6815, device='cuda:0')
episode: 42 training return: tensor(238.8165, device='cuda:0')
episode: 43 training return: tensor(-212.5035, device='cuda:0')
epoch: 11 test_true_pfm: 1903.4838761456879 sim_pfm: -129.38539173648073
episode: 44 training return: tensor(22.5639, device='cuda:0')
episode: 45 training return: tensor(116.6600, device='cuda:0')
episode: 46 training return: tensor(-212.3692, device='cuda:0')
episode: 47 training return: tensor(232.6789, device='cuda:0')
epoch: 12 test_true_pfm: 1796.4732347384768 sim_pfm: 19.59997541795019
episode: 48 training return: tensor(-118.4071, device='cuda:0')
episode: 49 training return: tensor(-71.7736, device='cuda:0')
episode: 50 training return: tensor(209.3425, device='cuda:0')
episode: 51 training return: tensor(60.0698, device='cuda:0')
epoch: 13 test_true_pfm: 3286.7534698796167 sim_pfm: 244.08785818745187
episode: 52 training return: tensor(262.8726, device='cuda:0')
episode: 53 training return: tensor(261.4749, device='cuda:0')
episode: 54 training return: tensor(263.5883, device='cuda:0')
episode: 55 training return: tensor(211.6060, device='cuda:0')
epoch: 14 test_true_pfm: 3024.640871428877 sim_pfm: 183.03007415976995
episode: 56 training return: tensor(-397.2593, device='cuda:0')
episode: 57 training return: tensor(276.1625, device='cuda:0')
episode: 58 training return: tensor(265.1536, device='cuda:0')
episode: 59 training return: tensor(169.6016, device='cuda:0')
epoch: 15 test_true_pfm: 3330.7596743343347 sim_pfm: -11.373952289296236
episode: 60 training return: tensor(220.7205, device='cuda:0')
episode: 61 training return: tensor(275.8692, device='cuda:0')
episode: 62 training return: tensor(-258.2571, device='cuda:0')
episode: 63 training return: tensor(225.8768, device='cuda:0')
epoch: 16 test_true_pfm: 3331.0263935780645 sim_pfm: 189.77380531622717
episode: 64 training return: tensor(235.6049, device='cuda:0')
episode: 65 training return: tensor(-164.0867, device='cuda:0')
episode: 66 training return: tensor(255.6422, device='cuda:0')
episode: 67 training return: tensor(283.5909, device='cuda:0')
epoch: 17 test_true_pfm: 3067.880952356654 sim_pfm: -0.9651798687021559
episode: 68 training return: tensor(-25.7194, device='cuda:0')
episode: 69 training return: tensor(242.9030, device='cuda:0')
episode: 70 training return: tensor(260.5379, device='cuda:0')
episode: 71 training return: tensor(292.3180, device='cuda:0')
epoch: 18 test_true_pfm: 3357.0683032597576 sim_pfm: 106.91261656268034
episode: 72 training return: tensor(233.5226, device='cuda:0')
episode: 73 training return: tensor(250.0576, device='cuda:0')
episode: 74 training return: tensor(264.8018, device='cuda:0')
episode: 75 training return: tensor(247.7854, device='cuda:0')
epoch: 19 test_true_pfm: 3376.919329506031 sim_pfm: 254.24371946352767
episode: 76 training return: tensor(241.8849, device='cuda:0')
episode: 77 training return: tensor(212.5766, device='cuda:0')
episode: 78 training return: tensor(271.8672, device='cuda:0')
episode: 79 training return: tensor(-264.6898, device='cuda:0')
epoch: 20 test_true_pfm: 3340.1430135575065 sim_pfm: 92.62959588650847
episode: 80 training return: tensor(226.6958, device='cuda:0')
episode: 81 training return: tensor(-198.6119, device='cuda:0')
episode: 82 training return: tensor(194.7830, device='cuda:0')
episode: 83 training return: tensor(89.0321, device='cuda:0')
epoch: 21 test_true_pfm: 3407.5256946918635 sim_pfm: 155.8151435998734
episode: 84 training return: tensor(238.3060, device='cuda:0')
episode: 85 training return: tensor(292.7104, device='cuda:0')
episode: 86 training return: tensor(292.6614, device='cuda:0')
episode: 87 training return: tensor(-365.4467, device='cuda:0')
epoch: 22 test_true_pfm: 2686.172571897931 sim_pfm: -151.39638975192793
episode: 88 training return: tensor(249.2852, device='cuda:0')
episode: 89 training return: tensor(252.8834, device='cuda:0')
episode: 90 training return: tensor(285.3987, device='cuda:0')
episode: 91 training return: tensor(210.1332, device='cuda:0')
epoch: 23 test_true_pfm: 3352.6957490055806 sim_pfm: 293.84633623082965
episode: 92 training return: tensor(-87.1941, device='cuda:0')
episode: 93 training return: tensor(229.4284, device='cuda:0')
episode: 94 training return: tensor(263.2260, device='cuda:0')
episode: 95 training return: tensor(-359.8841, device='cuda:0')
epoch: 24 test_true_pfm: 2499.560328585499 sim_pfm: 65.04075549881479
episode: 96 training return: tensor(261.5351, device='cuda:0')
episode: 97 training return: tensor(202.0259, device='cuda:0')
episode: 98 training return: tensor(258.4548, device='cuda:0')
episode: 99 training return: tensor(307.5017, device='cuda:0')
epoch: 25 test_true_pfm: 3391.0825050714634 sim_pfm: 246.3337906319357
episode: 100 training return: tensor(145.6379, device='cuda:0')
episode: 101 training return: tensor(284.8514, device='cuda:0')
episode: 102 training return: tensor(94.1003, device='cuda:0')
episode: 103 training return: tensor(145.0730, device='cuda:0')
epoch: 26 test_true_pfm: 2413.325514565667 sim_pfm: -46.641293352741435
episode: 104 training return: tensor(310.8475, device='cuda:0')
episode: 105 training return: tensor(67.2684, device='cuda:0')
episode: 106 training return: tensor(307.3288, device='cuda:0')
episode: 107 training return: tensor(220.1768, device='cuda:0')
epoch: 27 test_true_pfm: 3160.5796005488533 sim_pfm: 302.1527908330706
episode: 108 training return: tensor(234.2327, device='cuda:0')
episode: 109 training return: tensor(-129.9093, device='cuda:0')
episode: 110 training return: tensor(336.8613, device='cuda:0')
episode: 111 training return: tensor(188.3227, device='cuda:0')
epoch: 28 test_true_pfm: 3369.3869342100697 sim_pfm: 240.11346650870595
episode: 112 training return: tensor(284.5435, device='cuda:0')
episode: 113 training return: tensor(252.7475, device='cuda:0')
episode: 114 training return: tensor(284.9112, device='cuda:0')
episode: 115 training return: tensor(263.5925, device='cuda:0')
epoch: 29 test_true_pfm: 3250.2591705567706 sim_pfm: 76.35807094548363
episode: 116 training return: tensor(240.1899, device='cuda:0')
episode: 117 training return: tensor(248.1682, device='cuda:0')
episode: 118 training return: tensor(258.5471, device='cuda:0')
episode: 119 training return: tensor(262.5324, device='cuda:0')
epoch: 30 test_true_pfm: 3383.8898626945356 sim_pfm: 271.29798884453095
episode: 120 training return: tensor(284.3179, device='cuda:0')
episode: 121 training return: tensor(287.2478, device='cuda:0')
episode: 122 training return: tensor(186.9556, device='cuda:0')
episode: 123 training return: tensor(284.8531, device='cuda:0')
epoch: 31 test_true_pfm: 3276.998389473725 sim_pfm: 196.9588168404783
episode: 124 training return: tensor(339.3421, device='cuda:0')
episode: 125 training return: tensor(304.8590, device='cuda:0')
episode: 126 training return: tensor(308.8118, device='cuda:0')
episode: 127 training return: tensor(-135.4715, device='cuda:0')
epoch: 32 test_true_pfm: 3419.5199517143897 sim_pfm: 289.78272212364635
episode: 128 training return: tensor(294.4860, device='cuda:0')
episode: 129 training return: tensor(227.2597, device='cuda:0')
episode: 130 training return: tensor(226.1137, device='cuda:0')
episode: 131 training return: tensor(261.3390, device='cuda:0')
epoch: 33 test_true_pfm: 3373.416312315334 sim_pfm: 209.04081450172816
episode: 132 training return: tensor(264.7271, device='cuda:0')
episode: 133 training return: tensor(236.3859, device='cuda:0')
episode: 134 training return: tensor(242.9059, device='cuda:0')
episode: 135 training return: tensor(247.4903, device='cuda:0')
epoch: 34 test_true_pfm: 3386.1742952118307 sim_pfm: 171.88004355342127
episode: 136 training return: tensor(222.3452, device='cuda:0')
episode: 137 training return: tensor(273.6225, device='cuda:0')
episode: 138 training return: tensor(-212.2411, device='cuda:0')
episode: 139 training return: tensor(125.8165, device='cuda:0')
epoch: 35 test_true_pfm: 2908.0427513225654 sim_pfm: 294.3110321673448
episode: 140 training return: tensor(300.2078, device='cuda:0')
episode: 141 training return: tensor(314.8790, device='cuda:0')
episode: 142 training return: tensor(255.2894, device='cuda:0')
episode: 143 training return: tensor(324.2185, device='cuda:0')
epoch: 36 test_true_pfm: 3418.227472813966 sim_pfm: 290.4196079993174
episode: 144 training return: tensor(287.0546, device='cuda:0')
episode: 145 training return: tensor(277.9749, device='cuda:0')
episode: 146 training return: tensor(242.7739, device='cuda:0')
episode: 147 training return: tensor(261.9039, device='cuda:0')
epoch: 37 test_true_pfm: 2851.2302260936735 sim_pfm: 285.4952533467537
episode: 148 training return: tensor(328.2669, device='cuda:0')
episode: 149 training return: tensor(266.2573, device='cuda:0')
episode: 150 training return: tensor(250.3400, device='cuda:0')
episode: 151 training return: tensor(183.3490, device='cuda:0')
epoch: 38 test_true_pfm: 3378.050496093145 sim_pfm: 280.86339960593614
episode: 152 training return: tensor(326.4728, device='cuda:0')
episode: 153 training return: tensor(161.1423, device='cuda:0')
episode: 154 training return: tensor(238.3586, device='cuda:0')
episode: 155 training return: tensor(268.8373, device='cuda:0')
epoch: 39 test_true_pfm: 3406.061603701904 sim_pfm: 279.5579678812258
episode: 156 training return: tensor(281.3204, device='cuda:0')
episode: 157 training return: tensor(242.1312, device='cuda:0')
episode: 158 training return: tensor(243.0795, device='cuda:0')
episode: 159 training return: tensor(311.5390, device='cuda:0')
epoch: 40 test_true_pfm: 2989.461997963122 sim_pfm: 298.11963148032856
episode: 160 training return: tensor(296.1884, device='cuda:0')
episode: 161 training return: tensor(205.7850, device='cuda:0')
episode: 162 training return: tensor(270.5766, device='cuda:0')
episode: 163 training return: tensor(310.4182, device='cuda:0')
epoch: 41 test_true_pfm: 3453.948302228259 sim_pfm: 273.3856041013108
episode: 164 training return: tensor(279.3304, device='cuda:0')
episode: 165 training return: tensor(335.0426, device='cuda:0')
episode: 166 training return: tensor(265.4359, device='cuda:0')
episode: 167 training return: tensor(285.1206, device='cuda:0')
epoch: 42 test_true_pfm: 3425.4949964621787 sim_pfm: 263.13047537379316
episode: 168 training return: tensor(238.7085, device='cuda:0')
episode: 169 training return: tensor(259.3701, device='cuda:0')
episode: 170 training return: tensor(286.8166, device='cuda:0')
episode: 171 training return: tensor(317.0841, device='cuda:0')
epoch: 43 test_true_pfm: 3390.9651660976324 sim_pfm: 310.6503506382869
episode: 172 training return: tensor(142.8554, device='cuda:0')
episode: 173 training return: tensor(255.5748, device='cuda:0')
episode: 174 training return: tensor(322.7836, device='cuda:0')
episode: 175 training return: tensor(248.6817, device='cuda:0')
epoch: 44 test_true_pfm: 3417.217271531652 sim_pfm: 348.24098707101075
episode: 176 training return: tensor(-190.9097, device='cuda:0')
episode: 177 training return: tensor(344.9027, device='cuda:0')
episode: 178 training return: tensor(-188.2408, device='cuda:0')
episode: 179 training return: tensor(279.5968, device='cuda:0')
epoch: 45 test_true_pfm: 3419.1441949807345 sim_pfm: 206.20115838380298
episode: 180 training return: tensor(291.4543, device='cuda:0')
episode: 181 training return: tensor(272.7329, device='cuda:0')
episode: 182 training return: tensor(297.4423, device='cuda:0')
episode: 183 training return: tensor(366.8386, device='cuda:0')
epoch: 46 test_true_pfm: 3329.7727335662426 sim_pfm: 296.90697808004916
episode: 184 training return: tensor(254.4103, device='cuda:0')
episode: 185 training return: tensor(295.2875, device='cuda:0')
episode: 186 training return: tensor(331.4802, device='cuda:0')
episode: 187 training return: tensor(201.7573, device='cuda:0')
epoch: 47 test_true_pfm: 3398.012954899597 sim_pfm: 327.66713186633814
episode: 188 training return: tensor(345.3580, device='cuda:0')
episode: 189 training return: tensor(268.0200, device='cuda:0')
episode: 190 training return: tensor(193.2546, device='cuda:0')
episode: 191 training return: tensor(246.8447, device='cuda:0')
epoch: 48 test_true_pfm: 2901.8796785063155 sim_pfm: 358.08744201785885
episode: 192 training return: tensor(245.3095, device='cuda:0')
episode: 193 training return: tensor(250.7306, device='cuda:0')
episode: 194 training return: tensor(279.0018, device='cuda:0')
episode: 195 training return: tensor(326.5155, device='cuda:0')
epoch: 49 test_true_pfm: 3463.2045645635876 sim_pfm: 333.0115527540523
episode: 196 training return: tensor(338.7192, device='cuda:0')
episode: 197 training return: tensor(-173.0287, device='cuda:0')
episode: 198 training return: tensor(253.6680, device='cuda:0')
episode: 199 training return: tensor(110.6287, device='cuda:0')
epoch: 50 test_true_pfm: 3490.866412449743 sim_pfm: 286.89932289446006
episode: 200 training return: tensor(292.3740, device='cuda:0')
episode: 201 training return: tensor(259.8236, device='cuda:0')
episode: 202 training return: tensor(334.4255, device='cuda:0')
episode: 203 training return: tensor(253.0633, device='cuda:0')
epoch: 51 test_true_pfm: 3396.8220706296743 sim_pfm: 284.4006607471577
episode: 204 training return: tensor(-110.7641, device='cuda:0')
episode: 205 training return: tensor(311.1518, device='cuda:0')
episode: 206 training return: tensor(275.2307, device='cuda:0')
episode: 207 training return: tensor(250.0681, device='cuda:0')
epoch: 52 test_true_pfm: 3392.312566946967 sim_pfm: 308.0967681071682
episode: 208 training return: tensor(157.0379, device='cuda:0')
episode: 209 training return: tensor(289.3415, device='cuda:0')
episode: 210 training return: tensor(190.2610, device='cuda:0')
episode: 211 training return: tensor(284.7946, device='cuda:0')
epoch: 53 test_true_pfm: 3412.2352739816706 sim_pfm: 290.8562273236457
episode: 212 training return: tensor(-244.7384, device='cuda:0')
episode: 213 training return: tensor(261.8484, device='cuda:0')
episode: 214 training return: tensor(181.5596, device='cuda:0')
episode: 215 training return: tensor(354.4313, device='cuda:0')
epoch: 54 test_true_pfm: 3397.0642158130245 sim_pfm: 342.802919772473
episode: 216 training return: tensor(273.0344, device='cuda:0')
episode: 217 training return: tensor(260.4950, device='cuda:0')
episode: 218 training return: tensor(56.8946, device='cuda:0')
episode: 219 training return: tensor(312.0097, device='cuda:0')
epoch: 55 test_true_pfm: 3434.3161434562044 sim_pfm: 227.87637327621147
episode: 220 training return: tensor(368.8105, device='cuda:0')
episode: 221 training return: tensor(258.9308, device='cuda:0')
episode: 222 training return: tensor(240.7554, device='cuda:0')
episode: 223 training return: tensor(203.2978, device='cuda:0')
epoch: 56 test_true_pfm: 3028.203336608178 sim_pfm: 323.68819000883377
episode: 224 training return: tensor(280.9721, device='cuda:0')
episode: 225 training return: tensor(290.8618, device='cuda:0')
episode: 226 training return: tensor(341.6432, device='cuda:0')
episode: 227 training return: tensor(323.7216, device='cuda:0')
epoch: 57 test_true_pfm: 3407.3435770214205 sim_pfm: 313.16283344858675
episode: 228 training return: tensor(281.7627, device='cuda:0')
episode: 229 training return: tensor(323.9737, device='cuda:0')
episode: 230 training return: tensor(304.1841, device='cuda:0')
episode: 231 training return: tensor(213.9211, device='cuda:0')
epoch: 58 test_true_pfm: 3319.9957170075236 sim_pfm: 215.21175658594197
episode: 232 training return: tensor(241.6155, device='cuda:0')
episode: 233 training return: tensor(270.6162, device='cuda:0')
episode: 234 training return: tensor(-121.8590, device='cuda:0')
episode: 235 training return: tensor(-164.1185, device='cuda:0')
epoch: 59 test_true_pfm: 3443.3267038072204 sim_pfm: 234.7442513652495
episode: 236 training return: tensor(335.4760, device='cuda:0')
episode: 237 training return: tensor(284.6208, device='cuda:0')
episode: 238 training return: tensor(288.8145, device='cuda:0')
episode: 239 training return: tensor(300.0972, device='cuda:0')
epoch: 60 test_true_pfm: 3409.6836033914406 sim_pfm: 338.760490910024
episode: 240 training return: tensor(253.4916, device='cuda:0')
episode: 241 training return: tensor(293.8777, device='cuda:0')
episode: 242 training return: tensor(265.9276, device='cuda:0')
episode: 243 training return: tensor(281.5428, device='cuda:0')
epoch: 61 test_true_pfm: 3398.2357074861516 sim_pfm: 269.66233211765456
episode: 244 training return: tensor(246.1907, device='cuda:0')
episode: 245 training return: tensor(266.1052, device='cuda:0')
episode: 246 training return: tensor(279.2940, device='cuda:0')
episode: 247 training return: tensor(210.8280, device='cuda:0')
epoch: 62 test_true_pfm: 3416.8112098253964 sim_pfm: 330.41959037988755
episode: 248 training return: tensor(268.7558, device='cuda:0')
episode: 249 training return: tensor(247.6064, device='cuda:0')
episode: 250 training return: tensor(163.2599, device='cuda:0')
episode: 251 training return: tensor(342.8180, device='cuda:0')
epoch: 63 test_true_pfm: 3453.04939502251 sim_pfm: 314.0505247178953
episode: 252 training return: tensor(263.6927, device='cuda:0')
episode: 253 training return: tensor(287.7677, device='cuda:0')
episode: 254 training return: tensor(247.1126, device='cuda:0')
episode: 255 training return: tensor(351.8848, device='cuda:0')
epoch: 64 test_true_pfm: 3457.856804349162 sim_pfm: 293.78633945488644
episode: 256 training return: tensor(284.1333, device='cuda:0')
episode: 257 training return: tensor(249.0147, device='cuda:0')
episode: 258 training return: tensor(254.0861, device='cuda:0')
episode: 259 training return: tensor(304.5877, device='cuda:0')
epoch: 65 test_true_pfm: 3424.166740144337 sim_pfm: 351.484232780446
episode: 260 training return: tensor(238.5979, device='cuda:0')
episode: 261 training return: tensor(327.2453, device='cuda:0')
episode: 262 training return: tensor(161.3632, device='cuda:0')
episode: 263 training return: tensor(259.7122, device='cuda:0')
epoch: 66 test_true_pfm: 3329.3677208673107 sim_pfm: 185.93898202609853
episode: 264 training return: tensor(339.0800, device='cuda:0')
episode: 265 training return: tensor(268.2318, device='cuda:0')
episode: 266 training return: tensor(269.6369, device='cuda:0')
episode: 267 training return: tensor(262.6616, device='cuda:0')
epoch: 67 test_true_pfm: 3430.6270300456636 sim_pfm: 318.5168399249863
episode: 268 training return: tensor(241.7789, device='cuda:0')
episode: 269 training return: tensor(-125.7178, device='cuda:0')
episode: 270 training return: tensor(284.9493, device='cuda:0')
episode: 271 training return: tensor(114.3977, device='cuda:0')
epoch: 68 test_true_pfm: 3378.224489407576 sim_pfm: 309.5445520824093
episode: 272 training return: tensor(239.9057, device='cuda:0')
episode: 273 training return: tensor(-8.5314, device='cuda:0')
episode: 274 training return: tensor(303.0467, device='cuda:0')
episode: 275 training return: tensor(106.1047, device='cuda:0')
epoch: 69 test_true_pfm: 3409.036742720035 sim_pfm: 311.7102064434245
episode: 276 training return: tensor(338.4979, device='cuda:0')
episode: 277 training return: tensor(256.4005, device='cuda:0')
episode: 278 training return: tensor(332.4005, device='cuda:0')
episode: 279 training return: tensor(282.2388, device='cuda:0')
epoch: 70 test_true_pfm: 3464.3674737614224 sim_pfm: 315.3429647239973
episode: 280 training return: tensor(273.9607, device='cuda:0')
episode: 281 training return: tensor(317.8643, device='cuda:0')
episode: 282 training return: tensor(270.1468, device='cuda:0')
episode: 283 training return: tensor(289.3203, device='cuda:0')
epoch: 71 test_true_pfm: 3393.2038654657576 sim_pfm: 352.2407071853522
episode: 284 training return: tensor(268.4576, device='cuda:0')
episode: 285 training return: tensor(293.9720, device='cuda:0')
episode: 286 training return: tensor(386.9830, device='cuda:0')
episode: 287 training return: tensor(297.5863, device='cuda:0')
epoch: 72 test_true_pfm: 3432.268790061 sim_pfm: 276.9328823032556
episode: 288 training return: tensor(181.1542, device='cuda:0')
episode: 289 training return: tensor(-261.1687, device='cuda:0')
episode: 290 training return: tensor(371.0197, device='cuda:0')
episode: 291 training return: tensor(275.2592, device='cuda:0')
epoch: 73 test_true_pfm: 3397.8860757741554 sim_pfm: 270.6425605424253
episode: 292 training return: tensor(274.1699, device='cuda:0')
episode: 293 training return: tensor(203.4525, device='cuda:0')
episode: 294 training return: tensor(189.9964, device='cuda:0')
episode: 295 training return: tensor(290.2309, device='cuda:0')
epoch: 74 test_true_pfm: 3007.9787894046076 sim_pfm: 185.00094680127222
episode: 296 training return: tensor(278.7435, device='cuda:0')
episode: 297 training return: tensor(320.5949, device='cuda:0')
episode: 298 training return: tensor(279.1897, device='cuda:0')
episode: 299 training return: tensor(296.6135, device='cuda:0')
epoch: 75 test_true_pfm: 3412.8538950949874 sim_pfm: 299.42630258908804
episode: 300 training return: tensor(342.6072, device='cuda:0')
episode: 301 training return: tensor(285.1609, device='cuda:0')
episode: 302 training return: tensor(324.0424, device='cuda:0')
episode: 303 training return: tensor(281.8564, device='cuda:0')
epoch: 76 test_true_pfm: 3434.0361930575896 sim_pfm: 235.90152342327443
episode: 304 training return: tensor(359.9557, device='cuda:0')
episode: 305 training return: tensor(246.1530, device='cuda:0')
episode: 306 training return: tensor(295.9402, device='cuda:0')
episode: 307 training return: tensor(284.4931, device='cuda:0')
epoch: 77 test_true_pfm: 3490.8632420274703 sim_pfm: 271.82887040410424
episode: 308 training return: tensor(240.0542, device='cuda:0')
episode: 309 training return: tensor(-16.4901, device='cuda:0')
episode: 310 training return: tensor(209.1309, device='cuda:0')
episode: 311 training return: tensor(317.4074, device='cuda:0')
epoch: 78 test_true_pfm: 3411.6224579308387 sim_pfm: 246.13351983434404
episode: 312 training return: tensor(330.3781, device='cuda:0')
episode: 313 training return: tensor(290.3632, device='cuda:0')
episode: 314 training return: tensor(-160.2679, device='cuda:0')
episode: 315 training return: tensor(258.8955, device='cuda:0')
epoch: 79 test_true_pfm: 3376.0323814071126 sim_pfm: 265.2902009793518
episode: 316 training return: tensor(331.5046, device='cuda:0')
episode: 317 training return: tensor(51.2880, device='cuda:0')
episode: 318 training return: tensor(134.0370, device='cuda:0')
episode: 319 training return: tensor(283.8885, device='cuda:0')
epoch: 80 test_true_pfm: 3395.043873907515 sim_pfm: 229.75418396137925
episode: 320 training return: tensor(-47.8776, device='cuda:0')
episode: 321 training return: tensor(-47.1371, device='cuda:0')
episode: 322 training return: tensor(278.3423, device='cuda:0')
episode: 323 training return: tensor(300.8353, device='cuda:0')
epoch: 81 test_true_pfm: 3341.985887635265 sim_pfm: 129.79812634800328
episode: 324 training return: tensor(295.8428, device='cuda:0')
episode: 325 training return: tensor(329.7945, device='cuda:0')
episode: 326 training return: tensor(144.4459, device='cuda:0')
episode: 327 training return: tensor(283.7141, device='cuda:0')
epoch: 82 test_true_pfm: 3456.9740449977376 sim_pfm: 132.32100434423774
episode: 328 training return: tensor(-57.3936, device='cuda:0')
episode: 329 training return: tensor(255.3529, device='cuda:0')
episode: 330 training return: tensor(313.7127, device='cuda:0')
episode: 331 training return: tensor(338.4030, device='cuda:0')
epoch: 83 test_true_pfm: 3490.971476944938 sim_pfm: 317.15575569936
episode: 332 training return: tensor(262.5234, device='cuda:0')
episode: 333 training return: tensor(274.2131, device='cuda:0')
episode: 334 training return: tensor(293.6753, device='cuda:0')
episode: 335 training return: tensor(136.8017, device='cuda:0')
epoch: 84 test_true_pfm: 3486.0874038884917 sim_pfm: 240.3463608721116
episode: 336 training return: tensor(334.5938, device='cuda:0')
episode: 337 training return: tensor(341.4084, device='cuda:0')
episode: 338 training return: tensor(245.4492, device='cuda:0')
episode: 339 training return: tensor(297.4880, device='cuda:0')
epoch: 85 test_true_pfm: 3442.9062353549893 sim_pfm: 305.62182610237505
episode: 340 training return: tensor(321.6027, device='cuda:0')
episode: 341 training return: tensor(263.0969, device='cuda:0')
episode: 342 training return: tensor(254.6400, device='cuda:0')
episode: 343 training return: tensor(261.3075, device='cuda:0')
epoch: 86 test_true_pfm: 3384.308950029968 sim_pfm: 282.9172403193273
episode: 344 training return: tensor(302.9209, device='cuda:0')
episode: 345 training return: tensor(365.6012, device='cuda:0')
episode: 346 training return: tensor(257.4016, device='cuda:0')
episode: 347 training return: tensor(287.9092, device='cuda:0')
epoch: 87 test_true_pfm: 3395.4684626550575 sim_pfm: 274.7428839432153
episode: 348 training return: tensor(261.9992, device='cuda:0')
episode: 349 training return: tensor(-191.0331, device='cuda:0')
episode: 350 training return: tensor(249.6309, device='cuda:0')
episode: 351 training return: tensor(253.7995, device='cuda:0')
epoch: 88 test_true_pfm: 3067.421423417101 sim_pfm: 340.9451070155483
episode: 352 training return: tensor(247.3930, device='cuda:0')
episode: 353 training return: tensor(323.5728, device='cuda:0')
episode: 354 training return: tensor(292.8324, device='cuda:0')
episode: 355 training return: tensor(311.4333, device='cuda:0')
epoch: 89 test_true_pfm: 3480.0645011845095 sim_pfm: 312.43329328092904
episode: 356 training return: tensor(280.8210, device='cuda:0')
episode: 357 training return: tensor(295.3505, device='cuda:0')
episode: 358 training return: tensor(305.0890, device='cuda:0')
episode: 359 training return: tensor(269.9202, device='cuda:0')
epoch: 90 test_true_pfm: 3026.0621246221162 sim_pfm: 319.11696827712393
episode: 360 training return: tensor(257.9969, device='cuda:0')
episode: 361 training return: tensor(287.9329, device='cuda:0')
episode: 362 training return: tensor(328.1473, device='cuda:0')
episode: 363 training return: tensor(267.6889, device='cuda:0')
epoch: 91 test_true_pfm: 3447.5352465921183 sim_pfm: 295.0729085082615
episode: 364 training return: tensor(300.5643, device='cuda:0')
episode: 365 training return: tensor(-170.8023, device='cuda:0')
episode: 366 training return: tensor(293.7431, device='cuda:0')
episode: 367 training return: tensor(312.0048, device='cuda:0')
epoch: 92 test_true_pfm: 2840.942289076756 sim_pfm: 267.4102940082278
episode: 368 training return: tensor(124.0058, device='cuda:0')
episode: 369 training return: tensor(286.1020, device='cuda:0')
episode: 370 training return: tensor(214.1597, device='cuda:0')
episode: 371 training return: tensor(143.6663, device='cuda:0')
epoch: 93 test_true_pfm: 3119.8048468019783 sim_pfm: 297.1019282714794
episode: 372 training return: tensor(211.0828, device='cuda:0')
episode: 373 training return: tensor(328.6872, device='cuda:0')
episode: 374 training return: tensor(212.3974, device='cuda:0')
episode: 375 training return: tensor(297.9158, device='cuda:0')
epoch: 94 test_true_pfm: 3470.1229828720393 sim_pfm: 350.01737236533273
episode: 376 training return: tensor(105.5650, device='cuda:0')
episode: 377 training return: tensor(288.6924, device='cuda:0')
episode: 378 training return: tensor(268.7850, device='cuda:0')
episode: 379 training return: tensor(-216.8667, device='cuda:0')
epoch: 95 test_true_pfm: 3263.7935408805747 sim_pfm: 303.89370797401835
episode: 380 training return: tensor(165.7008, device='cuda:0')
episode: 381 training return: tensor(-141.9919, device='cuda:0')
episode: 382 training return: tensor(215.4640, device='cuda:0')
episode: 383 training return: tensor(363.2303, device='cuda:0')
epoch: 96 test_true_pfm: 2832.0043460795036 sim_pfm: 225.36637309287713
episode: 384 training return: tensor(199.2616, device='cuda:0')
episode: 385 training return: tensor(281.5559, device='cuda:0')
episode: 386 training return: tensor(278.1237, device='cuda:0')
episode: 387 training return: tensor(243.9516, device='cuda:0')
epoch: 97 test_true_pfm: 3433.6661103900624 sim_pfm: 307.07210768783506
episode: 388 training return: tensor(280.2907, device='cuda:0')
episode: 389 training return: tensor(223.1246, device='cuda:0')
episode: 390 training return: tensor(328.3702, device='cuda:0')
episode: 391 training return: tensor(352.8347, device='cuda:0')
epoch: 98 test_true_pfm: 3473.379952919051 sim_pfm: 264.211393931803
episode: 392 training return: tensor(261.9366, device='cuda:0')
episode: 393 training return: tensor(279.5754, device='cuda:0')
episode: 394 training return: tensor(247.2731, device='cuda:0')
episode: 395 training return: tensor(-163.3858, device='cuda:0')
epoch: 99 test_true_pfm: 3391.6102448882225 sim_pfm: 341.67765327334445
episode: 396 training return: tensor(-147.8497, device='cuda:0')
episode: 397 training return: tensor(296.8383, device='cuda:0')
episode: 398 training return: tensor(303.0197, device='cuda:0')
episode: 399 training return: tensor(239.5121, device='cuda:0')
epoch: 100 test_true_pfm: 3485.1242632104477 sim_pfm: 286.34002265655243
episode: 400 training return: tensor(280.9383, device='cuda:0')
episode: 401 training return: tensor(218.3148, device='cuda:0')
episode: 402 training return: tensor(217.9774, device='cuda:0')
episode: 403 training return: tensor(264.0214, device='cuda:0')
epoch: 101 test_true_pfm: 3459.1276391241313 sim_pfm: 307.98331998632057
episode: 404 training return: tensor(273.8243, device='cuda:0')
episode: 405 training return: tensor(267.0248, device='cuda:0')
episode: 406 training return: tensor(258.7661, device='cuda:0')
episode: 407 training return: tensor(327.6416, device='cuda:0')
epoch: 102 test_true_pfm: 3317.2063462786914 sim_pfm: 301.6103743739465
episode: 408 training return: tensor(309.0553, device='cuda:0')
episode: 409 training return: tensor(274.8731, device='cuda:0')
episode: 410 training return: tensor(18.5336, device='cuda:0')
episode: 411 training return: tensor(291.3181, device='cuda:0')
epoch: 103 test_true_pfm: 3451.6943544572628 sim_pfm: 275.85388436574914
episode: 412 training return: tensor(293.9285, device='cuda:0')
episode: 413 training return: tensor(232.5378, device='cuda:0')
episode: 414 training return: tensor(260.1426, device='cuda:0')
episode: 415 training return: tensor(251.1315, device='cuda:0')
epoch: 104 test_true_pfm: 3424.872619510581 sim_pfm: 319.5331394962365
episode: 416 training return: tensor(227.3107, device='cuda:0')
episode: 417 training return: tensor(228.6668, device='cuda:0')
episode: 418 training return: tensor(197.2699, device='cuda:0')
episode: 419 training return: tensor(262.8666, device='cuda:0')
epoch: 105 test_true_pfm: 3471.7444597452245 sim_pfm: 339.98259531656123
episode: 420 training return: tensor(246.0808, device='cuda:0')
episode: 421 training return: tensor(181.8401, device='cuda:0')
episode: 422 training return: tensor(281.3490, device='cuda:0')
episode: 423 training return: tensor(271.5939, device='cuda:0')
epoch: 106 test_true_pfm: 3425.1066702742883 sim_pfm: 278.6454980863491
episode: 424 training return: tensor(-88.7522, device='cuda:0')
episode: 425 training return: tensor(216.8682, device='cuda:0')
episode: 426 training return: tensor(202.4384, device='cuda:0')
episode: 427 training return: tensor(149.4243, device='cuda:0')
epoch: 107 test_true_pfm: 3452.0933630090717 sim_pfm: 303.97285166014143
episode: 428 training return: tensor(301.2099, device='cuda:0')
episode: 429 training return: tensor(204.0944, device='cuda:0')
episode: 430 training return: tensor(250.4602, device='cuda:0')
episode: 431 training return: tensor(231.5245, device='cuda:0')
epoch: 108 test_true_pfm: 3467.3124088922887 sim_pfm: 271.99547802748083
episode: 432 training return: tensor(231.2980, device='cuda:0')
episode: 433 training return: tensor(358.6766, device='cuda:0')
episode: 434 training return: tensor(12.7065, device='cuda:0')
episode: 435 training return: tensor(281.3064, device='cuda:0')
epoch: 109 test_true_pfm: 3461.2245495930056 sim_pfm: 312.38796106308774
episode: 436 training return: tensor(286.3268, device='cuda:0')
episode: 437 training return: tensor(160.8999, device='cuda:0')
episode: 438 training return: tensor(285.6375, device='cuda:0')
episode: 439 training return: tensor(287.8971, device='cuda:0')
epoch: 110 test_true_pfm: 3462.0698276768126 sim_pfm: 121.53014762335806
episode: 440 training return: tensor(-224.9452, device='cuda:0')
episode: 441 training return: tensor(251.2472, device='cuda:0')
episode: 442 training return: tensor(204.3851, device='cuda:0')
episode: 443 training return: tensor(197.9308, device='cuda:0')
epoch: 111 test_true_pfm: 2625.479260373226 sim_pfm: 38.36102059307935
episode: 444 training return: tensor(297.4217, device='cuda:0')
episode: 445 training return: tensor(205.9363, device='cuda:0')
episode: 446 training return: tensor(310.5456, device='cuda:0')
episode: 447 training return: tensor(260.9476, device='cuda:0')
epoch: 112 test_true_pfm: 3397.421488021957 sim_pfm: 313.00845046527684
episode: 448 training return: tensor(203.2834, device='cuda:0')
episode: 449 training return: tensor(172.1469, device='cuda:0')
episode: 450 training return: tensor(-16.9871, device='cuda:0')
episode: 451 training return: tensor(285.3971, device='cuda:0')
epoch: 113 test_true_pfm: 3434.5989419745915 sim_pfm: 337.02502765665605
episode: 452 training return: tensor(319.3534, device='cuda:0')
episode: 453 training return: tensor(296.9409, device='cuda:0')
episode: 454 training return: tensor(274.1892, device='cuda:0')
episode: 455 training return: tensor(230.2536, device='cuda:0')
epoch: 114 test_true_pfm: 3430.7518310907285 sim_pfm: 293.2176771200029
episode: 456 training return: tensor(162.9769, device='cuda:0')
episode: 457 training return: tensor(297.5951, device='cuda:0')
episode: 458 training return: tensor(270.8545, device='cuda:0')
episode: 459 training return: tensor(234.2205, device='cuda:0')
epoch: 115 test_true_pfm: 3397.0483769971056 sim_pfm: 316.43548025604105
episode: 460 training return: tensor(337.1116, device='cuda:0')
episode: 461 training return: tensor(265.4702, device='cuda:0')
episode: 462 training return: tensor(306.3327, device='cuda:0')
episode: 463 training return: tensor(278.7568, device='cuda:0')
epoch: 116 test_true_pfm: 3410.0349139928353 sim_pfm: 336.27264794067014
episode: 464 training return: tensor(314.6607, device='cuda:0')
episode: 465 training return: tensor(-276.7738, device='cuda:0')
episode: 466 training return: tensor(184.6234, device='cuda:0')
episode: 467 training return: tensor(217.2764, device='cuda:0')
epoch: 117 test_true_pfm: 3386.0593343817177 sim_pfm: 289.4327222871846
episode: 468 training return: tensor(265.6960, device='cuda:0')
episode: 469 training return: tensor(266.8631, device='cuda:0')
episode: 470 training return: tensor(52.2602, device='cuda:0')
episode: 471 training return: tensor(310.9166, device='cuda:0')
epoch: 118 test_true_pfm: 3484.609152585659 sim_pfm: 298.24979986595764
episode: 472 training return: tensor(295.8860, device='cuda:0')
episode: 473 training return: tensor(287.3911, device='cuda:0')
episode: 474 training return: tensor(279.0048, device='cuda:0')
episode: 475 training return: tensor(-118.6659, device='cuda:0')
epoch: 119 test_true_pfm: 2539.2198496907627 sim_pfm: 325.79282464511925
episode: 476 training return: tensor(299.6537, device='cuda:0')
episode: 477 training return: tensor(263.9083, device='cuda:0')
episode: 478 training return: tensor(285.0534, device='cuda:0')
episode: 479 training return: tensor(186.7406, device='cuda:0')
epoch: 120 test_true_pfm: 3402.980498314431 sim_pfm: 313.3363237791248
episode: 480 training return: tensor(346.3183, device='cuda:0')
episode: 481 training return: tensor(294.6574, device='cuda:0')
episode: 482 training return: tensor(270.5947, device='cuda:0')
episode: 483 training return: tensor(288.2855, device='cuda:0')
epoch: 121 test_true_pfm: 3515.1066226754106 sim_pfm: 326.40355203752796
episode: 484 training return: tensor(315.8762, device='cuda:0')
episode: 485 training return: tensor(235.4904, device='cuda:0')
episode: 486 training return: tensor(295.5776, device='cuda:0')
episode: 487 training return: tensor(232.8327, device='cuda:0')
epoch: 122 test_true_pfm: 3411.748720283797 sim_pfm: 338.159168662872
episode: 488 training return: tensor(288.7670, device='cuda:0')
episode: 489 training return: tensor(283.7661, device='cuda:0')
episode: 490 training return: tensor(234.6423, device='cuda:0')
episode: 491 training return: tensor(289.3583, device='cuda:0')
epoch: 123 test_true_pfm: 3420.3485990014537 sim_pfm: 277.69551068439614
episode: 492 training return: tensor(145.1871, device='cuda:0')
episode: 493 training return: tensor(396.5551, device='cuda:0')
episode: 494 training return: tensor(286.9320, device='cuda:0')
episode: 495 training return: tensor(258.1664, device='cuda:0')
epoch: 124 test_true_pfm: 3472.5022863128584 sim_pfm: 308.0548720724376
episode: 496 training return: tensor(294.1840, device='cuda:0')
episode: 497 training return: tensor(271.1246, device='cuda:0')
episode: 498 training return: tensor(320.5867, device='cuda:0')
episode: 499 training return: tensor(237.6935, device='cuda:0')
epoch: 125 test_true_pfm: 3030.3416780597786 sim_pfm: 315.1801981946531
episode: 500 training return: tensor(376.1345, device='cuda:0')
episode: 501 training return: tensor(302.7584, device='cuda:0')
episode: 502 training return: tensor(246.1268, device='cuda:0')
episode: 503 training return: tensor(233.4040, device='cuda:0')
epoch: 126 test_true_pfm: 3407.325390316757 sim_pfm: 296.9945641630329
episode: 504 training return: tensor(255.8476, device='cuda:0')
episode: 505 training return: tensor(283.8094, device='cuda:0')
episode: 506 training return: tensor(253.0271, device='cuda:0')
episode: 507 training return: tensor(301.9854, device='cuda:0')
epoch: 127 test_true_pfm: 3362.539090671464 sim_pfm: 349.30351307203335
episode: 508 training return: tensor(262.1425, device='cuda:0')
episode: 509 training return: tensor(269.9156, device='cuda:0')
episode: 510 training return: tensor(352.5070, device='cuda:0')
episode: 511 training return: tensor(261.7680, device='cuda:0')
epoch: 128 test_true_pfm: 3414.352121220538 sim_pfm: 257.0505688493431
episode: 512 training return: tensor(259.6772, device='cuda:0')
episode: 513 training return: tensor(216.7662, device='cuda:0')
episode: 514 training return: tensor(262.1755, device='cuda:0')
episode: 515 training return: tensor(271.2722, device='cuda:0')
epoch: 129 test_true_pfm: 2792.4299841707784 sim_pfm: 352.15878145066864
episode: 516 training return: tensor(275.2222, device='cuda:0')
episode: 517 training return: tensor(317.0091, device='cuda:0')
episode: 518 training return: tensor(345.4766, device='cuda:0')
episode: 519 training return: tensor(279.9906, device='cuda:0')
epoch: 130 test_true_pfm: 3040.0974132836777 sim_pfm: 325.5743105632525
episode: 520 training return: tensor(314.3640, device='cuda:0')
episode: 521 training return: tensor(116.8944, device='cuda:0')
episode: 522 training return: tensor(-180.5835, device='cuda:0')
episode: 523 training return: tensor(321.9462, device='cuda:0')
epoch: 131 test_true_pfm: 2677.006887165355 sim_pfm: 304.36573445608764
episode: 524 training return: tensor(310.0873, device='cuda:0')
episode: 525 training return: tensor(305.7740, device='cuda:0')
episode: 526 training return: tensor(251.1030, device='cuda:0')
episode: 527 training return: tensor(287.4647, device='cuda:0')
epoch: 132 test_true_pfm: 3432.7101353874623 sim_pfm: 316.80432899143005
episode: 528 training return: tensor(333.6672, device='cuda:0')
episode: 529 training return: tensor(355.0485, device='cuda:0')
episode: 530 training return: tensor(210.0031, device='cuda:0')
episode: 531 training return: tensor(323.7383, device='cuda:0')
epoch: 133 test_true_pfm: 3480.1184601091786 sim_pfm: 176.08675760341188
episode: 532 training return: tensor(267.0574, device='cuda:0')
episode: 533 training return: tensor(286.0600, device='cuda:0')
episode: 534 training return: tensor(105.6283, device='cuda:0')
episode: 535 training return: tensor(297.9451, device='cuda:0')
epoch: 134 test_true_pfm: 3384.296907655587 sim_pfm: 307.86214319888194
episode: 536 training return: tensor(296.9914, device='cuda:0')
episode: 537 training return: tensor(280.1950, device='cuda:0')
episode: 538 training return: tensor(298.9924, device='cuda:0')
episode: 539 training return: tensor(265.4212, device='cuda:0')
epoch: 135 test_true_pfm: 3138.7481645836597 sim_pfm: 305.001667762214
episode: 540 training return: tensor(235.1387, device='cuda:0')
episode: 541 training return: tensor(320.6355, device='cuda:0')
episode: 542 training return: tensor(262.6451, device='cuda:0')
episode: 543 training return: tensor(309.6824, device='cuda:0')
epoch: 136 test_true_pfm: 3465.4770762304283 sim_pfm: 316.8245580523314
episode: 544 training return: tensor(-375.8762, device='cuda:0')
episode: 545 training return: tensor(268.2615, device='cuda:0')
episode: 546 training return: tensor(299.0303, device='cuda:0')
episode: 547 training return: tensor(263.4346, device='cuda:0')
epoch: 137 test_true_pfm: 3478.553873412841 sim_pfm: 284.67940037336666
episode: 548 training return: tensor(274.8943, device='cuda:0')
episode: 549 training return: tensor(-222.8198, device='cuda:0')
episode: 550 training return: tensor(287.3968, device='cuda:0')
episode: 551 training return: tensor(303.8456, device='cuda:0')
epoch: 138 test_true_pfm: 3445.252526723941 sim_pfm: 214.68096154136583
episode: 552 training return: tensor(271.3722, device='cuda:0')
episode: 553 training return: tensor(340.5323, device='cuda:0')
episode: 554 training return: tensor(174.1169, device='cuda:0')
episode: 555 training return: tensor(307.6576, device='cuda:0')
epoch: 139 test_true_pfm: 3478.7802926642344 sim_pfm: 303.09040162782185
episode: 556 training return: tensor(258.4436, device='cuda:0')
episode: 557 training return: tensor(241.0629, device='cuda:0')
episode: 558 training return: tensor(-141.6552, device='cuda:0')
episode: 559 training return: tensor(152.9597, device='cuda:0')
epoch: 140 test_true_pfm: 3432.5211910854964 sim_pfm: 268.4121531728112
episode: 560 training return: tensor(-192.4578, device='cuda:0')
episode: 561 training return: tensor(289.8306, device='cuda:0')
episode: 562 training return: tensor(292.7143, device='cuda:0')
episode: 563 training return: tensor(252.8816, device='cuda:0')
epoch: 141 test_true_pfm: 3406.785924317162 sim_pfm: 297.6490789358407
episode: 564 training return: tensor(257.9493, device='cuda:0')
episode: 565 training return: tensor(325.1005, device='cuda:0')
episode: 566 training return: tensor(270.2269, device='cuda:0')
episode: 567 training return: tensor(-180.7498, device='cuda:0')
epoch: 142 test_true_pfm: 3427.5205484839157 sim_pfm: 322.26451664239477
episode: 568 training return: tensor(271.5563, device='cuda:0')
episode: 569 training return: tensor(292.9146, device='cuda:0')
episode: 570 training return: tensor(287.6450, device='cuda:0')
episode: 571 training return: tensor(251.1237, device='cuda:0')
epoch: 143 test_true_pfm: 3427.20906210853 sim_pfm: 307.92416946261073
episode: 572 training return: tensor(265.5745, device='cuda:0')
episode: 573 training return: tensor(295.8936, device='cuda:0')
episode: 574 training return: tensor(-4.5473, device='cuda:0')
episode: 575 training return: tensor(138.5774, device='cuda:0')
epoch: 144 test_true_pfm: 3467.296837839775 sim_pfm: 300.4997764028667
episode: 576 training return: tensor(273.9071, device='cuda:0')
episode: 577 training return: tensor(275.7029, device='cuda:0')
episode: 578 training return: tensor(274.8405, device='cuda:0')
episode: 579 training return: tensor(308.0645, device='cuda:0')
epoch: 145 test_true_pfm: 3444.377989586436 sim_pfm: 217.28540554422457
episode: 580 training return: tensor(332.8900, device='cuda:0')
episode: 581 training return: tensor(244.7868, device='cuda:0')
episode: 582 training return: tensor(188.1638, device='cuda:0')
episode: 583 training return: tensor(246.6871, device='cuda:0')
epoch: 146 test_true_pfm: 3272.286742072874 sim_pfm: 267.39733141303685
episode: 584 training return: tensor(339.1025, device='cuda:0')
episode: 585 training return: tensor(251.0536, device='cuda:0')
episode: 586 training return: tensor(325.2472, device='cuda:0')
episode: 587 training return: tensor(254.0822, device='cuda:0')
epoch: 147 test_true_pfm: 3427.1925820414076 sim_pfm: 333.2459298658262
episode: 588 training return: tensor(-166.4717, device='cuda:0')
episode: 589 training return: tensor(323.2608, device='cuda:0')
episode: 590 training return: tensor(277.0182, device='cuda:0')
episode: 591 training return: tensor(283.1049, device='cuda:0')
epoch: 148 test_true_pfm: 3478.7234277174775 sim_pfm: 314.8026254894212
episode: 592 training return: tensor(219.8439, device='cuda:0')
episode: 593 training return: tensor(315.9493, device='cuda:0')
episode: 594 training return: tensor(298.7156, device='cuda:0')
episode: 595 training return: tensor(279.7698, device='cuda:0')
epoch: 149 test_true_pfm: 3437.608423446725 sim_pfm: 239.68630440053917
episode: 596 training return: tensor(173.7029, device='cuda:0')
episode: 597 training return: tensor(284.2373, device='cuda:0')
episode: 598 training return: tensor(277.5658, device='cuda:0')
episode: 599 training return: tensor(323.5435, device='cuda:0')
epoch: 150 test_true_pfm: 3376.794442873714 sim_pfm: 269.3809625149782
