['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.27112774930894373 test_loss: 0.24101769924163818
epoch: 1 training_loss 0.1432266206294298 test_loss: 0.15290071964263915
epoch: 2 training_loss 0.11724961806088686 test_loss: 0.145027494430542
epoch: 3 training_loss 0.11668632131069899 test_loss: 0.10888065099716186
epoch: 4 training_loss 0.11336514772847295 test_loss: 0.13822569847106933
epoch: 5 training_loss 0.10941311065107584 test_loss: 0.1108751893043518
epoch: 6 training_loss 0.09766298351809383 test_loss: 0.11035592555999756
epoch: 7 training_loss 0.09140556178987026 test_loss: 0.09297934174537659
epoch: 8 training_loss 0.09544170135632157 test_loss: 0.0882264256477356
epoch: 9 training_loss 0.1029885571449995 test_loss: 0.09645084142684937
epoch: 10 training_loss 0.09434661388397217 test_loss: 0.0983031928539276
epoch: 11 training_loss 0.09573502546176314 test_loss: 0.11806538105010986
epoch: 12 training_loss 0.0946377107873559 test_loss: 0.10108059644699097
epoch: 13 training_loss 0.09705690108239651 test_loss: 0.08270128965377807
epoch: 14 training_loss 0.09279697749763727 test_loss: 0.10320903062820434
epoch: 15 training_loss 0.08840769112110138 test_loss: 0.07993915677070618
epoch: 16 training_loss 0.08743669368326663 test_loss: 0.11427706480026245
epoch: 17 training_loss 0.09278494350612164 test_loss: 0.09258977770805359
epoch: 18 training_loss 0.09328138554468751 test_loss: 0.08909084796905517
epoch: 19 training_loss 0.08544695755466819 test_loss: 0.12322324514389038
epoch: 20 training_loss 0.09035154290497303 test_loss: 0.08426707983016968
epoch: 21 training_loss 0.08734698524698616 test_loss: 0.09354549050331115
epoch: 22 training_loss 0.08792654989287257 test_loss: 0.08039488792419433
epoch: 23 training_loss 0.08358963755890728 test_loss: 0.10087660551071168
epoch: 24 training_loss 0.08852885859087109 test_loss: 0.07969691157341004
epoch: 25 training_loss 0.08696608362719417 test_loss: 0.1012802243232727
epoch: 26 training_loss 0.08322902044281363 test_loss: 0.07962859272956849
epoch: 27 training_loss 0.0861656854301691 test_loss: 0.09159177541732788
epoch: 28 training_loss 0.08071682011708617 test_loss: 0.08968952298164368
epoch: 29 training_loss 0.08931986453011632 test_loss: 0.08347829580307006
epoch: 30 training_loss 0.08557693686336279 test_loss: 0.08992246985435486
epoch: 31 training_loss 0.08225656216964125 test_loss: 0.08995685577392579
epoch: 32 training_loss 0.08816374219954014 test_loss: 0.08376960158348083
epoch: 33 training_loss 0.08677314372733236 test_loss: 0.07521924376487732
epoch: 34 training_loss 0.08697000374086201 test_loss: 0.08068262338638306
epoch: 35 training_loss 0.09056340385228395 test_loss: 0.07531212568283081
epoch: 36 training_loss 0.08528768809512258 test_loss: 0.10802962779998779
epoch: 37 training_loss 0.0804891848191619 test_loss: 0.09067822694778442
epoch: 38 training_loss 0.0823608360812068 test_loss: 0.085599285364151
epoch: 39 training_loss 0.08496844170615077 test_loss: 0.08338247537612915
epoch: 40 training_loss 0.07904160968959331 test_loss: 0.09981783628463745
epoch: 41 training_loss 0.08179544139653444 test_loss: 0.08806056380271912
epoch: 42 training_loss 0.08569945376366377 test_loss: 0.09143370985984803
epoch: 43 training_loss 0.07889930760487913 test_loss: 0.09017542600631714
epoch: 44 training_loss 0.08483901299536228 test_loss: 0.07670581340789795
epoch: 45 training_loss 0.07585460279136896 test_loss: 0.09532561898231506
epoch: 46 training_loss 0.0825288035813719 test_loss: 0.09068359732627869
epoch: 47 training_loss 0.08783324595540762 test_loss: 0.07926695346832276
epoch: 48 training_loss 0.07753138521686197 test_loss: 0.07825352549552918
epoch: 49 training_loss 0.07947684951126575 test_loss: 0.09581626653671264
epoch: 50 training_loss 0.0807674345932901 test_loss: 0.07334178686141968
epoch: 51 training_loss 0.08008472817018628 test_loss: 0.08238188624382019
epoch: 52 training_loss 0.08286231264472008 test_loss: 0.09504421949386596
epoch: 53 training_loss 0.07638379302807152 test_loss: 0.0906724214553833
epoch: 54 training_loss 0.07654252897948027 test_loss: 0.08620591163635254
epoch: 55 training_loss 0.07955525679513813 test_loss: 0.0814957082271576
epoch: 56 training_loss 0.0796238745097071 test_loss: 0.07111549973487855
epoch: 57 training_loss 0.08430370423942804 test_loss: 0.08435567617416381
epoch: 58 training_loss 0.07719048019498587 test_loss: 0.08056867718696595
epoch: 59 training_loss 0.07855499415658414 test_loss: 0.08075427412986755
epoch: 60 training_loss 0.08625952299684286 test_loss: 0.07410025596618652
epoch: 61 training_loss 0.08217984536662698 test_loss: 0.08872560858726501
epoch: 62 training_loss 0.079121331628412 test_loss: 0.08462693095207215
epoch: 63 training_loss 0.07727006116881967 test_loss: 0.08575631380081176
epoch: 64 training_loss 0.07825644569471478 test_loss: 0.08269364833831787
epoch: 65 training_loss 0.08157939860597253 test_loss: 0.08591447472572326
epoch: 66 training_loss 0.07596583690494299 test_loss: 0.08043029308319091
epoch: 67 training_loss 0.07554486783221365 test_loss: 0.08162233233451843
epoch: 68 training_loss 0.080210036970675 test_loss: 0.08137180805206298
epoch: 69 training_loss 0.07764999687671661 test_loss: 0.08380932211875916
epoch: 70 training_loss 0.08153146300464868 test_loss: 0.08626156449317932
epoch: 71 training_loss 0.07851503080688417 test_loss: 0.06999889016151428
epoch: 72 training_loss 0.07721341353841125 test_loss: 0.08064583539962769
epoch: 73 training_loss 0.07416975242085755 test_loss: 0.09041393399238587
epoch: 74 training_loss 0.08066207459196448 test_loss: 0.08236168026924133
epoch: 75 training_loss 0.0778117261081934 test_loss: 0.08108198642730713
epoch: 76 training_loss 0.0797008611354977 test_loss: 0.0860572874546051
epoch: 77 training_loss 0.07531553864479065 test_loss: 0.09503479599952698
epoch: 78 training_loss 0.0812825514934957 test_loss: 0.0735619306564331
epoch: 79 training_loss 0.08246122589334845 test_loss: 0.09677255749702454
epoch: 80 training_loss 0.07817622223868965 test_loss: 0.08489564657211304
epoch: 81 training_loss 0.07969453565776348 test_loss: 0.0926576316356659
epoch: 82 training_loss 0.07555045239627362 test_loss: 0.08189210295677185
epoch: 83 training_loss 0.07743230096064507 test_loss: 0.08503424525260925
epoch: 84 training_loss 0.08126045927405358 test_loss: 0.08829872012138366
epoch: 85 training_loss 0.08118310454301536 test_loss: 0.08817667365074158
epoch: 86 training_loss 0.07853836772963405 test_loss: 0.08364965319633484
epoch: 87 training_loss 0.07442989140748978 test_loss: 0.08664100170135498
epoch: 88 training_loss 0.07659212863072754 test_loss: 0.08880792856216431
epoch: 89 training_loss 0.07532553857192398 test_loss: 0.0852622628211975
epoch: 90 training_loss 0.0772250195313245 test_loss: 0.0784900963306427
epoch: 91 training_loss 0.07947935681790114 test_loss: 0.08071060180664062
epoch: 92 training_loss 0.08058176031336188 test_loss: 0.0771405577659607
epoch: 93 training_loss 0.0772026992496103 test_loss: 0.08693329691886902
epoch: 94 training_loss 0.0804787285812199 test_loss: 0.06984258890151977
epoch: 95 training_loss 0.07665399972349406 test_loss: 0.07364156246185302
epoch: 96 training_loss 0.07972217222675682 test_loss: 0.07477031350135803
epoch: 97 training_loss 0.07900023121386766 test_loss: 0.07430419325828552
epoch: 98 training_loss 0.0810964871942997 test_loss: 0.08465122580528259
epoch: 99 training_loss 0.07714488855563104 test_loss: 0.07431910037994385
epoch: 100 training_loss 0.07642773215658963 test_loss: 0.09089481234550476
epoch: 101 training_loss 0.07241955527104438 test_loss: 0.09362055659294129
epoch: 102 training_loss 0.084279606025666 test_loss: 0.1018073558807373
epoch: 103 training_loss 0.07636608503758907 test_loss: 0.07728613018989564
epoch: 104 training_loss 0.07556294117122889 test_loss: 0.08634647130966186
epoch: 105 training_loss 0.07307466134428978 test_loss: 0.09355836510658264
epoch: 106 training_loss 0.0726161077618599 test_loss: 0.07398133873939514
epoch: 107 training_loss 0.0749341775663197 test_loss: 0.09080508351325989
epoch: 108 training_loss 0.07683023625053466 test_loss: 0.07704852223396301
epoch: 109 training_loss 0.07798479525372386 test_loss: 0.0689010500907898
epoch: 110 training_loss 0.07818527871742845 test_loss: 0.061967271566390994
epoch: 111 training_loss 0.0722237831261009 test_loss: 0.0690934956073761
epoch: 112 training_loss 0.06915742104873061 test_loss: 0.07846711874008179
epoch: 113 training_loss 0.08080932436510921 test_loss: 0.08524941802024841
epoch: 114 training_loss 0.08072644663974643 test_loss: 0.09400512576103211
epoch: 115 training_loss 0.08140001267194748 test_loss: 0.07623668313026429
epoch: 116 training_loss 0.07760491965338588 test_loss: 0.08128697872161865
epoch: 117 training_loss 0.0808122918009758 test_loss: 0.07693955302238464
epoch: 118 training_loss 0.07060855401679873 test_loss: 0.08303104043006897
epoch: 119 training_loss 0.0754517168365419 test_loss: 0.07279777526855469
epoch: 120 training_loss 0.08472253991290928 test_loss: 0.07373614311218261
epoch: 121 training_loss 0.0712868397962302 test_loss: 0.08885966539382935
epoch: 122 training_loss 0.07532895194366575 test_loss: 0.06948592066764832
epoch: 123 training_loss 0.08067833724431693 test_loss: 0.0708726167678833
epoch: 124 training_loss 0.07726734129711986 test_loss: 0.08334094882011414
epoch: 125 training_loss 0.0732338254712522 test_loss: 0.08427770137786865
epoch: 126 training_loss 0.08391979018226266 test_loss: 0.09223405122756959
epoch: 127 training_loss 0.07242348521947861 test_loss: 0.09212483763694763
epoch: 128 training_loss 0.07269130397588014 test_loss: 0.09126660823822022
epoch: 129 training_loss 0.0780555922538042 test_loss: 0.0858980655670166
epoch: 130 training_loss 0.07962548257783056 test_loss: 0.09555625319480895
epoch: 131 training_loss 0.07779822370037437 test_loss: 0.07046805024147033
epoch: 132 training_loss 0.07146196458488703 test_loss: 0.08991592526435851
epoch: 133 training_loss 0.07678272418677806 test_loss: 0.07362907528877258
epoch: 134 training_loss 0.07761037181131542 test_loss: 0.07627018094062805
epoch: 135 training_loss 0.0732279153726995 test_loss: 0.08676685690879822
epoch: 136 training_loss 0.07842684768140316 test_loss: 0.08223029375076293
epoch: 137 training_loss 0.07122088994365186 test_loss: 0.08386932611465454
epoch: 138 training_loss 0.07431993981823325 test_loss: 0.07435550689697265
epoch: 139 training_loss 0.08058531373739243 test_loss: 0.09552172422409058
epoch: 140 training_loss 0.07569081279449165 test_loss: 0.08295826315879821
epoch: 141 training_loss 0.08096302689984441 test_loss: 0.07823932766914368
epoch: 142 training_loss 0.07291260177269578 test_loss: 0.09238439798355103
epoch: 143 training_loss 0.07498877907171846 test_loss: 0.08845370411872863
epoch: 144 training_loss 0.07215236363932491 test_loss: 0.06817669272422791
epoch: 145 training_loss 0.07638318285346031 test_loss: 0.09241070747375488
epoch: 146 training_loss 0.07990045910701156 test_loss: 0.08738157153129578
epoch: 147 training_loss 0.07821861987933516 test_loss: 0.06883346438407897
epoch: 148 training_loss 0.08047087451443076 test_loss: 0.07293412685394288
epoch: 149 training_loss 0.07585143031552434 test_loss: 0.07691798210144044
epoch: 0 training_loss 36.739563961029056 test_loss: 20.496194458007814
epoch: 1 training_loss 16.340992603302002 test_loss: 13.09655303955078
epoch: 2 training_loss 12.315836486816407 test_loss: 10.9510498046875
epoch: 3 training_loss 10.209191899299622 test_loss: 9.623337554931641
epoch: 4 training_loss 9.034161825180053 test_loss: 8.386787414550781
epoch: 5 training_loss 8.285962071418762 test_loss: 7.691129302978515
epoch: 6 training_loss 7.559858045578003 test_loss: 7.3945068359375
epoch: 7 training_loss 7.052324547767639 test_loss: 6.677996063232422
epoch: 8 training_loss 6.747642936706543 test_loss: 6.819629669189453
epoch: 9 training_loss 6.334430108070373 test_loss: 6.3766735076904295
epoch: 10 training_loss 5.999928202629089 test_loss: 5.97490348815918
epoch: 11 training_loss 5.712550482749939 test_loss: 5.5491687774658205
epoch: 12 training_loss 5.667730412483215 test_loss: 5.560300064086914
epoch: 13 training_loss 5.304924747943878 test_loss: 5.248556137084961
epoch: 14 training_loss 5.206686787605285 test_loss: 4.881143569946289
epoch: 15 training_loss 5.129236826896667 test_loss: 5.193718719482422
epoch: 16 training_loss 4.813421742916107 test_loss: 4.999176406860352
epoch: 17 training_loss 4.8343025088310245 test_loss: 4.982372283935547
epoch: 18 training_loss 4.6855142259597775 test_loss: 4.800663757324219
epoch: 19 training_loss 4.5327333641052245 test_loss: 4.614718627929688
epoch: 20 training_loss 4.517776870727539 test_loss: 4.4629570007324215
epoch: 21 training_loss 4.375433328151703 test_loss: 4.181898498535157
epoch: 22 training_loss 4.254783747196197 test_loss: 4.0784751892089846
epoch: 23 training_loss 4.196220436096191 test_loss: 4.139841461181641
epoch: 24 training_loss 4.063716540336609 test_loss: 4.248351287841797
epoch: 25 training_loss 4.129473526477813 test_loss: 3.9149959564208983
epoch: 26 training_loss 4.063954026699066 test_loss: 4.079482650756836
epoch: 27 training_loss 3.929285409450531 test_loss: 4.02416763305664
epoch: 28 training_loss 3.980316894054413 test_loss: 3.962916946411133
epoch: 29 training_loss 3.8449575018882753 test_loss: 3.765462875366211
epoch: 30 training_loss 3.840290138721466 test_loss: 3.9111438751220704
epoch: 31 training_loss 3.790007872581482 test_loss: 3.8541927337646484
epoch: 32 training_loss 3.765192391872406 test_loss: 3.7027824401855467
epoch: 33 training_loss 3.656377055644989 test_loss: 3.646402359008789
epoch: 34 training_loss 3.801467971801758 test_loss: 3.6136920928955076
epoch: 35 training_loss 3.5737301301956177 test_loss: 3.6849849700927733
epoch: 36 training_loss 3.5442010879516603 test_loss: 3.4411941528320313
epoch: 37 training_loss 3.60748726606369 test_loss: 3.453190231323242
epoch: 38 training_loss 3.469506640434265 test_loss: 3.604596710205078
epoch: 39 training_loss 3.53533447265625 test_loss: 3.482067108154297
epoch: 40 training_loss 3.4856380462646483 test_loss: 3.5700942993164064
epoch: 41 training_loss 3.4711092281341553 test_loss: 3.419974136352539
epoch: 42 training_loss 3.4027245616912842 test_loss: 3.2211021423339843
epoch: 43 training_loss 3.2531732320785522 test_loss: 3.258950424194336
epoch: 44 training_loss 3.2773685574531557 test_loss: 3.3480709075927733
epoch: 45 training_loss 3.32761714220047 test_loss: 3.310141372680664
epoch: 46 training_loss 3.3128641772270204 test_loss: 3.1671525955200197
epoch: 47 training_loss 3.3846444606781008 test_loss: 3.1677480697631837
epoch: 48 training_loss 3.2494049048423768 test_loss: 3.3485271453857424
epoch: 49 training_loss 3.286074893474579 test_loss: 3.3095142364501955
epoch: 50 training_loss 3.2153051495552063 test_loss: 3.2636787414550783
epoch: 51 training_loss 3.1592376255989074 test_loss: 3.126055908203125
epoch: 52 training_loss 3.081698548793793 test_loss: 3.031671905517578
epoch: 53 training_loss 3.1886301374435426 test_loss: 3.2446517944335938
epoch: 54 training_loss 3.036651418209076 test_loss: 3.130422019958496
epoch: 55 training_loss 3.1367609810829165 test_loss: 3.1847558975219727
epoch: 56 training_loss 3.0767325496673585 test_loss: 3.108908843994141
epoch: 57 training_loss 3.0352626061439514 test_loss: 3.048287200927734
epoch: 58 training_loss 3.1445275020599364 test_loss: 2.91848087310791
epoch: 59 training_loss 3.0307131099700926 test_loss: 2.899897575378418
epoch: 60 training_loss 3.071653916835785 test_loss: 3.0524126052856446
epoch: 61 training_loss 2.983636598587036 test_loss: 2.8859634399414062
epoch: 62 training_loss 2.9562490129470826 test_loss: 3.031464767456055
epoch: 63 training_loss 2.963095738887787 test_loss: 2.9833093643188477
epoch: 64 training_loss 2.9542526268959044 test_loss: 2.940765380859375
epoch: 65 training_loss 3.0103283524513245 test_loss: 2.9956594467163087
epoch: 66 training_loss 2.992172908782959 test_loss: 2.9917118072509767
epoch: 67 training_loss 2.880934178829193 test_loss: 2.8409908294677733
epoch: 68 training_loss 2.8677166080474854 test_loss: 2.7949682235717774
epoch: 69 training_loss 2.7752501153945923 test_loss: 2.8565656661987306
epoch: 70 training_loss 2.861325706243515 test_loss: 3.100933074951172
epoch: 71 training_loss 2.8274719309806824 test_loss: 2.836400032043457
epoch: 72 training_loss 2.863281590938568 test_loss: 2.8306707382202148
epoch: 73 training_loss 2.8511747455596925 test_loss: 2.7728248596191407
epoch: 74 training_loss 2.8094220685958864 test_loss: 2.9604806900024414
epoch: 75 training_loss 2.695129292011261 test_loss: 2.8643352508544924
epoch: 76 training_loss 2.8284555006027223 test_loss: 2.6770442962646483
epoch: 77 training_loss 2.814703130722046 test_loss: 2.982371520996094
epoch: 78 training_loss 2.8158639335632323 test_loss: 2.824693489074707
epoch: 79 training_loss 2.7487581968307495 test_loss: 2.7412364959716795
epoch: 80 training_loss 2.830142102241516 test_loss: 2.657158088684082
epoch: 81 training_loss 2.831818380355835 test_loss: 2.7739912033081056
epoch: 82 training_loss 2.7865630340576173 test_loss: 2.8210800170898436
epoch: 83 training_loss 2.736832842826843 test_loss: 2.8497711181640626
epoch: 84 training_loss 2.7395748400688174 test_loss: 2.601476860046387
epoch: 85 training_loss 2.7034199476242065 test_loss: 2.8137447357177736
epoch: 86 training_loss 2.628103768825531 test_loss: 2.704495429992676
epoch: 87 training_loss 2.7307294535636903 test_loss: 2.844341278076172
epoch: 88 training_loss 2.7049531054496767 test_loss: 2.659925842285156
epoch: 89 training_loss 2.6668856036663056 test_loss: 2.632479667663574
epoch: 90 training_loss 2.684141491651535 test_loss: 2.651708984375
epoch: 91 training_loss 2.68144700884819 test_loss: 2.805164909362793
epoch: 92 training_loss 2.6755133748054503 test_loss: 2.7934734344482424
epoch: 93 training_loss 2.6565844237804415 test_loss: 2.5991098403930666
epoch: 94 training_loss 2.673717427253723 test_loss: 2.676359939575195
epoch: 95 training_loss 2.666764341592789 test_loss: 2.5749753952026366
epoch: 96 training_loss 2.640835688114166 test_loss: 2.5651079177856446
epoch: 97 training_loss 2.6144197261333466 test_loss: 2.7515268325805664
epoch: 98 training_loss 2.531380842924118 test_loss: 2.5682722091674806
epoch: 99 training_loss 2.6175724172592165 test_loss: 2.547163200378418
epoch: 100 training_loss 2.642370603084564 test_loss: 2.619817924499512
epoch: 101 training_loss 2.618470892906189 test_loss: 2.619301605224609
epoch: 102 training_loss 2.6161794781684877 test_loss: 2.536910057067871
epoch: 103 training_loss 2.5706753659248354 test_loss: 2.71032772064209
epoch: 104 training_loss 2.540373054742813 test_loss: 2.518269729614258
epoch: 105 training_loss 2.588240908384323 test_loss: 2.656614875793457
epoch: 106 training_loss 2.6096691942214965 test_loss: 2.5632659912109377
epoch: 107 training_loss 2.5018400621414183 test_loss: 2.399156379699707
epoch: 108 training_loss 2.521472688913345 test_loss: 2.435369300842285
epoch: 109 training_loss 2.543295533657074 test_loss: 2.596550178527832
epoch: 110 training_loss 2.5014796817302702 test_loss: 2.4373884201049805
epoch: 111 training_loss 2.5588737046718597 test_loss: 2.511083984375
epoch: 112 training_loss 2.5508810091018677 test_loss: 2.5692800521850585
epoch: 113 training_loss 2.5156380844116213 test_loss: 2.508006286621094
epoch: 114 training_loss 2.4814727365970612 test_loss: 2.7017873764038085
epoch: 115 training_loss 2.508431944847107 test_loss: 2.341134262084961
epoch: 116 training_loss 2.485271552801132 test_loss: 2.3899688720703125
epoch: 117 training_loss 2.517171660661697 test_loss: 2.319442939758301
epoch: 118 training_loss 2.4319882905483245 test_loss: 2.366526794433594
epoch: 119 training_loss 2.457320394515991 test_loss: 2.460746002197266
epoch: 120 training_loss 2.500403573513031 test_loss: 2.526455879211426
epoch: 121 training_loss 2.4650826478004455 test_loss: 2.6577566146850584
epoch: 122 training_loss 2.4655415844917297 test_loss: 2.651806640625
epoch: 123 training_loss 2.470255967378616 test_loss: 2.513483428955078
epoch: 124 training_loss 2.458256120681763 test_loss: 2.3859079360961912
epoch: 125 training_loss 2.4172070276737214 test_loss: 2.374003601074219
epoch: 126 training_loss 2.431854249238968 test_loss: 2.400810432434082
epoch: 127 training_loss 2.4763183426856994 test_loss: 2.408140182495117
epoch: 128 training_loss 2.4078693759441374 test_loss: 2.462346076965332
epoch: 129 training_loss 2.4040255868434905 test_loss: 2.380367469787598
epoch: 130 training_loss 2.4143816208839417 test_loss: 2.4332775115966796
epoch: 131 training_loss 2.365068978071213 test_loss: 2.534174156188965
epoch: 132 training_loss 2.396986645460129 test_loss: 2.3579330444335938
epoch: 133 training_loss 2.4212636053562164 test_loss: 2.4130630493164062
epoch: 134 training_loss 2.389337363243103 test_loss: 2.464454460144043
epoch: 135 training_loss 2.368293422460556 test_loss: 2.3130908966064454
epoch: 136 training_loss 2.4251552486419676 test_loss: 2.3456865310668946
epoch: 137 training_loss 2.3225694847106935 test_loss: 2.328490447998047
epoch: 138 training_loss 2.3844240427017214 test_loss: 2.388831901550293
epoch: 139 training_loss 2.454260503053665 test_loss: 2.3511938095092773
epoch: 140 training_loss 2.389186818599701 test_loss: 2.3983762741088865
epoch: 141 training_loss 2.3513014733791353 test_loss: 2.2647069931030273
epoch: 142 training_loss 2.3181918144226072 test_loss: 2.4147449493408204
epoch: 143 training_loss 2.4380591106414795 test_loss: 2.4201501846313476
epoch: 144 training_loss 2.3616695404052734 test_loss: 2.402140998840332
epoch: 145 training_loss 2.371112954616547 test_loss: 2.429019546508789
epoch: 146 training_loss 2.309745055437088 test_loss: 2.361937141418457
epoch: 147 training_loss 2.2870417034626005 test_loss: 2.3576669692993164
epoch: 148 training_loss 2.3334310948848724 test_loss: 2.351384162902832
epoch: 149 training_loss 2.3423524463176726 test_loss: 2.3141828536987306
3505.3797591884604
episode: 0 training return: tensor(224.9219, device='cuda:0')
episode: 1 training return: tensor(29.3771, device='cuda:0')
episode: 2 training return: tensor(101.1083, device='cuda:0')
episode: 3 training return: tensor(-819.9824, device='cuda:0')
epoch: 1 test_true_pfm: 3703.071782361811 sim_pfm: 330.0271284388048
episode: 4 training return: tensor(-432.8214, device='cuda:0')
episode: 5 training return: tensor(86.4025, device='cuda:0')
episode: 6 training return: tensor(297.3853, device='cuda:0')
episode: 7 training return: tensor(-11.5553, device='cuda:0')
epoch: 2 test_true_pfm: 3702.5183798602734 sim_pfm: 260.880542295917
episode: 8 training return: tensor(186.8032, device='cuda:0')
episode: 9 training return: tensor(67.3765, device='cuda:0')
episode: 10 training return: tensor(470.2709, device='cuda:0')
episode: 11 training return: tensor(287.4631, device='cuda:0')
epoch: 3 test_true_pfm: 3709.133430352856 sim_pfm: 225.1479495273185
episode: 12 training return: tensor(421.7039, device='cuda:0')
episode: 13 training return: tensor(426.3659, device='cuda:0')
episode: 14 training return: tensor(308.9225, device='cuda:0')
episode: 15 training return: tensor(446.6103, device='cuda:0')
epoch: 4 test_true_pfm: 3941.2986177035436 sim_pfm: 307.91712445387384
episode: 16 training return: tensor(130.2932, device='cuda:0')
episode: 17 training return: tensor(346.4284, device='cuda:0')
episode: 18 training return: tensor(405.7617, device='cuda:0')
episode: 19 training return: tensor(321.7461, device='cuda:0')
epoch: 5 test_true_pfm: 3791.6489480369114 sim_pfm: 223.90292877485626
episode: 20 training return: tensor(447.9274, device='cuda:0')
episode: 21 training return: tensor(196.2015, device='cuda:0')
episode: 22 training return: tensor(348.6708, device='cuda:0')
episode: 23 training return: tensor(380.6723, device='cuda:0')
epoch: 6 test_true_pfm: 3806.0582188898843 sim_pfm: 340.59592869877815
episode: 24 training return: tensor(362.4319, device='cuda:0')
episode: 25 training return: tensor(169.5153, device='cuda:0')
episode: 26 training return: tensor(-811.4799, device='cuda:0')
episode: 27 training return: tensor(320.4102, device='cuda:0')
epoch: 7 test_true_pfm: 3935.4436619668327 sim_pfm: 296.8993671212617
episode: 28 training return: tensor(138.7169, device='cuda:0')
episode: 29 training return: tensor(214.9269, device='cuda:0')
episode: 30 training return: tensor(247.6821, device='cuda:0')
episode: 31 training return: tensor(246.9575, device='cuda:0')
epoch: 8 test_true_pfm: 3848.984629969665 sim_pfm: 298.14849829894956
episode: 32 training return: tensor(382.0188, device='cuda:0')
episode: 33 training return: tensor(-802.1839, device='cuda:0')
episode: 34 training return: tensor(352.3944, device='cuda:0')
episode: 35 training return: tensor(181.2055, device='cuda:0')
epoch: 9 test_true_pfm: 3846.100206141677 sim_pfm: 307.3634406612643
episode: 36 training return: tensor(250.6993, device='cuda:0')
episode: 37 training return: tensor(210.8682, device='cuda:0')
episode: 38 training return: tensor(348.1669, device='cuda:0')
episode: 39 training return: tensor(364.7230, device='cuda:0')
epoch: 10 test_true_pfm: 2727.281983117206 sim_pfm: -95.14177852125916
episode: 40 training return: tensor(164.3504, device='cuda:0')
episode: 41 training return: tensor(349.1423, device='cuda:0')
episode: 42 training return: tensor(382.7417, device='cuda:0')
episode: 43 training return: tensor(310.9167, device='cuda:0')
epoch: 11 test_true_pfm: 3757.1050213407048 sim_pfm: 324.4247317268358
episode: 44 training return: tensor(338.0874, device='cuda:0')
episode: 45 training return: tensor(-824.5756, device='cuda:0')
episode: 46 training return: tensor(165.6037, device='cuda:0')
episode: 47 training return: tensor(383.8674, device='cuda:0')
epoch: 12 test_true_pfm: 3914.2176811937934 sim_pfm: 380.3144345499459
episode: 48 training return: tensor(280.0738, device='cuda:0')
episode: 49 training return: tensor(277.0433, device='cuda:0')
episode: 50 training return: tensor(257.2621, device='cuda:0')
episode: 51 training return: tensor(293.5222, device='cuda:0')
epoch: 13 test_true_pfm: 3843.9398099224545 sim_pfm: 329.8032391098095
episode: 52 training return: tensor(350.7982, device='cuda:0')
episode: 53 training return: tensor(477.2321, device='cuda:0')
episode: 54 training return: tensor(370.7266, device='cuda:0')
episode: 55 training return: tensor(300.8291, device='cuda:0')
epoch: 14 test_true_pfm: 3817.8000532725505 sim_pfm: 314.3192843525515
episode: 56 training return: tensor(365.0498, device='cuda:0')
episode: 57 training return: tensor(454.3869, device='cuda:0')
episode: 58 training return: tensor(182.7146, device='cuda:0')
episode: 59 training return: tensor(471.5829, device='cuda:0')
epoch: 15 test_true_pfm: 3883.944608197559 sim_pfm: 314.7827447723054
episode: 60 training return: tensor(460.3677, device='cuda:0')
episode: 61 training return: tensor(381.1656, device='cuda:0')
episode: 62 training return: tensor(227.4387, device='cuda:0')
episode: 63 training return: tensor(428.9981, device='cuda:0')
epoch: 16 test_true_pfm: 3841.69971659044 sim_pfm: 396.8970170559866
episode: 64 training return: tensor(251.2343, device='cuda:0')
episode: 65 training return: tensor(306.5685, device='cuda:0')
episode: 66 training return: tensor(405.9294, device='cuda:0')
episode: 67 training return: tensor(229.5502, device='cuda:0')
epoch: 17 test_true_pfm: 3820.3154722270415 sim_pfm: 289.7788272529239
episode: 68 training return: tensor(-572.3912, device='cuda:0')
episode: 69 training return: tensor(275.7416, device='cuda:0')
episode: 70 training return: tensor(392.7388, device='cuda:0')
episode: 71 training return: tensor(353.5865, device='cuda:0')
epoch: 18 test_true_pfm: 3788.736858299452 sim_pfm: 344.5507552770432
episode: 72 training return: tensor(417.5619, device='cuda:0')
episode: 73 training return: tensor(355.1449, device='cuda:0')
episode: 74 training return: tensor(317.5538, device='cuda:0')
episode: 75 training return: tensor(205.8504, device='cuda:0')
epoch: 19 test_true_pfm: 3723.9524543142666 sim_pfm: 308.1944577489242
episode: 76 training return: tensor(388.4529, device='cuda:0')
episode: 77 training return: tensor(268.4586, device='cuda:0')
episode: 78 training return: tensor(358.2733, device='cuda:0')
episode: 79 training return: tensor(354.4780, device='cuda:0')
epoch: 20 test_true_pfm: 3894.4303580447954 sim_pfm: 351.3144855397404
episode: 80 training return: tensor(410.1620, device='cuda:0')
episode: 81 training return: tensor(445.0293, device='cuda:0')
episode: 82 training return: tensor(296.5387, device='cuda:0')
episode: 83 training return: tensor(259.5632, device='cuda:0')
epoch: 21 test_true_pfm: 3859.7223809562115 sim_pfm: 344.54222342264256
episode: 84 training return: tensor(-701.5340, device='cuda:0')
episode: 85 training return: tensor(252.4766, device='cuda:0')
episode: 86 training return: tensor(211.3740, device='cuda:0')
episode: 87 training return: tensor(393.8554, device='cuda:0')
epoch: 22 test_true_pfm: 3809.6362895610905 sim_pfm: -11.431674696388654
episode: 88 training return: tensor(342.2026, device='cuda:0')
episode: 89 training return: tensor(410.5657, device='cuda:0')
episode: 90 training return: tensor(-719.4295, device='cuda:0')
episode: 91 training return: tensor(412.8891, device='cuda:0')
epoch: 23 test_true_pfm: 3903.158363500444 sim_pfm: 373.35796009811264
episode: 92 training return: tensor(218.9017, device='cuda:0')
episode: 93 training return: tensor(382.4133, device='cuda:0')
episode: 94 training return: tensor(394.3537, device='cuda:0')
episode: 95 training return: tensor(281.4133, device='cuda:0')
epoch: 24 test_true_pfm: 3872.284155917498 sim_pfm: 288.44800776421715
episode: 96 training return: tensor(271.7825, device='cuda:0')
episode: 97 training return: tensor(303.7929, device='cuda:0')
episode: 98 training return: tensor(-662.3432, device='cuda:0')
episode: 99 training return: tensor(245.2824, device='cuda:0')
epoch: 25 test_true_pfm: 3932.9145538479274 sim_pfm: 368.5426137144095
episode: 100 training return: tensor(291.7756, device='cuda:0')
episode: 101 training return: tensor(415.8745, device='cuda:0')
episode: 102 training return: tensor(393.9333, device='cuda:0')
episode: 103 training return: tensor(-636.7292, device='cuda:0')
epoch: 26 test_true_pfm: 3826.7689709789297 sim_pfm: 332.17090650078416
episode: 104 training return: tensor(329.5980, device='cuda:0')
episode: 105 training return: tensor(316.6660, device='cuda:0')
episode: 106 training return: tensor(232.1545, device='cuda:0')
episode: 107 training return: tensor(351.5834, device='cuda:0')
epoch: 27 test_true_pfm: 3910.3758655340084 sim_pfm: 358.26725677686045
episode: 108 training return: tensor(316.4542, device='cuda:0')
episode: 109 training return: tensor(359.4148, device='cuda:0')
episode: 110 training return: tensor(425.7394, device='cuda:0')
episode: 111 training return: tensor(334.3849, device='cuda:0')
epoch: 28 test_true_pfm: 3928.7109183136513 sim_pfm: 295.21087266965577
episode: 112 training return: tensor(379.8098, device='cuda:0')
episode: 113 training return: tensor(383.0238, device='cuda:0')
episode: 114 training return: tensor(261.7740, device='cuda:0')
episode: 115 training return: tensor(324.5905, device='cuda:0')
epoch: 29 test_true_pfm: 3903.346126779315 sim_pfm: -378.1775108343766
episode: 116 training return: tensor(268.4627, device='cuda:0')
episode: 117 training return: tensor(294.8094, device='cuda:0')
episode: 118 training return: tensor(331.4075, device='cuda:0')
episode: 119 training return: tensor(312.6825, device='cuda:0')
epoch: 30 test_true_pfm: 3827.32217058762 sim_pfm: 337.59113910689484
episode: 120 training return: tensor(421.6532, device='cuda:0')
episode: 121 training return: tensor(344.1909, device='cuda:0')
episode: 122 training return: tensor(383.1753, device='cuda:0')
episode: 123 training return: tensor(376.7551, device='cuda:0')
epoch: 31 test_true_pfm: 3936.305529828923 sim_pfm: 311.0636753103463
episode: 124 training return: tensor(212.2402, device='cuda:0')
episode: 125 training return: tensor(417.8630, device='cuda:0')
episode: 126 training return: tensor(378.2947, device='cuda:0')
episode: 127 training return: tensor(252.1369, device='cuda:0')
epoch: 32 test_true_pfm: 3896.9023245618687 sim_pfm: 322.85808855988824
episode: 128 training return: tensor(313.5807, device='cuda:0')
episode: 129 training return: tensor(415.3129, device='cuda:0')
episode: 130 training return: tensor(233.5955, device='cuda:0')
episode: 131 training return: tensor(343.1671, device='cuda:0')
epoch: 33 test_true_pfm: 3985.3252010368524 sim_pfm: 387.0666174937505
episode: 132 training return: tensor(270.3201, device='cuda:0')
episode: 133 training return: tensor(352.7018, device='cuda:0')
episode: 134 training return: tensor(249.6761, device='cuda:0')
episode: 135 training return: tensor(318.5279, device='cuda:0')
epoch: 34 test_true_pfm: 4075.3418266924036 sim_pfm: 383.7098130293986
episode: 136 training return: tensor(313.2776, device='cuda:0')
episode: 137 training return: tensor(396.3539, device='cuda:0')
episode: 138 training return: tensor(338.9492, device='cuda:0')
episode: 139 training return: tensor(240.4345, device='cuda:0')
epoch: 35 test_true_pfm: 3882.020672151984 sim_pfm: 298.22663544503547
episode: 140 training return: tensor(274.9403, device='cuda:0')
episode: 141 training return: tensor(403.7520, device='cuda:0')
episode: 142 training return: tensor(283.8350, device='cuda:0')
episode: 143 training return: tensor(456.0224, device='cuda:0')
epoch: 36 test_true_pfm: 2786.1736552005154 sim_pfm: 19.629035869962536
episode: 144 training return: tensor(460.1196, device='cuda:0')
episode: 145 training return: tensor(327.1263, device='cuda:0')
episode: 146 training return: tensor(321.3546, device='cuda:0')
episode: 147 training return: tensor(200.2463, device='cuda:0')
epoch: 37 test_true_pfm: 2818.2974537331265 sim_pfm: 335.6784721587707
episode: 148 training return: tensor(276.1286, device='cuda:0')
episode: 149 training return: tensor(292.4162, device='cuda:0')
episode: 150 training return: tensor(334.1347, device='cuda:0')
episode: 151 training return: tensor(165.8478, device='cuda:0')
epoch: 38 test_true_pfm: 4107.131596100822 sim_pfm: 365.62057027207146
episode: 152 training return: tensor(393.7646, device='cuda:0')
episode: 153 training return: tensor(334.7823, device='cuda:0')
episode: 154 training return: tensor(301.3785, device='cuda:0')
episode: 155 training return: tensor(284.5393, device='cuda:0')
epoch: 39 test_true_pfm: 3865.2803545402912 sim_pfm: 368.09594290168025
episode: 156 training return: tensor(329.3135, device='cuda:0')
episode: 157 training return: tensor(299.8924, device='cuda:0')
episode: 158 training return: tensor(432.9631, device='cuda:0')
episode: 159 training return: tensor(381.9050, device='cuda:0')
epoch: 40 test_true_pfm: 3961.5933612088284 sim_pfm: 341.6784502143758
episode: 160 training return: tensor(496.7516, device='cuda:0')
episode: 161 training return: tensor(304.6104, device='cuda:0')
episode: 162 training return: tensor(427.0177, device='cuda:0')
episode: 163 training return: tensor(296.2539, device='cuda:0')
epoch: 41 test_true_pfm: 3928.5152831969776 sim_pfm: 335.71425904524705
episode: 164 training return: tensor(301.2132, device='cuda:0')
episode: 165 training return: tensor(398.7106, device='cuda:0')
episode: 166 training return: tensor(393.3430, device='cuda:0')
episode: 167 training return: tensor(387.5507, device='cuda:0')
epoch: 42 test_true_pfm: 4038.8843714810714 sim_pfm: 22.3571622382442
episode: 168 training return: tensor(307.7691, device='cuda:0')
episode: 169 training return: tensor(368.6563, device='cuda:0')
episode: 170 training return: tensor(220.2361, device='cuda:0')
episode: 171 training return: tensor(298.9113, device='cuda:0')
epoch: 43 test_true_pfm: 3817.673012809646 sim_pfm: 297.32352434964076
episode: 172 training return: tensor(-732.3444, device='cuda:0')
episode: 173 training return: tensor(344.8642, device='cuda:0')
episode: 174 training return: tensor(330.0243, device='cuda:0')
episode: 175 training return: tensor(445.6734, device='cuda:0')
epoch: 44 test_true_pfm: 3907.458122390836 sim_pfm: 45.7045672602059
episode: 176 training return: tensor(360.5441, device='cuda:0')
episode: 177 training return: tensor(418.1944, device='cuda:0')
episode: 178 training return: tensor(235.3991, device='cuda:0')
episode: 179 training return: tensor(465.9213, device='cuda:0')
epoch: 45 test_true_pfm: 3920.0889751315776 sim_pfm: 362.7323032526668
episode: 180 training return: tensor(415.8031, device='cuda:0')
episode: 181 training return: tensor(368.8409, device='cuda:0')
episode: 182 training return: tensor(145.7430, device='cuda:0')
episode: 183 training return: tensor(343.3093, device='cuda:0')
epoch: 46 test_true_pfm: 4065.4358956303245 sim_pfm: 402.9585943123481
episode: 184 training return: tensor(388.4637, device='cuda:0')
episode: 185 training return: tensor(393.9736, device='cuda:0')
episode: 186 training return: tensor(355.7702, device='cuda:0')
episode: 187 training return: tensor(206.2786, device='cuda:0')
epoch: 47 test_true_pfm: 3974.7080338803794 sim_pfm: 333.4027820479241
episode: 188 training return: tensor(350.8318, device='cuda:0')
episode: 189 training return: tensor(435.0916, device='cuda:0')
episode: 190 training return: tensor(329.1116, device='cuda:0')
episode: 191 training return: tensor(494.1342, device='cuda:0')
epoch: 48 test_true_pfm: 3818.6054736302644 sim_pfm: 351.2204153285905
episode: 192 training return: tensor(351.0570, device='cuda:0')
episode: 193 training return: tensor(288.6965, device='cuda:0')
episode: 194 training return: tensor(449.3000, device='cuda:0')
episode: 195 training return: tensor(387.1093, device='cuda:0')
epoch: 49 test_true_pfm: 3896.6875533815705 sim_pfm: 378.79177288981737
episode: 196 training return: tensor(353.0182, device='cuda:0')
episode: 197 training return: tensor(251.8699, device='cuda:0')
episode: 198 training return: tensor(433.3955, device='cuda:0')
episode: 199 training return: tensor(347.1769, device='cuda:0')
epoch: 50 test_true_pfm: 3983.8523218119212 sim_pfm: 357.04333253239747
episode: 200 training return: tensor(164.0938, device='cuda:0')
episode: 201 training return: tensor(390.2644, device='cuda:0')
episode: 202 training return: tensor(413.5526, device='cuda:0')
episode: 203 training return: tensor(425.2638, device='cuda:0')
epoch: 51 test_true_pfm: 4031.5584697695454 sim_pfm: 458.79592161951587
episode: 204 training return: tensor(373.4300, device='cuda:0')
episode: 205 training return: tensor(313.2262, device='cuda:0')
episode: 206 training return: tensor(431.1874, device='cuda:0')
episode: 207 training return: tensor(293.7527, device='cuda:0')
epoch: 52 test_true_pfm: 3717.836709258458 sim_pfm: 430.3883030454551
episode: 208 training return: tensor(371.9255, device='cuda:0')
episode: 209 training return: tensor(279.9197, device='cuda:0')
episode: 210 training return: tensor(336.0854, device='cuda:0')
episode: 211 training return: tensor(410.1637, device='cuda:0')
epoch: 53 test_true_pfm: 3877.7055826695137 sim_pfm: 358.2306021150046
episode: 212 training return: tensor(363.6362, device='cuda:0')
episode: 213 training return: tensor(225.7612, device='cuda:0')
episode: 214 training return: tensor(386.0615, device='cuda:0')
episode: 215 training return: tensor(318.1089, device='cuda:0')
epoch: 54 test_true_pfm: 3966.7529836828676 sim_pfm: 347.15491421202506
episode: 216 training return: tensor(267.7332, device='cuda:0')
episode: 217 training return: tensor(427.8810, device='cuda:0')
episode: 218 training return: tensor(466.8335, device='cuda:0')
episode: 219 training return: tensor(391.6740, device='cuda:0')
epoch: 55 test_true_pfm: 3994.7456580091443 sim_pfm: 423.39230811735615
episode: 220 training return: tensor(370.1862, device='cuda:0')
episode: 221 training return: tensor(333.8946, device='cuda:0')
episode: 222 training return: tensor(292.8746, device='cuda:0')
episode: 223 training return: tensor(319.9444, device='cuda:0')
epoch: 56 test_true_pfm: 4008.552628926607 sim_pfm: 357.3532401105622
episode: 224 training return: tensor(463.1803, device='cuda:0')
episode: 225 training return: tensor(167.5090, device='cuda:0')
episode: 226 training return: tensor(475.0163, device='cuda:0')
episode: 227 training return: tensor(390.1038, device='cuda:0')
epoch: 57 test_true_pfm: 3924.959111313144 sim_pfm: 376.3586657381966
episode: 228 training return: tensor(370.0875, device='cuda:0')
episode: 229 training return: tensor(307.1549, device='cuda:0')
episode: 230 training return: tensor(263.5552, device='cuda:0')
episode: 231 training return: tensor(450.9113, device='cuda:0')
epoch: 58 test_true_pfm: 3977.36508668265 sim_pfm: 439.8860118340138
episode: 232 training return: tensor(400.7304, device='cuda:0')
episode: 233 training return: tensor(246.5381, device='cuda:0')
episode: 234 training return: tensor(283.0027, device='cuda:0')
episode: 235 training return: tensor(404.1990, device='cuda:0')
epoch: 59 test_true_pfm: 4032.440327414975 sim_pfm: 350.25184573756997
episode: 236 training return: tensor(425.2826, device='cuda:0')
episode: 237 training return: tensor(233.0720, device='cuda:0')
episode: 238 training return: tensor(329.1513, device='cuda:0')
episode: 239 training return: tensor(290.6369, device='cuda:0')
epoch: 60 test_true_pfm: 3998.714285890743 sim_pfm: 392.30724745515425
episode: 240 training return: tensor(393.8383, device='cuda:0')
episode: 241 training return: tensor(386.1646, device='cuda:0')
episode: 242 training return: tensor(307.2199, device='cuda:0')
episode: 243 training return: tensor(359.4073, device='cuda:0')
epoch: 61 test_true_pfm: 3949.6419557704344 sim_pfm: 331.4087050876212
episode: 244 training return: tensor(297.3246, device='cuda:0')
episode: 245 training return: tensor(396.6255, device='cuda:0')
episode: 246 training return: tensor(319.1423, device='cuda:0')
episode: 247 training return: tensor(473.9617, device='cuda:0')
epoch: 62 test_true_pfm: 3903.115407998201 sim_pfm: 360.6862082462564
episode: 248 training return: tensor(344.8691, device='cuda:0')
episode: 249 training return: tensor(458.8980, device='cuda:0')
episode: 250 training return: tensor(325.4030, device='cuda:0')
episode: 251 training return: tensor(406.9685, device='cuda:0')
epoch: 63 test_true_pfm: 3863.0475550148644 sim_pfm: 369.63057663062744
episode: 252 training return: tensor(216.3145, device='cuda:0')
episode: 253 training return: tensor(302.9619, device='cuda:0')
episode: 254 training return: tensor(309.7466, device='cuda:0')
episode: 255 training return: tensor(405.1574, device='cuda:0')
epoch: 64 test_true_pfm: 3943.2476139642845 sim_pfm: 290.8961422270028
episode: 256 training return: tensor(139.4155, device='cuda:0')
episode: 257 training return: tensor(492.2926, device='cuda:0')
episode: 258 training return: tensor(261.5268, device='cuda:0')
episode: 259 training return: tensor(284.6634, device='cuda:0')
epoch: 65 test_true_pfm: 3963.3018648268935 sim_pfm: 302.20148260577116
episode: 260 training return: tensor(352.9389, device='cuda:0')
episode: 261 training return: tensor(296.9928, device='cuda:0')
episode: 262 training return: tensor(258.6523, device='cuda:0')
episode: 263 training return: tensor(462.5499, device='cuda:0')
epoch: 66 test_true_pfm: 3833.453142662143 sim_pfm: 310.09976688571624
episode: 264 training return: tensor(404.1686, device='cuda:0')
episode: 265 training return: tensor(446.2553, device='cuda:0')
episode: 266 training return: tensor(280.3838, device='cuda:0')
episode: 267 training return: tensor(331.4807, device='cuda:0')
epoch: 67 test_true_pfm: 3967.6032741024615 sim_pfm: 400.57806035170023
episode: 268 training return: tensor(309.9187, device='cuda:0')
episode: 269 training return: tensor(442.4339, device='cuda:0')
episode: 270 training return: tensor(373.2121, device='cuda:0')
episode: 271 training return: tensor(368.6410, device='cuda:0')
epoch: 68 test_true_pfm: 3913.280820246589 sim_pfm: 340.59659555872594
episode: 272 training return: tensor(312.4870, device='cuda:0')
episode: 273 training return: tensor(369.7370, device='cuda:0')
episode: 274 training return: tensor(334.6018, device='cuda:0')
episode: 275 training return: tensor(330.2724, device='cuda:0')
epoch: 69 test_true_pfm: 3839.7572362184897 sim_pfm: 345.8447831068188
episode: 276 training return: tensor(327.6214, device='cuda:0')
episode: 277 training return: tensor(149.6565, device='cuda:0')
episode: 278 training return: tensor(304.6778, device='cuda:0')
episode: 279 training return: tensor(462.9363, device='cuda:0')
epoch: 70 test_true_pfm: 3899.9834717437766 sim_pfm: 342.39340063755907
episode: 280 training return: tensor(424.8520, device='cuda:0')
episode: 281 training return: tensor(313.4659, device='cuda:0')
episode: 282 training return: tensor(358.6153, device='cuda:0')
episode: 283 training return: tensor(311.4789, device='cuda:0')
epoch: 71 test_true_pfm: 4013.3476540575193 sim_pfm: 434.9028187928877
episode: 284 training return: tensor(382.2458, device='cuda:0')
episode: 285 training return: tensor(382.8470, device='cuda:0')
episode: 286 training return: tensor(329.1993, device='cuda:0')
episode: 287 training return: tensor(344.7397, device='cuda:0')
epoch: 72 test_true_pfm: 3979.0235287041555 sim_pfm: 408.3645411870869
episode: 288 training return: tensor(439.7059, device='cuda:0')
episode: 289 training return: tensor(269.1433, device='cuda:0')
episode: 290 training return: tensor(295.5506, device='cuda:0')
episode: 291 training return: tensor(492.2535, device='cuda:0')
epoch: 73 test_true_pfm: 3953.5329449702162 sim_pfm: 80.73237281071488
episode: 292 training return: tensor(361.9726, device='cuda:0')
episode: 293 training return: tensor(352.9650, device='cuda:0')
episode: 294 training return: tensor(417.5877, device='cuda:0')
episode: 295 training return: tensor(415.1118, device='cuda:0')
epoch: 74 test_true_pfm: 3903.4342904362543 sim_pfm: 349.3533576274543
episode: 296 training return: tensor(391.5896, device='cuda:0')
episode: 297 training return: tensor(307.5326, device='cuda:0')
episode: 298 training return: tensor(319.0789, device='cuda:0')
episode: 299 training return: tensor(271.4579, device='cuda:0')
epoch: 75 test_true_pfm: 4021.4368531863347 sim_pfm: 339.96648768490803
episode: 300 training return: tensor(340.3120, device='cuda:0')
episode: 301 training return: tensor(310.8680, device='cuda:0')
episode: 302 training return: tensor(489.0649, device='cuda:0')
episode: 303 training return: tensor(279.2896, device='cuda:0')
epoch: 76 test_true_pfm: 3855.856839867655 sim_pfm: 343.83321434035315
episode: 304 training return: tensor(389.5639, device='cuda:0')
episode: 305 training return: tensor(394.5359, device='cuda:0')
episode: 306 training return: tensor(378.9789, device='cuda:0')
episode: 307 training return: tensor(318.9296, device='cuda:0')
epoch: 77 test_true_pfm: 3989.8601147475692 sim_pfm: 405.61916986278567
episode: 308 training return: tensor(296.7524, device='cuda:0')
episode: 309 training return: tensor(272.1669, device='cuda:0')
episode: 310 training return: tensor(191.7728, device='cuda:0')
episode: 311 training return: tensor(112.2242, device='cuda:0')
epoch: 78 test_true_pfm: 3965.3571698535175 sim_pfm: 476.99484440265223
episode: 312 training return: tensor(340.0329, device='cuda:0')
episode: 313 training return: tensor(416.1864, device='cuda:0')
episode: 314 training return: tensor(350.0481, device='cuda:0')
episode: 315 training return: tensor(353.7692, device='cuda:0')
epoch: 79 test_true_pfm: 3914.3617858013326 sim_pfm: 293.6073266656119
episode: 316 training return: tensor(299.1113, device='cuda:0')
episode: 317 training return: tensor(383.6174, device='cuda:0')
episode: 318 training return: tensor(-739.1727, device='cuda:0')
episode: 319 training return: tensor(418.4108, device='cuda:0')
epoch: 80 test_true_pfm: 3970.3942014373733 sim_pfm: 370.22842842443305
episode: 320 training return: tensor(470.3845, device='cuda:0')
episode: 321 training return: tensor(369.8957, device='cuda:0')
episode: 322 training return: tensor(375.3853, device='cuda:0')
episode: 323 training return: tensor(445.7007, device='cuda:0')
epoch: 81 test_true_pfm: 4059.205801466163 sim_pfm: 400.0056065848233
episode: 324 training return: tensor(550.3820, device='cuda:0')
episode: 325 training return: tensor(323.2171, device='cuda:0')
episode: 326 training return: tensor(442.1646, device='cuda:0')
episode: 327 training return: tensor(404.3808, device='cuda:0')
epoch: 82 test_true_pfm: 4060.8450917591563 sim_pfm: 437.8975123905887
episode: 328 training return: tensor(437.3441, device='cuda:0')
episode: 329 training return: tensor(237.3405, device='cuda:0')
episode: 330 training return: tensor(332.0001, device='cuda:0')
episode: 331 training return: tensor(384.7400, device='cuda:0')
epoch: 83 test_true_pfm: 3010.6106199374226 sim_pfm: 404.0584375523419
episode: 332 training return: tensor(414.2779, device='cuda:0')
episode: 333 training return: tensor(376.4715, device='cuda:0')
episode: 334 training return: tensor(353.3503, device='cuda:0')
episode: 335 training return: tensor(363.9594, device='cuda:0')
epoch: 84 test_true_pfm: 4001.424529488317 sim_pfm: 424.0581755961175
episode: 336 training return: tensor(433.4367, device='cuda:0')
episode: 337 training return: tensor(500.8003, device='cuda:0')
episode: 338 training return: tensor(243.7300, device='cuda:0')
episode: 339 training return: tensor(308.8556, device='cuda:0')
epoch: 85 test_true_pfm: 4005.4116509787314 sim_pfm: 406.9260465654855
episode: 340 training return: tensor(455.2092, device='cuda:0')
episode: 341 training return: tensor(314.5973, device='cuda:0')
episode: 342 training return: tensor(304.3965, device='cuda:0')
episode: 343 training return: tensor(342.8761, device='cuda:0')
epoch: 86 test_true_pfm: 4049.44174735836 sim_pfm: 324.26888384188834
episode: 344 training return: tensor(431.8502, device='cuda:0')
episode: 345 training return: tensor(399.0808, device='cuda:0')
episode: 346 training return: tensor(372.1803, device='cuda:0')
episode: 347 training return: tensor(389.2939, device='cuda:0')
epoch: 87 test_true_pfm: 3936.151517161508 sim_pfm: 353.89661636334495
episode: 348 training return: tensor(396.6764, device='cuda:0')
episode: 349 training return: tensor(393.5076, device='cuda:0')
episode: 350 training return: tensor(441.2686, device='cuda:0')
episode: 351 training return: tensor(453.0026, device='cuda:0')
epoch: 88 test_true_pfm: 3955.8783650333985 sim_pfm: 417.45187080218847
episode: 352 training return: tensor(401.2202, device='cuda:0')
episode: 353 training return: tensor(258.9009, device='cuda:0')
episode: 354 training return: tensor(348.5054, device='cuda:0')
episode: 355 training return: tensor(253.8784, device='cuda:0')
epoch: 89 test_true_pfm: 4014.617843565234 sim_pfm: 433.34684055886464
episode: 356 training return: tensor(434.6898, device='cuda:0')
episode: 357 training return: tensor(433.0735, device='cuda:0')
episode: 358 training return: tensor(269.1244, device='cuda:0')
episode: 359 training return: tensor(400.7791, device='cuda:0')
epoch: 90 test_true_pfm: 3938.0894895355955 sim_pfm: 317.3251301550966
episode: 360 training return: tensor(233.6427, device='cuda:0')
episode: 361 training return: tensor(379.5481, device='cuda:0')
episode: 362 training return: tensor(321.9789, device='cuda:0')
episode: 363 training return: tensor(245.8618, device='cuda:0')
epoch: 91 test_true_pfm: 4124.747629430389 sim_pfm: 500.84727913325577
episode: 364 training return: tensor(319.2887, device='cuda:0')
episode: 365 training return: tensor(443.3101, device='cuda:0')
episode: 366 training return: tensor(310.9920, device='cuda:0')
episode: 367 training return: tensor(382.8029, device='cuda:0')
epoch: 92 test_true_pfm: 3965.1040777658563 sim_pfm: 404.67209911819856
episode: 368 training return: tensor(287.3797, device='cuda:0')
episode: 369 training return: tensor(489.4022, device='cuda:0')
episode: 370 training return: tensor(448.2596, device='cuda:0')
episode: 371 training return: tensor(318.3057, device='cuda:0')
epoch: 93 test_true_pfm: 4112.894593184478 sim_pfm: 420.8519052786675
episode: 372 training return: tensor(391.2977, device='cuda:0')
episode: 373 training return: tensor(376.1812, device='cuda:0')
episode: 374 training return: tensor(432.4997, device='cuda:0')
episode: 375 training return: tensor(408.9786, device='cuda:0')
epoch: 94 test_true_pfm: 3845.633831890735 sim_pfm: 305.6983411744586
episode: 376 training return: tensor(348.9299, device='cuda:0')
episode: 377 training return: tensor(407.5388, device='cuda:0')
episode: 378 training return: tensor(376.1087, device='cuda:0')
episode: 379 training return: tensor(362.3300, device='cuda:0')
epoch: 95 test_true_pfm: 4029.1242154801407 sim_pfm: 375.45285653996206
episode: 380 training return: tensor(407.0691, device='cuda:0')
episode: 381 training return: tensor(462.3330, device='cuda:0')
episode: 382 training return: tensor(412.8279, device='cuda:0')
episode: 383 training return: tensor(351.3568, device='cuda:0')
epoch: 96 test_true_pfm: 3967.9503051255033 sim_pfm: 356.94744511631626
episode: 384 training return: tensor(316.7250, device='cuda:0')
episode: 385 training return: tensor(320.9496, device='cuda:0')
episode: 386 training return: tensor(305.9122, device='cuda:0')
episode: 387 training return: tensor(202.1893, device='cuda:0')
epoch: 97 test_true_pfm: 4066.706426446142 sim_pfm: 456.4844221631356
episode: 388 training return: tensor(262.4476, device='cuda:0')
episode: 389 training return: tensor(413.8558, device='cuda:0')
episode: 390 training return: tensor(378.3460, device='cuda:0')
episode: 391 training return: tensor(290.2569, device='cuda:0')
epoch: 98 test_true_pfm: 3946.418891001404 sim_pfm: 400.9476043206232
episode: 392 training return: tensor(379.1285, device='cuda:0')
episode: 393 training return: tensor(360.6902, device='cuda:0')
episode: 394 training return: tensor(457.0516, device='cuda:0')
episode: 395 training return: tensor(467.4302, device='cuda:0')
epoch: 99 test_true_pfm: 3947.556851460608 sim_pfm: 434.22682434147765
episode: 396 training return: tensor(389.7579, device='cuda:0')
episode: 397 training return: tensor(351.0929, device='cuda:0')
episode: 398 training return: tensor(461.5197, device='cuda:0')
episode: 399 training return: tensor(353.4003, device='cuda:0')
epoch: 100 test_true_pfm: 4011.006729366791 sim_pfm: 334.39342424554826
episode: 400 training return: tensor(358.1306, device='cuda:0')
episode: 401 training return: tensor(427.7407, device='cuda:0')
episode: 402 training return: tensor(340.3531, device='cuda:0')
episode: 403 training return: tensor(384.2382, device='cuda:0')
epoch: 101 test_true_pfm: 3967.3374769104853 sim_pfm: 438.3683241431718
episode: 404 training return: tensor(365.4976, device='cuda:0')
episode: 405 training return: tensor(404.4040, device='cuda:0')
episode: 406 training return: tensor(326.7195, device='cuda:0')
episode: 407 training return: tensor(351.1200, device='cuda:0')
epoch: 102 test_true_pfm: 4006.157449108757 sim_pfm: 431.23625196533976
episode: 408 training return: tensor(346.7496, device='cuda:0')
episode: 409 training return: tensor(423.4440, device='cuda:0')
episode: 410 training return: tensor(341.3857, device='cuda:0')
episode: 411 training return: tensor(415.6384, device='cuda:0')
epoch: 103 test_true_pfm: 4017.1770533259782 sim_pfm: 427.7187356593786
episode: 412 training return: tensor(403.8719, device='cuda:0')
episode: 413 training return: tensor(351.7677, device='cuda:0')
episode: 414 training return: tensor(277.6585, device='cuda:0')
episode: 415 training return: tensor(426.6205, device='cuda:0')
epoch: 104 test_true_pfm: 3911.7846121208127 sim_pfm: 356.02336423786863
episode: 416 training return: tensor(337.4838, device='cuda:0')
episode: 417 training return: tensor(412.7561, device='cuda:0')
episode: 418 training return: tensor(399.6623, device='cuda:0')
episode: 419 training return: tensor(430.0511, device='cuda:0')
epoch: 105 test_true_pfm: 3883.2989785113155 sim_pfm: 298.94108515973977
episode: 420 training return: tensor(282.7733, device='cuda:0')
episode: 421 training return: tensor(456.1574, device='cuda:0')
episode: 422 training return: tensor(395.5176, device='cuda:0')
episode: 423 training return: tensor(421.5926, device='cuda:0')
epoch: 106 test_true_pfm: 4060.2691918221485 sim_pfm: 424.95390657381114
episode: 424 training return: tensor(320.5193, device='cuda:0')
episode: 425 training return: tensor(309.7413, device='cuda:0')
episode: 426 training return: tensor(273.1472, device='cuda:0')
episode: 427 training return: tensor(326.9463, device='cuda:0')
epoch: 107 test_true_pfm: 4023.677918231122 sim_pfm: 408.82509875087027
episode: 428 training return: tensor(328.9882, device='cuda:0')
episode: 429 training return: tensor(466.2033, device='cuda:0')
episode: 430 training return: tensor(367.1391, device='cuda:0')
episode: 431 training return: tensor(234.3859, device='cuda:0')
epoch: 108 test_true_pfm: 3965.4949927105645 sim_pfm: 398.3945539947211
episode: 432 training return: tensor(396.4093, device='cuda:0')
episode: 433 training return: tensor(427.2417, device='cuda:0')
episode: 434 training return: tensor(435.9663, device='cuda:0')
episode: 435 training return: tensor(435.1193, device='cuda:0')
epoch: 109 test_true_pfm: 4018.2897283402385 sim_pfm: 397.1362389600642
episode: 436 training return: tensor(419.9723, device='cuda:0')
episode: 437 training return: tensor(414.2753, device='cuda:0')
episode: 438 training return: tensor(405.4012, device='cuda:0')
episode: 439 training return: tensor(252.8104, device='cuda:0')
epoch: 110 test_true_pfm: 4048.2677150788204 sim_pfm: 457.5832338242811
episode: 440 training return: tensor(440.1609, device='cuda:0')
episode: 441 training return: tensor(489.0867, device='cuda:0')
episode: 442 training return: tensor(491.8703, device='cuda:0')
episode: 443 training return: tensor(267.7313, device='cuda:0')
epoch: 111 test_true_pfm: 3946.656216953563 sim_pfm: 360.1352544986876
episode: 444 training return: tensor(270.7150, device='cuda:0')
episode: 445 training return: tensor(357.3803, device='cuda:0')
episode: 446 training return: tensor(308.8320, device='cuda:0')
episode: 447 training return: tensor(421.1525, device='cuda:0')
epoch: 112 test_true_pfm: 3904.0789334111273 sim_pfm: 373.1597604500421
episode: 448 training return: tensor(333.0742, device='cuda:0')
episode: 449 training return: tensor(401.0659, device='cuda:0')
episode: 450 training return: tensor(429.1948, device='cuda:0')
episode: 451 training return: tensor(343.8215, device='cuda:0')
epoch: 113 test_true_pfm: 3990.054252032635 sim_pfm: 430.6464184368281
episode: 452 training return: tensor(449.0110, device='cuda:0')
episode: 453 training return: tensor(381.0562, device='cuda:0')
episode: 454 training return: tensor(258.7671, device='cuda:0')
episode: 455 training return: tensor(385.6900, device='cuda:0')
epoch: 114 test_true_pfm: 4078.9334778921498 sim_pfm: 448.39480982923607
episode: 456 training return: tensor(330.3014, device='cuda:0')
episode: 457 training return: tensor(337.9510, device='cuda:0')
episode: 458 training return: tensor(427.2061, device='cuda:0')
episode: 459 training return: tensor(422.2928, device='cuda:0')
epoch: 115 test_true_pfm: 3854.79380286039 sim_pfm: 387.56063257995993
episode: 460 training return: tensor(300.8964, device='cuda:0')
episode: 461 training return: tensor(269.5541, device='cuda:0')
episode: 462 training return: tensor(319.8133, device='cuda:0')
episode: 463 training return: tensor(362.6323, device='cuda:0')
epoch: 116 test_true_pfm: 4005.7738880864963 sim_pfm: 409.5277182382027
episode: 464 training return: tensor(318.8931, device='cuda:0')
episode: 465 training return: tensor(397.6080, device='cuda:0')
episode: 466 training return: tensor(395.8936, device='cuda:0')
episode: 467 training return: tensor(449.0524, device='cuda:0')
epoch: 117 test_true_pfm: 3981.5216146370826 sim_pfm: 383.94184690338443
episode: 468 training return: tensor(361.7336, device='cuda:0')
episode: 469 training return: tensor(278.4921, device='cuda:0')
episode: 470 training return: tensor(382.0973, device='cuda:0')
episode: 471 training return: tensor(474.3748, device='cuda:0')
epoch: 118 test_true_pfm: 3886.7906163746143 sim_pfm: 384.2614885441047
episode: 472 training return: tensor(298.0857, device='cuda:0')
episode: 473 training return: tensor(432.8506, device='cuda:0')
episode: 474 training return: tensor(401.5047, device='cuda:0')
episode: 475 training return: tensor(320.2123, device='cuda:0')
epoch: 119 test_true_pfm: 3939.947722584091 sim_pfm: 377.8451502005725
episode: 476 training return: tensor(375.6835, device='cuda:0')
episode: 477 training return: tensor(437.6516, device='cuda:0')
episode: 478 training return: tensor(395.4446, device='cuda:0')
episode: 479 training return: tensor(429.0764, device='cuda:0')
epoch: 120 test_true_pfm: 3908.1883321709415 sim_pfm: 344.35583153056604
episode: 480 training return: tensor(412.0033, device='cuda:0')
episode: 481 training return: tensor(389.8748, device='cuda:0')
episode: 482 training return: tensor(433.5807, device='cuda:0')
episode: 483 training return: tensor(347.2977, device='cuda:0')
epoch: 121 test_true_pfm: 3915.835459659891 sim_pfm: 392.6836000831002
episode: 484 training return: tensor(310.1999, device='cuda:0')
episode: 485 training return: tensor(483.7068, device='cuda:0')
episode: 486 training return: tensor(401.1701, device='cuda:0')
episode: 487 training return: tensor(383.0796, device='cuda:0')
epoch: 122 test_true_pfm: 3975.208522346625 sim_pfm: 374.278584546623
episode: 488 training return: tensor(301.3096, device='cuda:0')
episode: 489 training return: tensor(390.9385, device='cuda:0')
episode: 490 training return: tensor(443.8109, device='cuda:0')
episode: 491 training return: tensor(392.0548, device='cuda:0')
epoch: 123 test_true_pfm: 3908.592394050256 sim_pfm: 322.14979184931144
episode: 492 training return: tensor(426.8004, device='cuda:0')
episode: 493 training return: tensor(360.3141, device='cuda:0')
episode: 494 training return: tensor(362.7593, device='cuda:0')
episode: 495 training return: tensor(443.0613, device='cuda:0')
epoch: 124 test_true_pfm: 3956.2818440791666 sim_pfm: 404.10135845370434
episode: 496 training return: tensor(392.2292, device='cuda:0')
episode: 497 training return: tensor(321.7912, device='cuda:0')
episode: 498 training return: tensor(283.4136, device='cuda:0')
episode: 499 training return: tensor(438.7698, device='cuda:0')
epoch: 125 test_true_pfm: 3973.9847655156736 sim_pfm: 378.38882572834456
episode: 500 training return: tensor(505.2020, device='cuda:0')
episode: 501 training return: tensor(408.4406, device='cuda:0')
episode: 502 training return: tensor(254.0122, device='cuda:0')
episode: 503 training return: tensor(356.0116, device='cuda:0')
epoch: 126 test_true_pfm: 3898.04785328339 sim_pfm: 334.41766028356506
episode: 504 training return: tensor(396.3693, device='cuda:0')
episode: 505 training return: tensor(356.6710, device='cuda:0')
episode: 506 training return: tensor(501.3213, device='cuda:0')
episode: 507 training return: tensor(340.3681, device='cuda:0')
epoch: 127 test_true_pfm: 3981.718854203549 sim_pfm: 405.9024107353277
episode: 508 training return: tensor(402.0678, device='cuda:0')
episode: 509 training return: tensor(326.1055, device='cuda:0')
episode: 510 training return: tensor(390.6501, device='cuda:0')
episode: 511 training return: tensor(289.0036, device='cuda:0')
epoch: 128 test_true_pfm: 3965.2674161404952 sim_pfm: 396.3216694778724
episode: 512 training return: tensor(400.4054, device='cuda:0')
episode: 513 training return: tensor(366.3459, device='cuda:0')
episode: 514 training return: tensor(289.4565, device='cuda:0')
episode: 515 training return: tensor(334.9344, device='cuda:0')
epoch: 129 test_true_pfm: 3903.9059984017827 sim_pfm: 333.4540491182027
episode: 516 training return: tensor(273.8723, device='cuda:0')
episode: 517 training return: tensor(358.0219, device='cuda:0')
episode: 518 training return: tensor(386.7456, device='cuda:0')
episode: 519 training return: tensor(258.7498, device='cuda:0')
epoch: 130 test_true_pfm: 3870.4634872689076 sim_pfm: 369.23425868390285
episode: 520 training return: tensor(409.1747, device='cuda:0')
episode: 521 training return: tensor(292.1209, device='cuda:0')
episode: 522 training return: tensor(446.5017, device='cuda:0')
episode: 523 training return: tensor(479.2694, device='cuda:0')
epoch: 131 test_true_pfm: 4158.340366089651 sim_pfm: 493.72336998267565
episode: 524 training return: tensor(472.3148, device='cuda:0')
episode: 525 training return: tensor(479.5710, device='cuda:0')
episode: 526 training return: tensor(263.7550, device='cuda:0')
episode: 527 training return: tensor(351.8531, device='cuda:0')
epoch: 132 test_true_pfm: 3942.8248016746343 sim_pfm: 386.7221361878328
episode: 528 training return: tensor(419.3747, device='cuda:0')
episode: 529 training return: tensor(279.3380, device='cuda:0')
episode: 530 training return: tensor(313.4126, device='cuda:0')
episode: 531 training return: tensor(419.9762, device='cuda:0')
epoch: 133 test_true_pfm: 4045.064867891588 sim_pfm: 382.9932068043466
episode: 532 training return: tensor(329.2659, device='cuda:0')
episode: 533 training return: tensor(336.7553, device='cuda:0')
episode: 534 training return: tensor(271.2433, device='cuda:0')
episode: 535 training return: tensor(420.2232, device='cuda:0')
epoch: 134 test_true_pfm: 4044.2841570782402 sim_pfm: 430.48957828399335
episode: 536 training return: tensor(372.4746, device='cuda:0')
episode: 537 training return: tensor(321.1632, device='cuda:0')
episode: 538 training return: tensor(453.5324, device='cuda:0')
episode: 539 training return: tensor(367.6879, device='cuda:0')
epoch: 135 test_true_pfm: 3994.8138936323044 sim_pfm: 344.0983364212734
episode: 540 training return: tensor(413.3326, device='cuda:0')
episode: 541 training return: tensor(415.1051, device='cuda:0')
episode: 542 training return: tensor(484.0824, device='cuda:0')
episode: 543 training return: tensor(291.4615, device='cuda:0')
epoch: 136 test_true_pfm: 4093.2525262149647 sim_pfm: 455.001410341841
episode: 544 training return: tensor(330.4857, device='cuda:0')
episode: 545 training return: tensor(466.2864, device='cuda:0')
episode: 546 training return: tensor(425.7537, device='cuda:0')
episode: 547 training return: tensor(424.5767, device='cuda:0')
epoch: 137 test_true_pfm: 4011.8622757100725 sim_pfm: 424.5117425514812
episode: 548 training return: tensor(309.9664, device='cuda:0')
episode: 549 training return: tensor(397.7167, device='cuda:0')
episode: 550 training return: tensor(477.5393, device='cuda:0')
episode: 551 training return: tensor(298.1066, device='cuda:0')
epoch: 138 test_true_pfm: 3906.9207479922256 sim_pfm: 374.76024466122425
episode: 552 training return: tensor(272.0407, device='cuda:0')
episode: 553 training return: tensor(414.1932, device='cuda:0')
episode: 554 training return: tensor(356.0215, device='cuda:0')
episode: 555 training return: tensor(391.1864, device='cuda:0')
epoch: 139 test_true_pfm: 4028.744942578111 sim_pfm: 423.399162493976
episode: 556 training return: tensor(338.4213, device='cuda:0')
episode: 557 training return: tensor(410.5528, device='cuda:0')
episode: 558 training return: tensor(429.8963, device='cuda:0')
episode: 559 training return: tensor(340.4556, device='cuda:0')
epoch: 140 test_true_pfm: 4127.333697263629 sim_pfm: 490.88903366896557
episode: 560 training return: tensor(336.5041, device='cuda:0')
episode: 561 training return: tensor(272.9825, device='cuda:0')
episode: 562 training return: tensor(323.3380, device='cuda:0')
episode: 563 training return: tensor(305.1519, device='cuda:0')
epoch: 141 test_true_pfm: 3831.504012437108 sim_pfm: 301.8815237211529
episode: 564 training return: tensor(344.9125, device='cuda:0')
episode: 565 training return: tensor(242.7454, device='cuda:0')
episode: 566 training return: tensor(231.8491, device='cuda:0')
episode: 567 training return: tensor(415.7662, device='cuda:0')
epoch: 142 test_true_pfm: 3920.7883867253113 sim_pfm: 349.04898653784767
episode: 568 training return: tensor(222.6608, device='cuda:0')
episode: 569 training return: tensor(446.4425, device='cuda:0')
episode: 570 training return: tensor(359.2971, device='cuda:0')
episode: 571 training return: tensor(248.5857, device='cuda:0')
epoch: 143 test_true_pfm: 3922.349459849074 sim_pfm: 337.6061654066919
episode: 572 training return: tensor(233.6788, device='cuda:0')
episode: 573 training return: tensor(358.1180, device='cuda:0')
episode: 574 training return: tensor(275.9453, device='cuda:0')
episode: 575 training return: tensor(311.0357, device='cuda:0')
epoch: 144 test_true_pfm: 4035.3291572641215 sim_pfm: 419.8230576468729
episode: 576 training return: tensor(416.5885, device='cuda:0')
episode: 577 training return: tensor(342.5291, device='cuda:0')
episode: 578 training return: tensor(424.2914, device='cuda:0')
episode: 579 training return: tensor(353.0089, device='cuda:0')
epoch: 145 test_true_pfm: 4086.9117470670612 sim_pfm: 465.23304481995484
episode: 580 training return: tensor(420.3035, device='cuda:0')
episode: 581 training return: tensor(473.2890, device='cuda:0')
episode: 582 training return: tensor(370.1556, device='cuda:0')
episode: 583 training return: tensor(246.6318, device='cuda:0')
epoch: 146 test_true_pfm: 4014.757450428924 sim_pfm: 422.8959103507611
episode: 584 training return: tensor(424.6772, device='cuda:0')
episode: 585 training return: tensor(422.5988, device='cuda:0')
episode: 586 training return: tensor(343.7262, device='cuda:0')
episode: 587 training return: tensor(208.3907, device='cuda:0')
epoch: 147 test_true_pfm: 4002.0720784558484 sim_pfm: 408.8658513687939
episode: 588 training return: tensor(465.2977, device='cuda:0')
episode: 589 training return: tensor(192.1021, device='cuda:0')
episode: 590 training return: tensor(276.2892, device='cuda:0')
episode: 591 training return: tensor(412.2020, device='cuda:0')
epoch: 148 test_true_pfm: 3982.248111402496 sim_pfm: 435.66257163511665
episode: 592 training return: tensor(460.2073, device='cuda:0')
episode: 593 training return: tensor(443.0645, device='cuda:0')
episode: 594 training return: tensor(345.6156, device='cuda:0')
episode: 595 training return: tensor(379.1352, device='cuda:0')
epoch: 149 test_true_pfm: 4053.2254097278965 sim_pfm: 442.00722060086747
episode: 596 training return: tensor(385.9271, device='cuda:0')
episode: 597 training return: tensor(435.3838, device='cuda:0')
episode: 598 training return: tensor(289.2709, device='cuda:0')
episode: 599 training return: tensor(303.7878, device='cuda:0')
epoch: 150 test_true_pfm: 3975.2281926049363 sim_pfm: 446.3625763260643
