['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'mixed', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.24954995810985564 test_loss: 0.14438512325286865
epoch: 1 training_loss 0.13113116979599 test_loss: 0.14520972967147827
epoch: 2 training_loss 0.12818498864769937 test_loss: 0.11246511936187745
epoch: 3 training_loss 0.11619974981993436 test_loss: 0.0945152223110199
epoch: 4 training_loss 0.10766032952815294 test_loss: 0.1064216136932373
epoch: 5 training_loss 0.10550992611795663 test_loss: 0.1018957257270813
epoch: 6 training_loss 0.11046400735154747 test_loss: 0.10964933633804322
epoch: 7 training_loss 0.1090878006350249 test_loss: 0.1597150444984436
epoch: 8 training_loss 0.1041497352719307 test_loss: 0.0948568344116211
epoch: 9 training_loss 0.1006402259785682 test_loss: 0.08759388327598572
epoch: 10 training_loss 0.10032044244930148 test_loss: 0.10296717882156373
epoch: 11 training_loss 0.10028798369690776 test_loss: 0.07710232138633728
epoch: 12 training_loss 0.09432105302810669 test_loss: 0.0941825807094574
epoch: 13 training_loss 0.08921530172228813 test_loss: 0.10069470405578614
epoch: 14 training_loss 0.09558253183960914 test_loss: 0.07672963738441467
epoch: 15 training_loss 0.09755028249695898 test_loss: 0.08270300030708314
epoch: 16 training_loss 0.08905065469443799 test_loss: 0.08757913112640381
epoch: 17 training_loss 0.08483711229637265 test_loss: 0.07002093195915222
epoch: 18 training_loss 0.08935001041740179 test_loss: 0.07834179997444153
epoch: 19 training_loss 0.08605915427207947 test_loss: 0.07910830974578857
epoch: 20 training_loss 0.08036592695862055 test_loss: 0.08038681745529175
epoch: 21 training_loss 0.08961763335391879 test_loss: 0.08398513197898864
epoch: 22 training_loss 0.08212752684950829 test_loss: 0.06570743918418884
epoch: 23 training_loss 0.0921176285482943 test_loss: 0.08718418478965759
epoch: 24 training_loss 0.08871276866644622 test_loss: 0.07757450342178344
epoch: 25 training_loss 0.08645358664914965 test_loss: 0.07588224411010742
epoch: 26 training_loss 0.08583231046795844 test_loss: 0.07551516890525818
epoch: 27 training_loss 0.08377803305163979 test_loss: 0.07883535027503967
epoch: 28 training_loss 0.08768976852297783 test_loss: 0.08146032691001892
epoch: 29 training_loss 0.08427749155089259 test_loss: 0.08858110904693603
epoch: 30 training_loss 0.0830588660016656 test_loss: 0.09893975257873536
epoch: 31 training_loss 0.08689316580072046 test_loss: 0.0828905463218689
epoch: 32 training_loss 0.08677926414646209 test_loss: 0.07743238210678101
epoch: 33 training_loss 0.09411015016958117 test_loss: 0.07445106506347657
epoch: 34 training_loss 0.08570293627679348 test_loss: 0.082789146900177
epoch: 35 training_loss 0.08838608361780644 test_loss: 0.09416251182556153
epoch: 36 training_loss 0.0910221616923809 test_loss: 0.08971923589706421
epoch: 37 training_loss 0.08181351970881223 test_loss: 0.06401514410972595
epoch: 38 training_loss 0.08478322308510541 test_loss: 0.1026349663734436
epoch: 39 training_loss 0.07709206860512495 test_loss: 0.10036410093307495
epoch: 40 training_loss 0.08708908552303911 test_loss: 0.08039732575416565
epoch: 41 training_loss 0.07935648046433925 test_loss: 0.08134523034095764
epoch: 42 training_loss 0.09387565214186906 test_loss: 0.0786392867565155
epoch: 43 training_loss 0.0841935477219522 test_loss: 0.07959173917770386
epoch: 44 training_loss 0.0828823334351182 test_loss: 0.06629793047904968
epoch: 45 training_loss 0.08607799731194973 test_loss: 0.09245707392692566
epoch: 46 training_loss 0.08833602951839566 test_loss: 0.0773827612400055
epoch: 47 training_loss 0.08080278500914574 test_loss: 0.09761159420013428
epoch: 48 training_loss 0.08218162996694446 test_loss: 0.07211201190948487
epoch: 49 training_loss 0.07977995653636753 test_loss: 0.08124664425849915
epoch: 50 training_loss 0.08019834890961647 test_loss: 0.09524072408676147
epoch: 51 training_loss 0.08387895261868834 test_loss: 0.07073630094528198
epoch: 52 training_loss 0.0799876642972231 test_loss: 0.08201591968536377
epoch: 53 training_loss 0.08362763412296773 test_loss: 0.09306080341339111
epoch: 54 training_loss 0.07700791464187205 test_loss: 0.072419273853302
epoch: 55 training_loss 0.07822838746011257 test_loss: 0.07841872572898864
epoch: 56 training_loss 0.08948459688574076 test_loss: 0.08183264136314392
epoch: 57 training_loss 0.0827438460662961 test_loss: 0.07531018257141113
epoch: 58 training_loss 0.08084548976272345 test_loss: 0.08982482552528381
epoch: 59 training_loss 0.08553294172510505 test_loss: 0.09162421822547913
epoch: 60 training_loss 0.08400947041809559 test_loss: 0.0779799222946167
epoch: 61 training_loss 0.07420935115776955 test_loss: 0.09046655297279357
epoch: 62 training_loss 0.07822364484891295 test_loss: 0.08976284265518189
epoch: 63 training_loss 0.07753766812384129 test_loss: 0.08846479058265685
epoch: 64 training_loss 0.08308549031615257 test_loss: 0.07674777507781982
epoch: 65 training_loss 0.08124445330351591 test_loss: 0.07832096219062805
epoch: 66 training_loss 0.07942611625418067 test_loss: 0.077005535364151
epoch: 67 training_loss 0.08104420579969883 test_loss: 0.09143124222755432
epoch: 68 training_loss 0.07492122363299131 test_loss: 0.07455622553825378
epoch: 69 training_loss 0.08606282053515316 test_loss: 0.07579314112663268
epoch: 70 training_loss 0.08691025221720337 test_loss: 0.08013872504234314
epoch: 71 training_loss 0.07665180050767958 test_loss: 0.06403492093086242
epoch: 72 training_loss 0.08038246721029281 test_loss: 0.08719852566719055
epoch: 73 training_loss 0.07861545767635107 test_loss: 0.08306594491004944
epoch: 74 training_loss 0.07923494284972549 test_loss: 0.07147873044013978
epoch: 75 training_loss 0.0815513732470572 test_loss: 0.07710097432136535
epoch: 76 training_loss 0.08138259815983474 test_loss: 0.07861201763153076
epoch: 77 training_loss 0.07860095972195268 test_loss: 0.07417963147163391
epoch: 78 training_loss 0.08061943287961185 test_loss: 0.0710616648197174
epoch: 79 training_loss 0.08113068193197251 test_loss: 0.08995757102966309
epoch: 80 training_loss 0.086161632463336 test_loss: 0.07179288864135742
epoch: 81 training_loss 0.07758109092712402 test_loss: 0.07777752280235291
epoch: 82 training_loss 0.07768228115513921 test_loss: 0.09347164034843444
epoch: 83 training_loss 0.0832332752738148 test_loss: 0.09364647269248963
epoch: 84 training_loss 0.08081634029746056 test_loss: 0.0788402259349823
epoch: 85 training_loss 0.08094831285998225 test_loss: 0.07872636914253235
epoch: 86 training_loss 0.07853773668408394 test_loss: 0.06847641468048096
epoch: 87 training_loss 0.08085293862968683 test_loss: 0.0600580096244812
epoch: 88 training_loss 0.08140239800326526 test_loss: 0.07446660399436951
epoch: 89 training_loss 0.07865429396741092 test_loss: 0.08467739820480347
epoch: 90 training_loss 0.08006470953114331 test_loss: 0.07418265342712402
epoch: 91 training_loss 0.08335741506889463 test_loss: 0.08248995542526245
epoch: 92 training_loss 0.07992317225784064 test_loss: 0.0655170738697052
epoch: 93 training_loss 0.08234472960233688 test_loss: 0.0684907615184784
epoch: 94 training_loss 0.07932485776022076 test_loss: 0.07797912359237671
epoch: 95 training_loss 0.08599624924361705 test_loss: 0.07775337100028992
epoch: 96 training_loss 0.07826835526153446 test_loss: 0.0735575258731842
epoch: 97 training_loss 0.0856438715569675 test_loss: 0.08221859335899354
epoch: 98 training_loss 0.0715450587309897 test_loss: 0.07696975469589233
epoch: 99 training_loss 0.0786276344768703 test_loss: 0.08336286544799805
epoch: 100 training_loss 0.07908518506214023 test_loss: 0.07440686225891113
epoch: 101 training_loss 0.07988387844525277 test_loss: 0.07868232131004334
epoch: 102 training_loss 0.07800873443484306 test_loss: 0.07290719747543335
epoch: 103 training_loss 0.07823941862210632 test_loss: 0.08821463584899902
epoch: 104 training_loss 0.07872977437451482 test_loss: 0.07459532618522643
epoch: 105 training_loss 0.09016242500394583 test_loss: 0.08375876545906066
epoch: 106 training_loss 0.07375345917418599 test_loss: 0.07185344696044922
epoch: 107 training_loss 0.07695373417809606 test_loss: 0.08130832314491272
epoch: 108 training_loss 0.07296741351485253 test_loss: 0.07692666053771972
epoch: 109 training_loss 0.08411591058596969 test_loss: 0.07314186096191407
epoch: 110 training_loss 0.07580386344343423 test_loss: 0.08295536041259766
epoch: 111 training_loss 0.08093186555430293 test_loss: 0.08819994330406189
epoch: 112 training_loss 0.07809782275930047 test_loss: 0.08376826643943787
epoch: 113 training_loss 0.07645238852128386 test_loss: 0.08408393263816834
epoch: 114 training_loss 0.07491419091820717 test_loss: 0.07966328263282776
epoch: 115 training_loss 0.07691250615753234 test_loss: 0.07452993988990783
epoch: 116 training_loss 0.07993919560685754 test_loss: 0.06737294197082519
epoch: 117 training_loss 0.08193573972210288 test_loss: 0.06196932196617126
epoch: 118 training_loss 0.0799584867246449 test_loss: 0.07267196178436279
epoch: 119 training_loss 0.07923506101593375 test_loss: 0.0766750693321228
epoch: 120 training_loss 0.07992553430609405 test_loss: 0.06749558448791504
epoch: 121 training_loss 0.07727340328507125 test_loss: 0.08063812255859375
epoch: 122 training_loss 0.07874641839414835 test_loss: 0.0806792438030243
epoch: 123 training_loss 0.08156551610678435 test_loss: 0.06817670464515686
epoch: 124 training_loss 0.07631761074066162 test_loss: 0.07234907746315003
epoch: 125 training_loss 0.07823191396892071 test_loss: 0.08157243132591248
epoch: 126 training_loss 0.07263986527919769 test_loss: 0.08282929062843322
epoch: 127 training_loss 0.07825795549899339 test_loss: 0.07972033023834228
epoch: 128 training_loss 0.08183961747214198 test_loss: 0.07858103513717651
epoch: 129 training_loss 0.07324648877605795 test_loss: 0.08419337272644042
epoch: 130 training_loss 0.07794040584936739 test_loss: 0.0793357253074646
epoch: 131 training_loss 0.07693526593968272 test_loss: 0.08191661238670349
epoch: 132 training_loss 0.0785562128946185 test_loss: 0.07053658366203308
epoch: 133 training_loss 0.07844458051025867 test_loss: 0.08558903932571411
epoch: 134 training_loss 0.07191705614328385 test_loss: 0.07753309607505798
epoch: 135 training_loss 0.07849027650430798 test_loss: 0.09500114917755127
epoch: 136 training_loss 0.07861316320486367 test_loss: 0.07509340047836303
epoch: 137 training_loss 0.07886502837762237 test_loss: 0.07176716327667236
epoch: 138 training_loss 0.08085516167804599 test_loss: 0.07330011129379273
epoch: 139 training_loss 0.07841542957350611 test_loss: 0.08815178871154786
epoch: 140 training_loss 0.07790281557478011 test_loss: 0.08902769684791564
epoch: 141 training_loss 0.08063624029979109 test_loss: 0.0631631076335907
epoch: 142 training_loss 0.0808583329245448 test_loss: 0.07341831922531128
epoch: 143 training_loss 0.07993871860206127 test_loss: 0.07126237154006958
epoch: 144 training_loss 0.07383875161409378 test_loss: 0.07696990966796875
epoch: 145 training_loss 0.07374564703553915 test_loss: 0.07887369394302368
epoch: 146 training_loss 0.07334888579323888 test_loss: 0.06705191135406494
epoch: 147 training_loss 0.075801932066679 test_loss: 0.07278058528900147
epoch: 148 training_loss 0.07880444552749395 test_loss: 0.07705779075622558
epoch: 149 training_loss 0.08710606960579753 test_loss: 0.06558091640472412
epoch: 0 training_loss 38.64669166564941 test_loss: 20.926960754394532
epoch: 1 training_loss 17.17078630447388 test_loss: 14.694807434082032
epoch: 2 training_loss 12.66069972038269 test_loss: 11.913911437988281
epoch: 3 training_loss 10.671376752853394 test_loss: 10.121051025390624
epoch: 4 training_loss 9.513179101943969 test_loss: 8.97216339111328
epoch: 5 training_loss 8.488751649856567 test_loss: 8.060934448242188
epoch: 6 training_loss 7.858682374954224 test_loss: 7.349921417236328
epoch: 7 training_loss 7.479839262962341 test_loss: 7.046985626220703
epoch: 8 training_loss 7.008257174491883 test_loss: 6.541657257080078
epoch: 9 training_loss 6.681831607818603 test_loss: 6.911411285400391
epoch: 10 training_loss 6.463745670318604 test_loss: 6.1596527099609375
epoch: 11 training_loss 6.065030946731567 test_loss: 5.9815013885498045
epoch: 12 training_loss 5.9095262956619266 test_loss: 5.632760620117187
epoch: 13 training_loss 5.6688583111763 test_loss: 5.873859405517578
epoch: 14 training_loss 5.451222672462463 test_loss: 5.590275573730469
epoch: 15 training_loss 5.4028116226196286 test_loss: 5.258802795410157
epoch: 16 training_loss 5.195468373298645 test_loss: 5.128419876098633
epoch: 17 training_loss 4.981731419563293 test_loss: 5.17676887512207
epoch: 18 training_loss 4.9483539938926695 test_loss: 4.706642150878906
epoch: 19 training_loss 4.829706871509552 test_loss: 4.7949974060058596
epoch: 20 training_loss 4.697194957733155 test_loss: 4.79371223449707
epoch: 21 training_loss 4.611059460639954 test_loss: 4.568473052978516
epoch: 22 training_loss 4.4047705268859865 test_loss: 4.506819152832032
epoch: 23 training_loss 4.593278193473816 test_loss: 4.477302932739258
epoch: 24 training_loss 4.393709735870361 test_loss: 4.3383739471435545
epoch: 25 training_loss 4.299285049438477 test_loss: 4.352941513061523
epoch: 26 training_loss 4.121753849983215 test_loss: 3.9384578704833983
epoch: 27 training_loss 4.129186747074127 test_loss: 3.9814403533935545
epoch: 28 training_loss 4.0622045612335205 test_loss: 3.9853374481201174
epoch: 29 training_loss 4.021537351608276 test_loss: 4.0429035186767575
epoch: 30 training_loss 4.001025607585907 test_loss: 3.9785003662109375
epoch: 31 training_loss 3.885264596939087 test_loss: 3.943043899536133
epoch: 32 training_loss 3.905387041568756 test_loss: 3.9524169921875
epoch: 33 training_loss 3.8423833298683165 test_loss: 3.9418697357177734
epoch: 34 training_loss 3.82372615814209 test_loss: 4.0099845886230465
epoch: 35 training_loss 3.7667136359214783 test_loss: 3.871665191650391
epoch: 36 training_loss 3.634854757785797 test_loss: 3.7531578063964846
epoch: 37 training_loss 3.637797148227692 test_loss: 3.7929714202880858
epoch: 38 training_loss 3.6383065295219423 test_loss: 3.5797000885009767
epoch: 39 training_loss 3.5105870151519776 test_loss: 3.603894805908203
epoch: 40 training_loss 3.6475289607048036 test_loss: 3.7870105743408202
epoch: 41 training_loss 3.5769594502449036 test_loss: 3.671711730957031
epoch: 42 training_loss 3.549319941997528 test_loss: 3.385603332519531
epoch: 43 training_loss 3.4089426374435425 test_loss: 3.4234241485595702
epoch: 44 training_loss 3.5034579992294312 test_loss: 3.524149703979492
epoch: 45 training_loss 3.3997969675064086 test_loss: 3.521636962890625
epoch: 46 training_loss 3.490391666889191 test_loss: 3.440754699707031
epoch: 47 training_loss 3.3392853927612305 test_loss: 3.233956527709961
epoch: 48 training_loss 3.345864713191986 test_loss: 3.319134521484375
epoch: 49 training_loss 3.3733359599113464 test_loss: 3.3358989715576173
epoch: 50 training_loss 3.332149133682251 test_loss: 3.343830871582031
epoch: 51 training_loss 3.2455536913871765 test_loss: 3.3080387115478516
epoch: 52 training_loss 3.2105055713653563 test_loss: 3.0761796951293947
epoch: 53 training_loss 3.2076984310150145 test_loss: 3.2650478363037108
epoch: 54 training_loss 3.169242870807648 test_loss: 3.0988664627075195
epoch: 55 training_loss 3.1850209736824038 test_loss: 3.186076354980469
epoch: 56 training_loss 3.2938127756118774 test_loss: 3.218050003051758
epoch: 57 training_loss 3.1658559584617616 test_loss: 3.3069149017333985
epoch: 58 training_loss 3.160495038032532 test_loss: 3.292966842651367
epoch: 59 training_loss 3.0809330582618712 test_loss: 3.3158523559570314
epoch: 60 training_loss 3.1400840377807615 test_loss: 3.355364990234375
epoch: 61 training_loss 3.127781252861023 test_loss: 3.2123092651367187
epoch: 62 training_loss 3.157199146747589 test_loss: 3.1929487228393554
epoch: 63 training_loss 3.059248342514038 test_loss: 3.1278242111206054
epoch: 64 training_loss 3.1146593141555785 test_loss: 3.294676971435547
epoch: 65 training_loss 3.0758416748046873 test_loss: 3.134869384765625
epoch: 66 training_loss 3.061386561393738 test_loss: 3.0929019927978514
epoch: 67 training_loss 3.0565094256401064 test_loss: 3.024664878845215
epoch: 68 training_loss 3.029493169784546 test_loss: 2.918463706970215
epoch: 69 training_loss 3.069828255176544 test_loss: 2.949760627746582
epoch: 70 training_loss 2.974675760269165 test_loss: 3.048398017883301
epoch: 71 training_loss 2.946747753620148 test_loss: 2.9368152618408203
epoch: 72 training_loss 2.961445302963257 test_loss: 3.0367050170898438
epoch: 73 training_loss 2.95295156955719 test_loss: 3.2087451934814455
epoch: 74 training_loss 2.924606201648712 test_loss: 2.9225341796875
epoch: 75 training_loss 2.9639324712753297 test_loss: 2.9584346771240235
epoch: 76 training_loss 2.9019503021240234 test_loss: 2.7442445755004883
epoch: 77 training_loss 2.8808760905265807 test_loss: 2.9244817733764648
epoch: 78 training_loss 2.9073025155067445 test_loss: 2.7949228286743164
epoch: 79 training_loss 2.884340217113495 test_loss: 2.9284170150756834
epoch: 80 training_loss 2.8089896273612975 test_loss: 2.9650211334228516
epoch: 81 training_loss 2.869017126560211 test_loss: 3.1229108810424804
epoch: 82 training_loss 2.8250608003139495 test_loss: 2.9503631591796875
epoch: 83 training_loss 2.8030163478851318 test_loss: 2.975566291809082
epoch: 84 training_loss 2.8340720081329347 test_loss: 2.7258478164672852
epoch: 85 training_loss 2.7922447299957276 test_loss: 2.6726818084716797
epoch: 86 training_loss 2.9072409772872927 test_loss: 2.8114559173583986
epoch: 87 training_loss 2.8325159192085265 test_loss: 2.846473503112793
epoch: 88 training_loss 2.7412778615951536 test_loss: 3.029562568664551
epoch: 89 training_loss 2.7229870700836183 test_loss: 2.7463848114013674
epoch: 90 training_loss 2.7788856232166292 test_loss: 2.763844108581543
epoch: 91 training_loss 2.8340191316604613 test_loss: 2.855992317199707
epoch: 92 training_loss 2.817823045253754 test_loss: 2.8670392990112306
epoch: 93 training_loss 2.72050089597702 test_loss: 2.816667366027832
epoch: 94 training_loss 2.675359134674072 test_loss: 2.6626686096191405
epoch: 95 training_loss 2.6596632194519043 test_loss: 2.835688018798828
epoch: 96 training_loss 2.7260434663295747 test_loss: 2.6801525115966798
epoch: 97 training_loss 2.7312847495079042 test_loss: 2.727174758911133
epoch: 98 training_loss 2.6830882453918456 test_loss: 2.7103507995605467
epoch: 99 training_loss 2.691907293796539 test_loss: 2.7103082656860353
epoch: 100 training_loss 2.6707570719718934 test_loss: 2.6380487442016602
epoch: 101 training_loss 2.6950130963325503 test_loss: 2.704370880126953
epoch: 102 training_loss 2.6855885231494905 test_loss: 2.6504037857055662
epoch: 103 training_loss 2.621089668273926 test_loss: 2.74508056640625
epoch: 104 training_loss 2.6028564131259917 test_loss: 2.6843582153320313
epoch: 105 training_loss 2.64632395029068 test_loss: 2.675873947143555
epoch: 106 training_loss 2.5784759855270387 test_loss: 2.658808708190918
epoch: 107 training_loss 2.6928675246238707 test_loss: 2.7679775238037108
epoch: 108 training_loss 2.5983179891109467 test_loss: 2.6395254135131836
epoch: 109 training_loss 2.5767703688144685 test_loss: 2.5036455154418946
epoch: 110 training_loss 2.6180478703975676 test_loss: 2.5055503845214844
epoch: 111 training_loss 2.639234793186188 test_loss: 2.642903709411621
epoch: 112 training_loss 2.6047652339935303 test_loss: 2.617510986328125
epoch: 113 training_loss 2.555601712465286 test_loss: 2.53416748046875
epoch: 114 training_loss 2.518124362230301 test_loss: 2.7444265365600584
epoch: 115 training_loss 2.576787210702896 test_loss: 2.646470642089844
epoch: 116 training_loss 2.605209188461304 test_loss: 2.5813104629516603
epoch: 117 training_loss 2.644040374755859 test_loss: 2.6433317184448244
epoch: 118 training_loss 2.530240191221237 test_loss: 2.517518424987793
epoch: 119 training_loss 2.5087298691272735 test_loss: 2.6623998641967774
epoch: 120 training_loss 2.5568922638893126 test_loss: 2.448751449584961
epoch: 121 training_loss 2.55951780796051 test_loss: 2.6755298614501952
epoch: 122 training_loss 2.5575843954086306 test_loss: 2.395293045043945
epoch: 123 training_loss 2.583830913305283 test_loss: 2.4884464263916017
epoch: 124 training_loss 2.5408081817626953 test_loss: 2.4327890396118166
epoch: 125 training_loss 2.527657812833786 test_loss: 2.538017463684082
epoch: 126 training_loss 2.5944621896743776 test_loss: 2.640085983276367
epoch: 127 training_loss 2.510599921941757 test_loss: 2.6205322265625
epoch: 128 training_loss 2.482046377658844 test_loss: 2.5695425033569337
epoch: 129 training_loss 2.5059753251075745 test_loss: 2.5131479263305665
epoch: 130 training_loss 2.479442985057831 test_loss: 2.61108341217041
epoch: 131 training_loss 2.4411622107028963 test_loss: 2.36385440826416
epoch: 132 training_loss 2.5329579591751097 test_loss: 2.434141159057617
epoch: 133 training_loss 2.5226972568035126 test_loss: 2.3923097610473634
epoch: 134 training_loss 2.4004788255691527 test_loss: 2.5700201034545898
epoch: 135 training_loss 2.541772737503052 test_loss: 2.4833736419677734
epoch: 136 training_loss 2.5473910009860994 test_loss: 2.534307861328125
epoch: 137 training_loss 2.5350472354888915 test_loss: 2.4554426193237306
epoch: 138 training_loss 2.532214957475662 test_loss: 2.4851314544677736
epoch: 139 training_loss 2.4952284824848174 test_loss: 2.4718284606933594
epoch: 140 training_loss 2.435163823366165 test_loss: 2.432521629333496
epoch: 141 training_loss 2.507749993801117 test_loss: 2.323108100891113
epoch: 142 training_loss 2.4368313086032867 test_loss: 2.46893310546875
epoch: 143 training_loss 2.4424754512310027 test_loss: 2.3788387298583986
epoch: 144 training_loss 2.4278286480903626 test_loss: 2.4736587524414064
epoch: 145 training_loss 2.3602406191825867 test_loss: 2.4307201385498045
epoch: 146 training_loss 2.4265960574150087 test_loss: 2.3978702545166017
epoch: 147 training_loss 2.396338014602661 test_loss: 2.5205793380737305
epoch: 148 training_loss 2.4856182527542114 test_loss: 2.3552133560180666
epoch: 149 training_loss 2.433257911205292 test_loss: 2.504878807067871
3949.893512858569
episode: 0 training return: tensor(422.8246, device='cuda:0')
episode: 1 training return: tensor(322.4528, device='cuda:0')
episode: 2 training return: tensor(308.0037, device='cuda:0')
episode: 3 training return: tensor(211.7423, device='cuda:0')
epoch: 1 test_true_pfm: 3988.0173752489295 sim_pfm: 415.3552312185445
episode: 4 training return: tensor(370.4762, device='cuda:0')
episode: 5 training return: tensor(448.8824, device='cuda:0')
episode: 6 training return: tensor(328.5860, device='cuda:0')
episode: 7 training return: tensor(185.6766, device='cuda:0')
epoch: 2 test_true_pfm: 3904.0786318339688 sim_pfm: 319.32213748110615
episode: 8 training return: tensor(341.6178, device='cuda:0')
episode: 9 training return: tensor(234.2334, device='cuda:0')
episode: 10 training return: tensor(255.5777, device='cuda:0')
episode: 11 training return: tensor(218.9932, device='cuda:0')
epoch: 3 test_true_pfm: 3994.6140443143927 sim_pfm: 325.1733547525073
episode: 12 training return: tensor(306.0995, device='cuda:0')
episode: 13 training return: tensor(258.4446, device='cuda:0')
episode: 14 training return: tensor(427.2148, device='cuda:0')
episode: 15 training return: tensor(2.8225, device='cuda:0')
epoch: 4 test_true_pfm: 4041.1577774721513 sim_pfm: 363.7659520080003
episode: 16 training return: tensor(318.8359, device='cuda:0')
episode: 17 training return: tensor(450.1677, device='cuda:0')
episode: 18 training return: tensor(357.6041, device='cuda:0')
episode: 19 training return: tensor(322.1736, device='cuda:0')
epoch: 5 test_true_pfm: 3984.6830250058047 sim_pfm: 418.0751823548926
episode: 20 training return: tensor(346.6850, device='cuda:0')
episode: 21 training return: tensor(379.4558, device='cuda:0')
episode: 22 training return: tensor(453.9121, device='cuda:0')
episode: 23 training return: tensor(478.5627, device='cuda:0')
epoch: 6 test_true_pfm: 3986.792924365889 sim_pfm: 393.79461542524706
episode: 24 training return: tensor(-757.3796, device='cuda:0')
episode: 25 training return: tensor(299.4661, device='cuda:0')
episode: 26 training return: tensor(-628.8091, device='cuda:0')
episode: 27 training return: tensor(378.0178, device='cuda:0')
epoch: 7 test_true_pfm: 4066.861731193842 sim_pfm: 392.5182746044496
episode: 28 training return: tensor(226.9170, device='cuda:0')
episode: 29 training return: tensor(402.1945, device='cuda:0')
episode: 30 training return: tensor(250.4889, device='cuda:0')
episode: 31 training return: tensor(-745.8847, device='cuda:0')
epoch: 8 test_true_pfm: 4123.834599714489 sim_pfm: 509.0865291050674
episode: 32 training return: tensor(343.6701, device='cuda:0')
episode: 33 training return: tensor(429.0511, device='cuda:0')
episode: 34 training return: tensor(388.2169, device='cuda:0')
episode: 35 training return: tensor(356.1575, device='cuda:0')
epoch: 9 test_true_pfm: 4071.564588887171 sim_pfm: 401.19101810049807
episode: 36 training return: tensor(469.6604, device='cuda:0')
episode: 37 training return: tensor(430.2778, device='cuda:0')
episode: 38 training return: tensor(436.3042, device='cuda:0')
episode: 39 training return: tensor(408.4644, device='cuda:0')
epoch: 10 test_true_pfm: 3576.5512462059305 sim_pfm: 348.0905144481415
episode: 40 training return: tensor(243.3936, device='cuda:0')
episode: 41 training return: tensor(386.4659, device='cuda:0')
episode: 42 training return: tensor(480.0526, device='cuda:0')
episode: 43 training return: tensor(476.1135, device='cuda:0')
epoch: 11 test_true_pfm: 4103.099007875978 sim_pfm: 386.9259373959697
episode: 44 training return: tensor(414.3759, device='cuda:0')
episode: 45 training return: tensor(388.9801, device='cuda:0')
episode: 46 training return: tensor(393.3407, device='cuda:0')
episode: 47 training return: tensor(468.0976, device='cuda:0')
epoch: 12 test_true_pfm: 4114.143046559501 sim_pfm: 469.6377787748643
episode: 48 training return: tensor(103.8328, device='cuda:0')
episode: 49 training return: tensor(393.7933, device='cuda:0')
episode: 50 training return: tensor(310.9475, device='cuda:0')
episode: 51 training return: tensor(408.1094, device='cuda:0')
epoch: 13 test_true_pfm: 3979.429555817895 sim_pfm: 452.0171594081039
episode: 52 training return: tensor(442.5529, device='cuda:0')
episode: 53 training return: tensor(439.9690, device='cuda:0')
episode: 54 training return: tensor(365.0642, device='cuda:0')
episode: 55 training return: tensor(29.1714, device='cuda:0')
epoch: 14 test_true_pfm: 3883.9826131877835 sim_pfm: 378.2480593538494
episode: 56 training return: tensor(277.3127, device='cuda:0')
episode: 57 training return: tensor(345.3875, device='cuda:0')
episode: 58 training return: tensor(414.3007, device='cuda:0')
episode: 59 training return: tensor(507.3001, device='cuda:0')
epoch: 15 test_true_pfm: 4063.999657377659 sim_pfm: 450.372389001733
episode: 60 training return: tensor(-720.3442, device='cuda:0')
episode: 61 training return: tensor(441.6849, device='cuda:0')
episode: 62 training return: tensor(422.4115, device='cuda:0')
episode: 63 training return: tensor(336.5485, device='cuda:0')
epoch: 16 test_true_pfm: 4200.433118133843 sim_pfm: 349.45817936360254
episode: 64 training return: tensor(6.3784, device='cuda:0')
episode: 65 training return: tensor(356.7267, device='cuda:0')
episode: 66 training return: tensor(395.3182, device='cuda:0')
episode: 67 training return: tensor(-771.5294, device='cuda:0')
epoch: 17 test_true_pfm: 4072.406172643838 sim_pfm: 475.2999428405116
episode: 68 training return: tensor(401.6480, device='cuda:0')
episode: 69 training return: tensor(424.0955, device='cuda:0')
episode: 70 training return: tensor(488.8106, device='cuda:0')
episode: 71 training return: tensor(391.9548, device='cuda:0')
epoch: 18 test_true_pfm: 4048.3523529587424 sim_pfm: 463.5660333664079
episode: 72 training return: tensor(413.3634, device='cuda:0')
episode: 73 training return: tensor(474.0215, device='cuda:0')
episode: 74 training return: tensor(354.5650, device='cuda:0')
episode: 75 training return: tensor(433.3679, device='cuda:0')
epoch: 19 test_true_pfm: 4080.3577733743173 sim_pfm: 476.0493555305002
episode: 76 training return: tensor(352.9551, device='cuda:0')
episode: 77 training return: tensor(206.7829, device='cuda:0')
episode: 78 training return: tensor(373.2707, device='cuda:0')
episode: 79 training return: tensor(503.1629, device='cuda:0')
epoch: 20 test_true_pfm: 4111.613593969134 sim_pfm: 486.27337003150024
episode: 80 training return: tensor(539.3745, device='cuda:0')
episode: 81 training return: tensor(348.8606, device='cuda:0')
episode: 82 training return: tensor(449.4800, device='cuda:0')
episode: 83 training return: tensor(363.3578, device='cuda:0')
epoch: 21 test_true_pfm: 4002.6653436554825 sim_pfm: 385.67528106853325
episode: 84 training return: tensor(369.1974, device='cuda:0')
episode: 85 training return: tensor(405.5800, device='cuda:0')
episode: 86 training return: tensor(440.5352, device='cuda:0')
episode: 87 training return: tensor(433.6058, device='cuda:0')
epoch: 22 test_true_pfm: 4155.355286974069 sim_pfm: 483.3534458274371
episode: 88 training return: tensor(380.0187, device='cuda:0')
episode: 89 training return: tensor(392.4091, device='cuda:0')
episode: 90 training return: tensor(-400.3684, device='cuda:0')
episode: 91 training return: tensor(383.9786, device='cuda:0')
epoch: 23 test_true_pfm: 3730.518211093525 sim_pfm: 510.91250411424943
episode: 92 training return: tensor(508.0609, device='cuda:0')
episode: 93 training return: tensor(525.4803, device='cuda:0')
episode: 94 training return: tensor(458.5345, device='cuda:0')
episode: 95 training return: tensor(441.1178, device='cuda:0')
epoch: 24 test_true_pfm: 4131.500887120702 sim_pfm: 335.31065736705204
episode: 96 training return: tensor(364.9910, device='cuda:0')
episode: 97 training return: tensor(15.9908, device='cuda:0')
episode: 98 training return: tensor(459.4679, device='cuda:0')
episode: 99 training return: tensor(429.6436, device='cuda:0')
epoch: 25 test_true_pfm: 4099.472985103745 sim_pfm: 417.5824961176647
episode: 100 training return: tensor(406.0761, device='cuda:0')
episode: 101 training return: tensor(464.2916, device='cuda:0')
episode: 102 training return: tensor(164.0318, device='cuda:0')
episode: 103 training return: tensor(329.3144, device='cuda:0')
epoch: 26 test_true_pfm: 4070.6426743663214 sim_pfm: 403.4488031852331
episode: 104 training return: tensor(399.0627, device='cuda:0')
episode: 105 training return: tensor(409.4624, device='cuda:0')
episode: 106 training return: tensor(377.4019, device='cuda:0')
episode: 107 training return: tensor(395.3176, device='cuda:0')
epoch: 27 test_true_pfm: 4126.926668244748 sim_pfm: 527.7631434314729
episode: 108 training return: tensor(431.7923, device='cuda:0')
episode: 109 training return: tensor(424.3705, device='cuda:0')
episode: 110 training return: tensor(450.0613, device='cuda:0')
episode: 111 training return: tensor(496.1671, device='cuda:0')
epoch: 28 test_true_pfm: 4244.8910796468745 sim_pfm: 439.98471513646655
episode: 112 training return: tensor(405.9125, device='cuda:0')
episode: 113 training return: tensor(480.2775, device='cuda:0')
episode: 114 training return: tensor(409.0232, device='cuda:0')
episode: 115 training return: tensor(436.4339, device='cuda:0')
epoch: 29 test_true_pfm: 4021.2557160331667 sim_pfm: 413.4638600993591
episode: 116 training return: tensor(72.9512, device='cuda:0')
episode: 117 training return: tensor(469.0865, device='cuda:0')
episode: 118 training return: tensor(276.8522, device='cuda:0')
episode: 119 training return: tensor(498.7348, device='cuda:0')
epoch: 30 test_true_pfm: 3931.397256410985 sim_pfm: 401.81009462584433
episode: 120 training return: tensor(297.9987, device='cuda:0')
episode: 121 training return: tensor(430.5474, device='cuda:0')
episode: 122 training return: tensor(320.0951, device='cuda:0')
episode: 123 training return: tensor(333.1917, device='cuda:0')
epoch: 31 test_true_pfm: 4107.0368100200185 sim_pfm: 472.7748296239879
episode: 124 training return: tensor(424.2797, device='cuda:0')
episode: 125 training return: tensor(488.2623, device='cuda:0')
episode: 126 training return: tensor(489.4718, device='cuda:0')
episode: 127 training return: tensor(479.9014, device='cuda:0')
epoch: 32 test_true_pfm: 4104.418303195352 sim_pfm: 457.19976503262296
episode: 128 training return: tensor(491.6301, device='cuda:0')
episode: 129 training return: tensor(392.2644, device='cuda:0')
episode: 130 training return: tensor(346.8917, device='cuda:0')
episode: 131 training return: tensor(489.0775, device='cuda:0')
epoch: 33 test_true_pfm: 4151.980606255814 sim_pfm: 437.76969654107234
episode: 132 training return: tensor(324.5968, device='cuda:0')
episode: 133 training return: tensor(375.8241, device='cuda:0')
episode: 134 training return: tensor(315.5349, device='cuda:0')
episode: 135 training return: tensor(452.9890, device='cuda:0')
epoch: 34 test_true_pfm: 4124.385442961619 sim_pfm: 419.05927359727986
episode: 136 training return: tensor(425.0471, device='cuda:0')
episode: 137 training return: tensor(489.0385, device='cuda:0')
episode: 138 training return: tensor(430.8424, device='cuda:0')
episode: 139 training return: tensor(435.6742, device='cuda:0')
epoch: 35 test_true_pfm: 4037.244245806772 sim_pfm: 414.1351376666377
episode: 140 training return: tensor(426.1719, device='cuda:0')
episode: 141 training return: tensor(508.0036, device='cuda:0')
episode: 142 training return: tensor(470.5966, device='cuda:0')
episode: 143 training return: tensor(481.3512, device='cuda:0')
epoch: 36 test_true_pfm: 4029.560445112426 sim_pfm: 448.9620150375219
episode: 144 training return: tensor(447.7498, device='cuda:0')
episode: 145 training return: tensor(448.0865, device='cuda:0')
episode: 146 training return: tensor(412.7970, device='cuda:0')
episode: 147 training return: tensor(436.5097, device='cuda:0')
epoch: 37 test_true_pfm: 4062.41009994515 sim_pfm: 518.6871838446532
episode: 148 training return: tensor(526.9310, device='cuda:0')
episode: 149 training return: tensor(480.7612, device='cuda:0')
episode: 150 training return: tensor(344.3230, device='cuda:0')
episode: 151 training return: tensor(406.2992, device='cuda:0')
epoch: 38 test_true_pfm: 4132.16680260368 sim_pfm: 470.4762110258259
episode: 152 training return: tensor(384.8427, device='cuda:0')
episode: 153 training return: tensor(393.1879, device='cuda:0')
episode: 154 training return: tensor(383.4376, device='cuda:0')
episode: 155 training return: tensor(434.2071, device='cuda:0')
epoch: 39 test_true_pfm: 4137.873740059557 sim_pfm: 450.8800586652651
episode: 156 training return: tensor(368.7392, device='cuda:0')
episode: 157 training return: tensor(373.9209, device='cuda:0')
episode: 158 training return: tensor(365.0832, device='cuda:0')
episode: 159 training return: tensor(466.5972, device='cuda:0')
epoch: 40 test_true_pfm: 4198.802210303026 sim_pfm: 519.540170945072
episode: 160 training return: tensor(434.4719, device='cuda:0')
episode: 161 training return: tensor(228.5504, device='cuda:0')
episode: 162 training return: tensor(250.6235, device='cuda:0')
episode: 163 training return: tensor(208.3425, device='cuda:0')
epoch: 41 test_true_pfm: 4193.975655719375 sim_pfm: 436.72649881072965
episode: 164 training return: tensor(409.1364, device='cuda:0')
episode: 165 training return: tensor(445.9120, device='cuda:0')
episode: 166 training return: tensor(462.6520, device='cuda:0')
episode: 167 training return: tensor(362.6914, device='cuda:0')
epoch: 42 test_true_pfm: 4093.908765238963 sim_pfm: 478.64842792235623
episode: 168 training return: tensor(128.2408, device='cuda:0')
episode: 169 training return: tensor(300.2255, device='cuda:0')
episode: 170 training return: tensor(458.8365, device='cuda:0')
episode: 171 training return: tensor(513.2206, device='cuda:0')
epoch: 43 test_true_pfm: 4123.164159270555 sim_pfm: 522.6329510452924
episode: 172 training return: tensor(339.4294, device='cuda:0')
episode: 173 training return: tensor(429.3530, device='cuda:0')
episode: 174 training return: tensor(403.5289, device='cuda:0')
episode: 175 training return: tensor(299.6521, device='cuda:0')
epoch: 44 test_true_pfm: 4142.7092761157965 sim_pfm: 388.85269944682176
episode: 176 training return: tensor(487.7243, device='cuda:0')
episode: 177 training return: tensor(422.8517, device='cuda:0')
episode: 178 training return: tensor(464.8982, device='cuda:0')
episode: 179 training return: tensor(422.5196, device='cuda:0')
epoch: 45 test_true_pfm: 4074.057801169792 sim_pfm: 474.67172556787654
episode: 180 training return: tensor(466.6297, device='cuda:0')
episode: 181 training return: tensor(448.8097, device='cuda:0')
episode: 182 training return: tensor(470.4936, device='cuda:0')
episode: 183 training return: tensor(375.7215, device='cuda:0')
epoch: 46 test_true_pfm: 4125.327394879236 sim_pfm: 426.3270969592656
episode: 184 training return: tensor(405.8418, device='cuda:0')
episode: 185 training return: tensor(453.4872, device='cuda:0')
episode: 186 training return: tensor(202.0994, device='cuda:0')
episode: 187 training return: tensor(393.2103, device='cuda:0')
epoch: 47 test_true_pfm: 4168.615708884542 sim_pfm: 505.12182086353033
episode: 188 training return: tensor(477.8951, device='cuda:0')
episode: 189 training return: tensor(454.5916, device='cuda:0')
episode: 190 training return: tensor(509.6698, device='cuda:0')
episode: 191 training return: tensor(349.2091, device='cuda:0')
epoch: 48 test_true_pfm: 4167.664489701371 sim_pfm: 473.1263489893948
episode: 192 training return: tensor(507.5744, device='cuda:0')
episode: 193 training return: tensor(492.3946, device='cuda:0')
episode: 194 training return: tensor(465.9641, device='cuda:0')
episode: 195 training return: tensor(416.0970, device='cuda:0')
epoch: 49 test_true_pfm: 4094.5408936968943 sim_pfm: 451.11127414335107
episode: 196 training return: tensor(442.8901, device='cuda:0')
episode: 197 training return: tensor(310.8873, device='cuda:0')
episode: 198 training return: tensor(470.3400, device='cuda:0')
episode: 199 training return: tensor(481.8624, device='cuda:0')
epoch: 50 test_true_pfm: 3960.1222928451025 sim_pfm: 390.5910365927072
episode: 200 training return: tensor(421.8153, device='cuda:0')
episode: 201 training return: tensor(472.9656, device='cuda:0')
episode: 202 training return: tensor(342.4016, device='cuda:0')
episode: 203 training return: tensor(402.1007, device='cuda:0')
epoch: 51 test_true_pfm: 4178.647031589667 sim_pfm: 500.79718507184106
episode: 204 training return: tensor(479.7411, device='cuda:0')
episode: 205 training return: tensor(436.9945, device='cuda:0')
episode: 206 training return: tensor(366.8768, device='cuda:0')
episode: 207 training return: tensor(515.9165, device='cuda:0')
epoch: 52 test_true_pfm: 4010.680551818323 sim_pfm: 454.35034309729235
episode: 208 training return: tensor(525.9381, device='cuda:0')
episode: 209 training return: tensor(435.4449, device='cuda:0')
episode: 210 training return: tensor(443.2341, device='cuda:0')
episode: 211 training return: tensor(399.4449, device='cuda:0')
epoch: 53 test_true_pfm: 4107.62574487055 sim_pfm: 522.7945201045756
episode: 212 training return: tensor(359.5296, device='cuda:0')
episode: 213 training return: tensor(449.6118, device='cuda:0')
episode: 214 training return: tensor(424.1680, device='cuda:0')
episode: 215 training return: tensor(493.0965, device='cuda:0')
epoch: 54 test_true_pfm: 4047.450690621108 sim_pfm: 416.2442000172353
episode: 216 training return: tensor(421.3586, device='cuda:0')
episode: 217 training return: tensor(460.2779, device='cuda:0')
episode: 218 training return: tensor(466.0664, device='cuda:0')
episode: 219 training return: tensor(494.2179, device='cuda:0')
epoch: 55 test_true_pfm: 4006.829648429592 sim_pfm: 434.70233680546517
episode: 220 training return: tensor(478.8005, device='cuda:0')
episode: 221 training return: tensor(330.7241, device='cuda:0')
episode: 222 training return: tensor(482.8758, device='cuda:0')
episode: 223 training return: tensor(437.1394, device='cuda:0')
epoch: 56 test_true_pfm: 4104.560465280448 sim_pfm: 463.0425729043006
episode: 224 training return: tensor(395.5563, device='cuda:0')
episode: 225 training return: tensor(486.1235, device='cuda:0')
episode: 226 training return: tensor(497.9999, device='cuda:0')
episode: 227 training return: tensor(441.2643, device='cuda:0')
epoch: 57 test_true_pfm: 4182.762194665462 sim_pfm: 523.3837538471076
episode: 228 training return: tensor(495.1623, device='cuda:0')
episode: 229 training return: tensor(440.1954, device='cuda:0')
episode: 230 training return: tensor(495.2738, device='cuda:0')
episode: 231 training return: tensor(493.6121, device='cuda:0')
epoch: 58 test_true_pfm: 4141.7033672606585 sim_pfm: 482.4035273484963
episode: 232 training return: tensor(463.4633, device='cuda:0')
episode: 233 training return: tensor(402.2716, device='cuda:0')
episode: 234 training return: tensor(481.0699, device='cuda:0')
episode: 235 training return: tensor(497.1321, device='cuda:0')
epoch: 59 test_true_pfm: 4104.774661436561 sim_pfm: 456.1145476967795
episode: 236 training return: tensor(336.9636, device='cuda:0')
episode: 237 training return: tensor(233.2257, device='cuda:0')
episode: 238 training return: tensor(460.1075, device='cuda:0')
episode: 239 training return: tensor(426.8379, device='cuda:0')
epoch: 60 test_true_pfm: 4175.140695023821 sim_pfm: 479.82605925772805
episode: 240 training return: tensor(508.6396, device='cuda:0')
episode: 241 training return: tensor(207.3919, device='cuda:0')
episode: 242 training return: tensor(456.1669, device='cuda:0')
episode: 243 training return: tensor(393.0392, device='cuda:0')
epoch: 61 test_true_pfm: 4055.646131903501 sim_pfm: 444.9853237264324
episode: 244 training return: tensor(484.2135, device='cuda:0')
episode: 245 training return: tensor(225.0937, device='cuda:0')
episode: 246 training return: tensor(464.3356, device='cuda:0')
episode: 247 training return: tensor(445.5281, device='cuda:0')
epoch: 62 test_true_pfm: 4130.608972970525 sim_pfm: 473.5040786936297
episode: 248 training return: tensor(377.1417, device='cuda:0')
episode: 249 training return: tensor(414.3061, device='cuda:0')
episode: 250 training return: tensor(502.1617, device='cuda:0')
episode: 251 training return: tensor(351.8604, device='cuda:0')
epoch: 63 test_true_pfm: 4172.571551989476 sim_pfm: 453.7701874310442
episode: 252 training return: tensor(426.2226, device='cuda:0')
episode: 253 training return: tensor(146.5698, device='cuda:0')
episode: 254 training return: tensor(426.0345, device='cuda:0')
episode: 255 training return: tensor(437.0831, device='cuda:0')
epoch: 64 test_true_pfm: 4139.867012322556 sim_pfm: 481.00338070688304
episode: 256 training return: tensor(442.9733, device='cuda:0')
episode: 257 training return: tensor(420.2136, device='cuda:0')
episode: 258 training return: tensor(452.9993, device='cuda:0')
episode: 259 training return: tensor(516.1055, device='cuda:0')
epoch: 65 test_true_pfm: 4142.097124243896 sim_pfm: 484.43818918659235
episode: 260 training return: tensor(472.5973, device='cuda:0')
episode: 261 training return: tensor(497.6357, device='cuda:0')
episode: 262 training return: tensor(440.4796, device='cuda:0')
episode: 263 training return: tensor(499.4934, device='cuda:0')
epoch: 66 test_true_pfm: 4113.225660502024 sim_pfm: 460.21540293235256
episode: 264 training return: tensor(438.3904, device='cuda:0')
episode: 265 training return: tensor(444.0346, device='cuda:0')
episode: 266 training return: tensor(514.7579, device='cuda:0')
episode: 267 training return: tensor(427.4102, device='cuda:0')
epoch: 67 test_true_pfm: 4172.320374047896 sim_pfm: 499.60249914834276
episode: 268 training return: tensor(498.9333, device='cuda:0')
episode: 269 training return: tensor(491.8012, device='cuda:0')
episode: 270 training return: tensor(410.1227, device='cuda:0')
episode: 271 training return: tensor(416.7420, device='cuda:0')
epoch: 68 test_true_pfm: 4176.7417124817875 sim_pfm: 452.8433621287307
episode: 272 training return: tensor(440.4066, device='cuda:0')
episode: 273 training return: tensor(397.2186, device='cuda:0')
episode: 274 training return: tensor(433.9761, device='cuda:0')
episode: 275 training return: tensor(468.1194, device='cuda:0')
epoch: 69 test_true_pfm: 4077.8425966654936 sim_pfm: 433.3222445114807
episode: 276 training return: tensor(458.7628, device='cuda:0')
episode: 277 training return: tensor(396.5576, device='cuda:0')
episode: 278 training return: tensor(360.7177, device='cuda:0')
episode: 279 training return: tensor(521.2638, device='cuda:0')
epoch: 70 test_true_pfm: 4151.229554454637 sim_pfm: 504.85958483927726
episode: 280 training return: tensor(500.0783, device='cuda:0')
episode: 281 training return: tensor(476.7745, device='cuda:0')
episode: 282 training return: tensor(474.4877, device='cuda:0')
episode: 283 training return: tensor(334.2496, device='cuda:0')
epoch: 71 test_true_pfm: 4162.821454628153 sim_pfm: 510.068050728684
episode: 284 training return: tensor(446.8308, device='cuda:0')
episode: 285 training return: tensor(465.5052, device='cuda:0')
episode: 286 training return: tensor(427.2838, device='cuda:0')
episode: 287 training return: tensor(472.0334, device='cuda:0')
epoch: 72 test_true_pfm: 4133.914959212416 sim_pfm: 457.3116767291892
episode: 288 training return: tensor(456.8069, device='cuda:0')
episode: 289 training return: tensor(359.0858, device='cuda:0')
episode: 290 training return: tensor(485.1727, device='cuda:0')
episode: 291 training return: tensor(485.1364, device='cuda:0')
epoch: 73 test_true_pfm: 4162.14677484211 sim_pfm: 508.2308811485612
episode: 292 training return: tensor(388.7207, device='cuda:0')
episode: 293 training return: tensor(461.7300, device='cuda:0')
episode: 294 training return: tensor(373.4638, device='cuda:0')
episode: 295 training return: tensor(480.7500, device='cuda:0')
epoch: 74 test_true_pfm: 4143.732613762544 sim_pfm: 517.0617152703247
episode: 296 training return: tensor(441.7466, device='cuda:0')
episode: 297 training return: tensor(504.8047, device='cuda:0')
episode: 298 training return: tensor(376.8871, device='cuda:0')
episode: 299 training return: tensor(465.3538, device='cuda:0')
epoch: 75 test_true_pfm: 4177.689528554988 sim_pfm: 505.5189235806077
episode: 300 training return: tensor(245.4564, device='cuda:0')
episode: 301 training return: tensor(388.6015, device='cuda:0')
episode: 302 training return: tensor(477.0414, device='cuda:0')
episode: 303 training return: tensor(435.1272, device='cuda:0')
epoch: 76 test_true_pfm: 4076.7437061739433 sim_pfm: 414.10751657988294
episode: 304 training return: tensor(479.9001, device='cuda:0')
episode: 305 training return: tensor(451.2614, device='cuda:0')
episode: 306 training return: tensor(411.2628, device='cuda:0')
episode: 307 training return: tensor(462.1965, device='cuda:0')
epoch: 77 test_true_pfm: 4102.992461746318 sim_pfm: 476.3120039656933
episode: 308 training return: tensor(354.4616, device='cuda:0')
episode: 309 training return: tensor(416.8491, device='cuda:0')
episode: 310 training return: tensor(370.5391, device='cuda:0')
episode: 311 training return: tensor(476.6205, device='cuda:0')
epoch: 78 test_true_pfm: 4132.655755935048 sim_pfm: 483.118128849436
episode: 312 training return: tensor(379.1204, device='cuda:0')
episode: 313 training return: tensor(504.2130, device='cuda:0')
episode: 314 training return: tensor(505.0419, device='cuda:0')
episode: 315 training return: tensor(512.2357, device='cuda:0')
epoch: 79 test_true_pfm: 4139.138358291391 sim_pfm: 509.26112573501695
episode: 316 training return: tensor(462.6161, device='cuda:0')
episode: 317 training return: tensor(401.4043, device='cuda:0')
episode: 318 training return: tensor(419.7458, device='cuda:0')
episode: 319 training return: tensor(503.5207, device='cuda:0')
epoch: 80 test_true_pfm: 4063.6342307003583 sim_pfm: 515.6770377941624
episode: 320 training return: tensor(539.0657, device='cuda:0')
episode: 321 training return: tensor(524.8290, device='cuda:0')
episode: 322 training return: tensor(480.3221, device='cuda:0')
episode: 323 training return: tensor(365.2768, device='cuda:0')
epoch: 81 test_true_pfm: 4208.343761738364 sim_pfm: 457.86817801079206
episode: 324 training return: tensor(397.0537, device='cuda:0')
episode: 325 training return: tensor(446.7221, device='cuda:0')
episode: 326 training return: tensor(489.1932, device='cuda:0')
episode: 327 training return: tensor(510.0114, device='cuda:0')
epoch: 82 test_true_pfm: 4088.9757859813485 sim_pfm: 501.20414354181656
episode: 328 training return: tensor(458.2379, device='cuda:0')
episode: 329 training return: tensor(503.8430, device='cuda:0')
episode: 330 training return: tensor(399.0456, device='cuda:0')
episode: 331 training return: tensor(501.2500, device='cuda:0')
epoch: 83 test_true_pfm: 4119.005713740763 sim_pfm: 486.2197040167327
episode: 332 training return: tensor(460.9591, device='cuda:0')
episode: 333 training return: tensor(487.9512, device='cuda:0')
episode: 334 training return: tensor(-541.9194, device='cuda:0')
episode: 335 training return: tensor(435.4450, device='cuda:0')
epoch: 84 test_true_pfm: 4167.743287677552 sim_pfm: 445.18901680619456
episode: 336 training return: tensor(415.9861, device='cuda:0')
episode: 337 training return: tensor(514.9009, device='cuda:0')
episode: 338 training return: tensor(469.6202, device='cuda:0')
episode: 339 training return: tensor(505.6417, device='cuda:0')
epoch: 85 test_true_pfm: 4166.240753567683 sim_pfm: 494.72986810697086
episode: 340 training return: tensor(358.9919, device='cuda:0')
episode: 341 training return: tensor(343.1009, device='cuda:0')
episode: 342 training return: tensor(470.6658, device='cuda:0')
episode: 343 training return: tensor(474.7450, device='cuda:0')
epoch: 86 test_true_pfm: 4135.0269381752805 sim_pfm: 486.27131097238936
episode: 344 training return: tensor(517.7534, device='cuda:0')
episode: 345 training return: tensor(373.1867, device='cuda:0')
episode: 346 training return: tensor(429.4818, device='cuda:0')
episode: 347 training return: tensor(400.1874, device='cuda:0')
epoch: 87 test_true_pfm: 4088.2813198417157 sim_pfm: 469.2500563792807
episode: 348 training return: tensor(523.0938, device='cuda:0')
episode: 349 training return: tensor(510.8323, device='cuda:0')
episode: 350 training return: tensor(369.1397, device='cuda:0')
episode: 351 training return: tensor(467.9075, device='cuda:0')
epoch: 88 test_true_pfm: 4212.126027507836 sim_pfm: 486.0151228509688
episode: 352 training return: tensor(352.3640, device='cuda:0')
episode: 353 training return: tensor(488.0483, device='cuda:0')
episode: 354 training return: tensor(454.1447, device='cuda:0')
episode: 355 training return: tensor(422.4131, device='cuda:0')
epoch: 89 test_true_pfm: 4079.831966379279 sim_pfm: 481.7045799151626
episode: 356 training return: tensor(425.1475, device='cuda:0')
episode: 357 training return: tensor(398.8728, device='cuda:0')
episode: 358 training return: tensor(454.9635, device='cuda:0')
episode: 359 training return: tensor(494.3123, device='cuda:0')
epoch: 90 test_true_pfm: 4079.4059494259723 sim_pfm: 501.4811768013363
episode: 360 training return: tensor(446.5361, device='cuda:0')
episode: 361 training return: tensor(511.5583, device='cuda:0')
episode: 362 training return: tensor(447.8025, device='cuda:0')
episode: 363 training return: tensor(422.4134, device='cuda:0')
epoch: 91 test_true_pfm: 4102.997591473591 sim_pfm: 516.4530680625467
episode: 364 training return: tensor(365.1172, device='cuda:0')
episode: 365 training return: tensor(447.8526, device='cuda:0')
episode: 366 training return: tensor(358.5044, device='cuda:0')
episode: 367 training return: tensor(402.5153, device='cuda:0')
epoch: 92 test_true_pfm: 4139.096981719856 sim_pfm: 473.59964275569655
episode: 368 training return: tensor(523.7800, device='cuda:0')
episode: 369 training return: tensor(381.8343, device='cuda:0')
episode: 370 training return: tensor(491.4104, device='cuda:0')
episode: 371 training return: tensor(503.9085, device='cuda:0')
epoch: 93 test_true_pfm: 4245.537982058328 sim_pfm: 509.9639517373871
episode: 372 training return: tensor(480.7855, device='cuda:0')
episode: 373 training return: tensor(532.5432, device='cuda:0')
episode: 374 training return: tensor(441.6592, device='cuda:0')
episode: 375 training return: tensor(402.7523, device='cuda:0')
epoch: 94 test_true_pfm: 4061.9571114687037 sim_pfm: 463.35344096895034
episode: 376 training return: tensor(433.6251, device='cuda:0')
episode: 377 training return: tensor(415.5076, device='cuda:0')
episode: 378 training return: tensor(394.3234, device='cuda:0')
episode: 379 training return: tensor(417.0493, device='cuda:0')
epoch: 95 test_true_pfm: 4176.888526077516 sim_pfm: 501.5800330771987
episode: 380 training return: tensor(480.3385, device='cuda:0')
episode: 381 training return: tensor(499.6593, device='cuda:0')
episode: 382 training return: tensor(427.1621, device='cuda:0')
episode: 383 training return: tensor(478.4846, device='cuda:0')
epoch: 96 test_true_pfm: 4148.656087889235 sim_pfm: 545.0044798915042
episode: 384 training return: tensor(370.5543, device='cuda:0')
episode: 385 training return: tensor(481.3016, device='cuda:0')
episode: 386 training return: tensor(501.1072, device='cuda:0')
episode: 387 training return: tensor(437.2560, device='cuda:0')
epoch: 97 test_true_pfm: 4147.549984141599 sim_pfm: 437.8928986740066
episode: 388 training return: tensor(475.7989, device='cuda:0')
episode: 389 training return: tensor(456.5081, device='cuda:0')
episode: 390 training return: tensor(427.2416, device='cuda:0')
episode: 391 training return: tensor(382.1171, device='cuda:0')
epoch: 98 test_true_pfm: 4170.331537880314 sim_pfm: 504.5214183013886
episode: 392 training return: tensor(-480.9439, device='cuda:0')
episode: 393 training return: tensor(516.0583, device='cuda:0')
episode: 394 training return: tensor(487.4928, device='cuda:0')
episode: 395 training return: tensor(362.8136, device='cuda:0')
epoch: 99 test_true_pfm: 4205.489776478659 sim_pfm: 503.4004686886522
episode: 396 training return: tensor(494.0357, device='cuda:0')
episode: 397 training return: tensor(432.0780, device='cuda:0')
episode: 398 training return: tensor(492.4027, device='cuda:0')
episode: 399 training return: tensor(486.5060, device='cuda:0')
epoch: 100 test_true_pfm: 4144.722705046696 sim_pfm: 502.00152227604605
episode: 400 training return: tensor(446.8580, device='cuda:0')
episode: 401 training return: tensor(346.6938, device='cuda:0')
episode: 402 training return: tensor(439.3781, device='cuda:0')
episode: 403 training return: tensor(505.9021, device='cuda:0')
epoch: 101 test_true_pfm: 4137.625554104502 sim_pfm: 499.1306107894052
episode: 404 training return: tensor(326.8430, device='cuda:0')
episode: 405 training return: tensor(478.9589, device='cuda:0')
episode: 406 training return: tensor(508.1481, device='cuda:0')
episode: 407 training return: tensor(478.9560, device='cuda:0')
epoch: 102 test_true_pfm: 4134.720799672231 sim_pfm: 488.1485476504701
episode: 408 training return: tensor(514.4163, device='cuda:0')
episode: 409 training return: tensor(505.0850, device='cuda:0')
episode: 410 training return: tensor(466.9276, device='cuda:0')
episode: 411 training return: tensor(387.1159, device='cuda:0')
epoch: 103 test_true_pfm: 4204.143244914353 sim_pfm: 495.0541288756067
episode: 412 training return: tensor(430.1718, device='cuda:0')
episode: 413 training return: tensor(535.4071, device='cuda:0')
episode: 414 training return: tensor(450.9890, device='cuda:0')
episode: 415 training return: tensor(-619.4725, device='cuda:0')
epoch: 104 test_true_pfm: 4109.7248109535985 sim_pfm: 477.8858229876302
episode: 416 training return: tensor(454.4126, device='cuda:0')
episode: 417 training return: tensor(-686.5098, device='cuda:0')
episode: 418 training return: tensor(484.1425, device='cuda:0')
episode: 419 training return: tensor(532.5262, device='cuda:0')
epoch: 105 test_true_pfm: 4147.1849100748395 sim_pfm: 470.8057472338939
episode: 420 training return: tensor(502.9886, device='cuda:0')
episode: 421 training return: tensor(542.0080, device='cuda:0')
episode: 422 training return: tensor(390.5629, device='cuda:0')
episode: 423 training return: tensor(472.2415, device='cuda:0')
epoch: 106 test_true_pfm: 4155.997973071079 sim_pfm: 503.86224510264583
episode: 424 training return: tensor(459.0020, device='cuda:0')
episode: 425 training return: tensor(-651.5892, device='cuda:0')
episode: 426 training return: tensor(552.9563, device='cuda:0')
episode: 427 training return: tensor(489.8598, device='cuda:0')
epoch: 107 test_true_pfm: 4148.743408595831 sim_pfm: 456.33926356304437
episode: 428 training return: tensor(472.7042, device='cuda:0')
episode: 429 training return: tensor(515.2345, device='cuda:0')
episode: 430 training return: tensor(385.0193, device='cuda:0')
episode: 431 training return: tensor(-532.9848, device='cuda:0')
epoch: 108 test_true_pfm: 4183.05714975948 sim_pfm: 490.6475478145294
episode: 432 training return: tensor(408.0476, device='cuda:0')
episode: 433 training return: tensor(418.6295, device='cuda:0')
episode: 434 training return: tensor(424.2703, device='cuda:0')
episode: 435 training return: tensor(507.5127, device='cuda:0')
epoch: 109 test_true_pfm: 4185.519575711564 sim_pfm: 515.0315202856824
episode: 436 training return: tensor(470.3329, device='cuda:0')
episode: 437 training return: tensor(513.7761, device='cuda:0')
episode: 438 training return: tensor(458.5904, device='cuda:0')
episode: 439 training return: tensor(454.0563, device='cuda:0')
epoch: 110 test_true_pfm: 4114.81313744561 sim_pfm: 499.6324363980675
episode: 440 training return: tensor(469.7236, device='cuda:0')
episode: 441 training return: tensor(387.7074, device='cuda:0')
episode: 442 training return: tensor(490.3307, device='cuda:0')
episode: 443 training return: tensor(514.8390, device='cuda:0')
epoch: 111 test_true_pfm: 4185.910072252721 sim_pfm: 507.5488327759085
episode: 444 training return: tensor(486.9156, device='cuda:0')
episode: 445 training return: tensor(513.4484, device='cuda:0')
episode: 446 training return: tensor(532.2980, device='cuda:0')
episode: 447 training return: tensor(498.6159, device='cuda:0')
epoch: 112 test_true_pfm: 4178.81473699477 sim_pfm: 455.30363995500375
episode: 448 training return: tensor(487.6668, device='cuda:0')
episode: 449 training return: tensor(509.9675, device='cuda:0')
episode: 450 training return: tensor(442.7292, device='cuda:0')
episode: 451 training return: tensor(504.8381, device='cuda:0')
epoch: 113 test_true_pfm: 4154.449002439044 sim_pfm: 508.7478555399769
episode: 452 training return: tensor(429.8683, device='cuda:0')
episode: 453 training return: tensor(387.7752, device='cuda:0')
episode: 454 training return: tensor(502.5031, device='cuda:0')
episode: 455 training return: tensor(502.0098, device='cuda:0')
epoch: 114 test_true_pfm: 4131.3150376732 sim_pfm: 511.4957354869305
episode: 456 training return: tensor(469.5490, device='cuda:0')
episode: 457 training return: tensor(480.3609, device='cuda:0')
episode: 458 training return: tensor(453.3441, device='cuda:0')
episode: 459 training return: tensor(480.7074, device='cuda:0')
epoch: 115 test_true_pfm: 4108.951682260429 sim_pfm: 459.5413006434683
episode: 460 training return: tensor(463.7917, device='cuda:0')
episode: 461 training return: tensor(447.4789, device='cuda:0')
episode: 462 training return: tensor(413.7726, device='cuda:0')
episode: 463 training return: tensor(482.7853, device='cuda:0')
epoch: 116 test_true_pfm: 4150.396957199702 sim_pfm: 511.97057051898446
episode: 464 training return: tensor(435.8471, device='cuda:0')
episode: 465 training return: tensor(535.0220, device='cuda:0')
episode: 466 training return: tensor(468.1562, device='cuda:0')
episode: 467 training return: tensor(435.4099, device='cuda:0')
epoch: 117 test_true_pfm: 4083.746351599537 sim_pfm: 492.2645962795553
episode: 468 training return: tensor(506.5286, device='cuda:0')
episode: 469 training return: tensor(499.0826, device='cuda:0')
episode: 470 training return: tensor(487.9842, device='cuda:0')
episode: 471 training return: tensor(423.3921, device='cuda:0')
epoch: 118 test_true_pfm: 4053.870044089322 sim_pfm: 469.3313199258312
episode: 472 training return: tensor(497.6507, device='cuda:0')
episode: 473 training return: tensor(471.1264, device='cuda:0')
episode: 474 training return: tensor(483.0836, device='cuda:0')
episode: 475 training return: tensor(482.6608, device='cuda:0')
epoch: 119 test_true_pfm: 4077.233003717412 sim_pfm: 485.9542749148289
episode: 476 training return: tensor(438.0286, device='cuda:0')
episode: 477 training return: tensor(516.8865, device='cuda:0')
episode: 478 training return: tensor(465.8926, device='cuda:0')
episode: 479 training return: tensor(461.0553, device='cuda:0')
epoch: 120 test_true_pfm: 4153.330785130948 sim_pfm: 498.00562364817597
episode: 480 training return: tensor(479.2589, device='cuda:0')
episode: 481 training return: tensor(499.0316, device='cuda:0')
episode: 482 training return: tensor(507.4335, device='cuda:0')
episode: 483 training return: tensor(407.6563, device='cuda:0')
epoch: 121 test_true_pfm: 4048.6870654748914 sim_pfm: 500.0945034193185
episode: 484 training return: tensor(503.3629, device='cuda:0')
episode: 485 training return: tensor(535.7427, device='cuda:0')
episode: 486 training return: tensor(411.3092, device='cuda:0')
episode: 487 training return: tensor(499.1023, device='cuda:0')
epoch: 122 test_true_pfm: 3997.5393931397284 sim_pfm: 465.18237867222825
episode: 488 training return: tensor(523.8514, device='cuda:0')
episode: 489 training return: tensor(479.2549, device='cuda:0')
episode: 490 training return: tensor(430.2180, device='cuda:0')
episode: 491 training return: tensor(498.3407, device='cuda:0')
epoch: 123 test_true_pfm: 4077.7886508116003 sim_pfm: 509.42403360502794
episode: 492 training return: tensor(481.6378, device='cuda:0')
episode: 493 training return: tensor(463.8549, device='cuda:0')
episode: 494 training return: tensor(-752.5492, device='cuda:0')
episode: 495 training return: tensor(362.0844, device='cuda:0')
epoch: 124 test_true_pfm: 4036.0999163749943 sim_pfm: 427.1530333958023
episode: 496 training return: tensor(394.4071, device='cuda:0')
episode: 497 training return: tensor(443.2861, device='cuda:0')
episode: 498 training return: tensor(349.5052, device='cuda:0')
episode: 499 training return: tensor(418.9048, device='cuda:0')
epoch: 125 test_true_pfm: 4181.8549532082225 sim_pfm: 509.69291716538527
episode: 500 training return: tensor(514.5761, device='cuda:0')
episode: 501 training return: tensor(509.0338, device='cuda:0')
episode: 502 training return: tensor(363.9757, device='cuda:0')
episode: 503 training return: tensor(405.2679, device='cuda:0')
epoch: 126 test_true_pfm: 4202.182302483078 sim_pfm: 515.5516974544347
episode: 504 training return: tensor(498.5968, device='cuda:0')
episode: 505 training return: tensor(569.4083, device='cuda:0')
episode: 506 training return: tensor(375.0518, device='cuda:0')
episode: 507 training return: tensor(470.6708, device='cuda:0')
epoch: 127 test_true_pfm: 4082.4276229715365 sim_pfm: 475.76383715256816
episode: 508 training return: tensor(416.6732, device='cuda:0')
episode: 509 training return: tensor(500.1776, device='cuda:0')
episode: 510 training return: tensor(383.6974, device='cuda:0')
episode: 511 training return: tensor(500.9606, device='cuda:0')
epoch: 128 test_true_pfm: 4141.618600792481 sim_pfm: 514.1496592224576
episode: 512 training return: tensor(408.6825, device='cuda:0')
episode: 513 training return: tensor(430.8778, device='cuda:0')
episode: 514 training return: tensor(466.5159, device='cuda:0')
episode: 515 training return: tensor(464.6381, device='cuda:0')
epoch: 129 test_true_pfm: 4098.422558319006 sim_pfm: 493.09791720685706
episode: 516 training return: tensor(-717.8751, device='cuda:0')
episode: 517 training return: tensor(492.9012, device='cuda:0')
episode: 518 training return: tensor(506.5285, device='cuda:0')
episode: 519 training return: tensor(444.0722, device='cuda:0')
epoch: 130 test_true_pfm: 4154.654889448634 sim_pfm: 475.3292389965306
episode: 520 training return: tensor(394.6972, device='cuda:0')
episode: 521 training return: tensor(492.4821, device='cuda:0')
episode: 522 training return: tensor(502.8485, device='cuda:0')
episode: 523 training return: tensor(441.4916, device='cuda:0')
epoch: 131 test_true_pfm: 4175.185780614963 sim_pfm: 470.2985426412973
episode: 524 training return: tensor(479.8596, device='cuda:0')
episode: 525 training return: tensor(440.2010, device='cuda:0')
episode: 526 training return: tensor(348.2535, device='cuda:0')
episode: 527 training return: tensor(502.2784, device='cuda:0')
epoch: 132 test_true_pfm: 4100.303631455703 sim_pfm: 524.5463904211938
episode: 528 training return: tensor(438.3134, device='cuda:0')
episode: 529 training return: tensor(494.6956, device='cuda:0')
episode: 530 training return: tensor(403.9799, device='cuda:0')
episode: 531 training return: tensor(436.3950, device='cuda:0')
epoch: 133 test_true_pfm: 4127.351663204878 sim_pfm: 488.7443145584742
episode: 532 training return: tensor(403.9602, device='cuda:0')
episode: 533 training return: tensor(471.5561, device='cuda:0')
episode: 534 training return: tensor(484.1936, device='cuda:0')
episode: 535 training return: tensor(499.0391, device='cuda:0')
epoch: 134 test_true_pfm: 4123.739861817853 sim_pfm: 511.7295155572647
episode: 536 training return: tensor(410.6789, device='cuda:0')
episode: 537 training return: tensor(485.5333, device='cuda:0')
episode: 538 training return: tensor(489.7690, device='cuda:0')
episode: 539 training return: tensor(512.9771, device='cuda:0')
epoch: 135 test_true_pfm: 4157.811695086829 sim_pfm: 494.79469539733446
episode: 540 training return: tensor(458.2664, device='cuda:0')
episode: 541 training return: tensor(494.9066, device='cuda:0')
episode: 542 training return: tensor(404.2136, device='cuda:0')
episode: 543 training return: tensor(453.6880, device='cuda:0')
epoch: 136 test_true_pfm: 4123.278181512497 sim_pfm: 475.77887933095917
episode: 544 training return: tensor(498.3593, device='cuda:0')
episode: 545 training return: tensor(443.3763, device='cuda:0')
episode: 546 training return: tensor(464.5711, device='cuda:0')
episode: 547 training return: tensor(499.0492, device='cuda:0')
epoch: 137 test_true_pfm: 4100.214098937602 sim_pfm: 484.44963098160224
episode: 548 training return: tensor(505.4516, device='cuda:0')
episode: 549 training return: tensor(383.0946, device='cuda:0')
episode: 550 training return: tensor(533.5364, device='cuda:0')
episode: 551 training return: tensor(490.0910, device='cuda:0')
epoch: 138 test_true_pfm: 4167.568455419297 sim_pfm: 481.6778867797305
episode: 552 training return: tensor(505.1437, device='cuda:0')
episode: 553 training return: tensor(478.4469, device='cuda:0')
episode: 554 training return: tensor(537.7479, device='cuda:0')
episode: 555 training return: tensor(489.6499, device='cuda:0')
epoch: 139 test_true_pfm: 4083.057717056679 sim_pfm: 502.2890838516566
episode: 556 training return: tensor(463.8250, device='cuda:0')
episode: 557 training return: tensor(537.3254, device='cuda:0')
episode: 558 training return: tensor(457.8629, device='cuda:0')
episode: 559 training return: tensor(513.1568, device='cuda:0')
epoch: 140 test_true_pfm: 4129.507811174805 sim_pfm: 477.82394352076034
episode: 560 training return: tensor(496.1885, device='cuda:0')
episode: 561 training return: tensor(533.8418, device='cuda:0')
episode: 562 training return: tensor(393.7221, device='cuda:0')
episode: 563 training return: tensor(430.0914, device='cuda:0')
epoch: 141 test_true_pfm: 4101.775183111968 sim_pfm: 444.9616747358232
episode: 564 training return: tensor(491.9858, device='cuda:0')
episode: 565 training return: tensor(501.9391, device='cuda:0')
episode: 566 training return: tensor(468.8230, device='cuda:0')
episode: 567 training return: tensor(365.4044, device='cuda:0')
epoch: 142 test_true_pfm: 4179.756239677823 sim_pfm: 509.48367458214244
episode: 568 training return: tensor(474.8503, device='cuda:0')
episode: 569 training return: tensor(470.2306, device='cuda:0')
episode: 570 training return: tensor(402.9414, device='cuda:0')
episode: 571 training return: tensor(475.4168, device='cuda:0')
epoch: 143 test_true_pfm: 4167.231907277619 sim_pfm: 506.0391228660398
episode: 572 training return: tensor(322.3792, device='cuda:0')
episode: 573 training return: tensor(457.6408, device='cuda:0')
episode: 574 training return: tensor(480.2182, device='cuda:0')
episode: 575 training return: tensor(504.6755, device='cuda:0')
epoch: 144 test_true_pfm: 4189.50322747985 sim_pfm: 497.31288129498716
episode: 576 training return: tensor(441.2149, device='cuda:0')
episode: 577 training return: tensor(504.9322, device='cuda:0')
episode: 578 training return: tensor(507.4881, device='cuda:0')
episode: 579 training return: tensor(477.7491, device='cuda:0')
epoch: 145 test_true_pfm: 4141.162590943125 sim_pfm: 512.5266918221799
episode: 580 training return: tensor(425.5616, device='cuda:0')
episode: 581 training return: tensor(396.2746, device='cuda:0')
episode: 582 training return: tensor(504.7188, device='cuda:0')
episode: 583 training return: tensor(497.0237, device='cuda:0')
epoch: 146 test_true_pfm: 4123.335454883538 sim_pfm: 490.48910745297326
episode: 584 training return: tensor(487.8729, device='cuda:0')
episode: 585 training return: tensor(430.4798, device='cuda:0')
episode: 586 training return: tensor(356.5219, device='cuda:0')
episode: 587 training return: tensor(401.8137, device='cuda:0')
epoch: 147 test_true_pfm: 4202.933431746723 sim_pfm: 472.0903596598655
episode: 588 training return: tensor(496.5452, device='cuda:0')
episode: 589 training return: tensor(340.0086, device='cuda:0')
episode: 590 training return: tensor(509.7138, device='cuda:0')
episode: 591 training return: tensor(535.7288, device='cuda:0')
epoch: 148 test_true_pfm: 4181.086344938271 sim_pfm: 515.9250128463997
episode: 592 training return: tensor(510.2822, device='cuda:0')
episode: 593 training return: tensor(490.1987, device='cuda:0')
episode: 594 training return: tensor(457.1719, device='cuda:0')
episode: 595 training return: tensor(514.2934, device='cuda:0')
epoch: 149 test_true_pfm: 4096.707211588742 sim_pfm: 480.182600546376
episode: 596 training return: tensor(455.9983, device='cuda:0')
episode: 597 training return: tensor(510.3077, device='cuda:0')
episode: 598 training return: tensor(472.3188, device='cuda:0')
episode: 599 training return: tensor(494.4756, device='cuda:0')
epoch: 150 test_true_pfm: 4171.847444050864 sim_pfm: 505.73152202233905
