['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '30000', '--sub']
epoch: 0 training_loss 0.2701348726451397 test_loss: 0.20442707538604737
epoch: 1 training_loss 0.16549460783600808 test_loss: 0.16135942935943604
epoch: 2 training_loss 0.14260799199342727 test_loss: 0.14256782531738282
epoch: 3 training_loss 0.12709329195320607 test_loss: 0.13811975717544556
epoch: 4 training_loss 0.12212291691452265 test_loss: 0.11531374454498292
epoch: 5 training_loss 0.11767462536692619 test_loss: 0.11024971008300781
epoch: 6 training_loss 0.11265928447246551 test_loss: 0.11639230251312256
epoch: 7 training_loss 0.10618920469656587 test_loss: 0.10865609645843506
epoch: 8 training_loss 0.11256204683333636 test_loss: 0.1094694972038269
epoch: 9 training_loss 0.10406498689204455 test_loss: 0.10499628782272338
epoch: 10 training_loss 0.10358886875212192 test_loss: 0.11594765186309815
epoch: 11 training_loss 0.10882072459906339 test_loss: 0.12113467454910279
epoch: 12 training_loss 0.10136909797787666 test_loss: 0.1103635549545288
epoch: 13 training_loss 0.09776406593620778 test_loss: 0.11600222587585449
epoch: 14 training_loss 0.10478808287531137 test_loss: 0.08538605570793152
epoch: 15 training_loss 0.09924264341592788 test_loss: 0.11188457012176514
epoch: 16 training_loss 0.10537732316181064 test_loss: 0.10287015438079834
epoch: 17 training_loss 0.09755049116909503 test_loss: 0.10201839208602906
epoch: 18 training_loss 0.0925635777413845 test_loss: 0.11121689081192017
epoch: 19 training_loss 0.09469330824911594 test_loss: 0.1016957402229309
epoch: 20 training_loss 0.09861798813566565 test_loss: 0.09742735624313355
epoch: 21 training_loss 0.09219482889398932 test_loss: 0.10552858114242554
epoch: 22 training_loss 0.09333180962130427 test_loss: 0.1249970555305481
epoch: 23 training_loss 0.08845672963187098 test_loss: 0.11012367010116578
epoch: 24 training_loss 0.09330378072336316 test_loss: 0.10907522439956666
epoch: 25 training_loss 0.09673283340409398 test_loss: 0.10610870122909546
epoch: 26 training_loss 0.10390296317636967 test_loss: 0.11363425254821777
epoch: 27 training_loss 0.094311495218426 test_loss: 0.11007860898971558
epoch: 28 training_loss 0.0989182896539569 test_loss: 0.11235861778259278
epoch: 29 training_loss 0.09460568897426129 test_loss: 0.10695430040359497
epoch: 30 training_loss 0.09020790291950107 test_loss: 0.08580314517021179
epoch: 31 training_loss 0.09672832407057286 test_loss: 0.09958969354629517
epoch: 32 training_loss 0.09451947052031756 test_loss: 0.10445588827133179
epoch: 33 training_loss 0.0993448532745242 test_loss: 0.1015059471130371
epoch: 34 training_loss 0.08276349430903793 test_loss: 0.10105725526809692
epoch: 35 training_loss 0.09183550342917442 test_loss: 0.10292824506759643
epoch: 36 training_loss 0.09436648834496736 test_loss: 0.10532474517822266
epoch: 37 training_loss 0.09125079085119069 test_loss: 0.09954582452774048
epoch: 38 training_loss 0.08538518336601555 test_loss: 0.10501638650894166
epoch: 39 training_loss 0.09204302696511149 test_loss: 0.11657326221466065
epoch: 40 training_loss 0.08794446314685046 test_loss: 0.10017539262771606
epoch: 41 training_loss 0.09114136977121234 test_loss: 0.11336383819580079
epoch: 42 training_loss 0.09381570756435394 test_loss: 0.09897004365921021
epoch: 43 training_loss 0.09476010913029313 test_loss: 0.08906093239784241
epoch: 44 training_loss 0.09188244184479118 test_loss: 0.10949177742004394
epoch: 45 training_loss 0.09418029710650444 test_loss: 0.09496954679489136
epoch: 46 training_loss 0.08479209786280989 test_loss: 0.11420347690582275
epoch: 47 training_loss 0.09265640425495804 test_loss: 0.10810283422470093
epoch: 48 training_loss 0.08928297374397516 test_loss: 0.10695111751556396
epoch: 49 training_loss 0.09274522067978978 test_loss: 0.09690181016921998
epoch: 50 training_loss 0.09114904759451747 test_loss: 0.10242699384689331
epoch: 51 training_loss 0.08856816694140435 test_loss: 0.10144788026809692
epoch: 52 training_loss 0.09091260138899088 test_loss: 0.10934680700302124
epoch: 53 training_loss 0.08861862817779183 test_loss: 0.10730479955673218
epoch: 54 training_loss 0.09129260754212737 test_loss: 0.11015106439590454
epoch: 55 training_loss 0.08480023419484496 test_loss: 0.0955050528049469
epoch: 56 training_loss 0.08611846417188644 test_loss: 0.09543440341949463
epoch: 57 training_loss 0.09222747268155217 test_loss: 0.09652001857757568
epoch: 58 training_loss 0.08932534371502697 test_loss: 0.09836395382881165
epoch: 59 training_loss 0.08739287005737424 test_loss: 0.10952081680297851
epoch: 60 training_loss 0.09076322875916958 test_loss: 0.09541135430335998
epoch: 61 training_loss 0.09017323343083262 test_loss: 0.09945171475410461
epoch: 62 training_loss 0.08811896391212941 test_loss: 0.0950950562953949
epoch: 63 training_loss 0.09066417692229152 test_loss: 0.11524058580398559
epoch: 64 training_loss 0.08224971184507013 test_loss: 0.10941805839538574
epoch: 65 training_loss 0.09177181895822287 test_loss: 0.10030487775802613
epoch: 66 training_loss 0.09014024147763848 test_loss: 0.10026851892471314
epoch: 67 training_loss 0.08920058254152537 test_loss: 0.11851663589477539
epoch: 68 training_loss 0.0946129272133112 test_loss: 0.09492141604423524
epoch: 69 training_loss 0.08126577012240886 test_loss: 0.09299709796905517
epoch: 70 training_loss 0.08995251204818487 test_loss: 0.09847930073738098
epoch: 71 training_loss 0.08664610985666514 test_loss: 0.11647897958755493
epoch: 72 training_loss 0.0853094551898539 test_loss: 0.11468816995620727
epoch: 73 training_loss 0.08445910781621933 test_loss: 0.10700958967208862
epoch: 74 training_loss 0.08338471617549657 test_loss: 0.09773838520050049
epoch: 75 training_loss 0.09072685468941927 test_loss: 0.10077940225601197
epoch: 76 training_loss 0.08096324346959591 test_loss: 0.10535776615142822
epoch: 77 training_loss 0.08659212298691273 test_loss: 0.09313691258430482
epoch: 78 training_loss 0.08356088032945991 test_loss: 0.08676143288612366
epoch: 79 training_loss 0.08236054074950516 test_loss: 0.08565164804458618
epoch: 80 training_loss 0.0811311543546617 test_loss: 0.10086530447006226
epoch: 81 training_loss 0.08253474207594991 test_loss: 0.10109838247299194
epoch: 82 training_loss 0.09342980172485113 test_loss: 0.11736717224121093
epoch: 83 training_loss 0.08711630322039127 test_loss: 0.09912036657333374
epoch: 84 training_loss 0.07827764596790075 test_loss: 0.08917357325553894
epoch: 85 training_loss 0.08750088818371296 test_loss: 0.10363224744796753
epoch: 86 training_loss 0.0845005436707288 test_loss: 0.09574137330055237
epoch: 87 training_loss 0.08734372835606337 test_loss: 0.09995378255844116
epoch: 88 training_loss 0.08074642315506936 test_loss: 0.09255970120429993
epoch: 89 training_loss 0.08390427604317666 test_loss: 0.11185486316680908
epoch: 90 training_loss 0.08595063101500272 test_loss: 0.09290788769721985
epoch: 91 training_loss 0.08689777977764607 test_loss: 0.09848756790161133
epoch: 92 training_loss 0.08483295368030667 test_loss: 0.10972261428833008
epoch: 93 training_loss 0.09194420574232937 test_loss: 0.10611886978149414
epoch: 94 training_loss 0.08535163527354599 test_loss: 0.08939293026924133
epoch: 95 training_loss 0.08687469266355037 test_loss: 0.09196186065673828
epoch: 96 training_loss 0.08563256196677685 test_loss: 0.09356684684753418
epoch: 97 training_loss 0.08432548990473152 test_loss: 0.09764373898506165
epoch: 98 training_loss 0.08959831794723869 test_loss: 0.08953096866607665
epoch: 99 training_loss 0.084797307793051 test_loss: 0.10455679893493652
epoch: 100 training_loss 0.08221589475870132 test_loss: 0.08808810710906982
epoch: 101 training_loss 0.08266767455264926 test_loss: 0.09396625757217407
epoch: 102 training_loss 0.08904121113941073 test_loss: 0.09347926974296569
epoch: 103 training_loss 0.08345584034919738 test_loss: 0.09666218161582947
epoch: 104 training_loss 0.08117946438491344 test_loss: 0.09607345461845399
epoch: 105 training_loss 0.08878482570871711 test_loss: 0.09181075096130371
epoch: 106 training_loss 0.08300883736461401 test_loss: 0.09234214425086976
epoch: 107 training_loss 0.08135113909840584 test_loss: 0.09572703838348388
epoch: 108 training_loss 0.08382396863773466 test_loss: 0.08380631804466247
epoch: 109 training_loss 0.08110698512755335 test_loss: 0.09660164713859558
epoch: 110 training_loss 0.08116297313943505 test_loss: 0.09944416880607605
epoch: 111 training_loss 0.08703192621469498 test_loss: 0.0850247323513031
epoch: 112 training_loss 0.08503927394747735 test_loss: 0.09961063861846924
epoch: 113 training_loss 0.08330422796308995 test_loss: 0.0932668149471283
epoch: 114 training_loss 0.08294547760859132 test_loss: 0.09004649519920349
epoch: 115 training_loss 0.08918487494811415 test_loss: 0.09381327629089356
epoch: 116 training_loss 0.08405336892232299 test_loss: 0.08780032992362977
epoch: 117 training_loss 0.08297171997837723 test_loss: 0.08992692232131957
epoch: 118 training_loss 0.07956794294528663 test_loss: 0.10641515254974365
epoch: 119 training_loss 0.08020072270184755 test_loss: 0.09448793530464172
epoch: 120 training_loss 0.07876521511003375 test_loss: 0.09473491311073304
epoch: 121 training_loss 0.0792011241428554 test_loss: 0.09624618887901307
epoch: 122 training_loss 0.08423029083758593 test_loss: 0.08897131085395812
epoch: 123 training_loss 0.08947367208078504 test_loss: 0.10280208587646485
epoch: 124 training_loss 0.08004634166136385 test_loss: 0.10372258424758911
epoch: 125 training_loss 0.08452321289107204 test_loss: 0.10073137283325195
epoch: 126 training_loss 0.08397879535332323 test_loss: 0.09997923970222473
epoch: 127 training_loss 0.08202015252783895 test_loss: 0.1017532467842102
epoch: 128 training_loss 0.08449908930808306 test_loss: 0.08972209095954894
epoch: 129 training_loss 0.07626382544636727 test_loss: 0.09384687542915345
epoch: 130 training_loss 0.0805136682279408 test_loss: 0.09032167792320252
epoch: 131 training_loss 0.08360262081027031 test_loss: 0.10466681718826294
epoch: 132 training_loss 0.0801779398880899 test_loss: 0.10967371463775635
epoch: 133 training_loss 0.07982477493584156 test_loss: 0.08885241746902466
epoch: 134 training_loss 0.08191449688747525 test_loss: 0.09887393712997436
epoch: 135 training_loss 0.079010402970016 test_loss: 0.10529986619949341
epoch: 136 training_loss 0.08143139344640077 test_loss: 0.1037407636642456
epoch: 137 training_loss 0.07976642157882452 test_loss: 0.09129287600517273
epoch: 138 training_loss 0.08876080272719264 test_loss: 0.10064473152160644
epoch: 139 training_loss 0.07950359885580838 test_loss: 0.09430900812149048
epoch: 140 training_loss 0.0789362283796072 test_loss: 0.09441620111465454
epoch: 141 training_loss 0.07705452867783606 test_loss: 0.09442706108093261
epoch: 142 training_loss 0.08216737166047096 test_loss: 0.10185099840164184
epoch: 143 training_loss 0.0796880871616304 test_loss: 0.09362314343452453
epoch: 144 training_loss 0.07867506610229612 test_loss: 0.10516777038574218
epoch: 145 training_loss 0.07956627110019326 test_loss: 0.09046130180358887
epoch: 146 training_loss 0.08272442944347859 test_loss: 0.10337082147598267
epoch: 147 training_loss 0.08059384308755398 test_loss: 0.091851145029068
epoch: 148 training_loss 0.07951444555073976 test_loss: 0.10466687679290772
epoch: 149 training_loss 0.08186525681987405 test_loss: 0.1068912386894226
epoch: 0 training_loss 38.76538459777832 test_loss: 19.72822265625
epoch: 1 training_loss 16.1793826007843 test_loss: 13.821153259277343
epoch: 2 training_loss 12.143022441864014 test_loss: 11.32052230834961
epoch: 3 training_loss 10.413922953605653 test_loss: 9.734330749511718
epoch: 4 training_loss 8.86976369857788 test_loss: 8.711585235595702
epoch: 5 training_loss 8.152877941131592 test_loss: 7.712176513671875
epoch: 6 training_loss 7.448692450523376 test_loss: 7.291490173339843
epoch: 7 training_loss 6.973315000534058 test_loss: 6.574089050292969
epoch: 8 training_loss 6.677109398841858 test_loss: 6.388105773925782
epoch: 9 training_loss 6.10981653213501 test_loss: 6.4103141784667965
epoch: 10 training_loss 6.054166193008423 test_loss: 5.844857406616211
epoch: 11 training_loss 5.7012480449676515 test_loss: 5.5860130310058596
epoch: 12 training_loss 5.480550227165222 test_loss: 5.495233154296875
epoch: 13 training_loss 5.240061402320862 test_loss: 5.669436645507813
epoch: 14 training_loss 5.0348884391784665 test_loss: 5.142744445800782
epoch: 15 training_loss 4.983910384178162 test_loss: 4.796847534179688
epoch: 16 training_loss 4.768498702049255 test_loss: 4.8607330322265625
epoch: 17 training_loss 4.671140277385712 test_loss: 4.5783332824707035
epoch: 18 training_loss 4.572503688335419 test_loss: 4.459352493286133
epoch: 19 training_loss 4.5329580497741695 test_loss: 4.555805587768555
epoch: 20 training_loss 4.294555354118347 test_loss: 4.2816814422607425
epoch: 21 training_loss 4.226117279529571 test_loss: 4.073233413696289
epoch: 22 training_loss 4.213840343952179 test_loss: 4.216183853149414
epoch: 23 training_loss 4.1174255871772765 test_loss: 3.9778827667236327
epoch: 24 training_loss 3.904928524494171 test_loss: 3.937738800048828
epoch: 25 training_loss 3.954159965515137 test_loss: 3.9999553680419924
epoch: 26 training_loss 3.806202566623688 test_loss: 3.860643768310547
epoch: 27 training_loss 3.756451132297516 test_loss: 3.748044967651367
epoch: 28 training_loss 3.7490370416641237 test_loss: 3.7219791412353516
epoch: 29 training_loss 3.68905335187912 test_loss: 3.657798004150391
epoch: 30 training_loss 3.5968634510040283 test_loss: 3.7003509521484377
epoch: 31 training_loss 3.5620011782646177 test_loss: 3.7085491180419923
epoch: 32 training_loss 3.4691836977005006 test_loss: 3.5956005096435546
epoch: 33 training_loss 3.4392800450325014 test_loss: 3.371019744873047
epoch: 34 training_loss 3.465940625667572 test_loss: 3.2039634704589846
epoch: 35 training_loss 3.3272063159942626 test_loss: 3.480910873413086
epoch: 36 training_loss 3.407481458187103 test_loss: 3.5143333435058595
epoch: 37 training_loss 3.2851799845695497 test_loss: 3.2838401794433594
epoch: 38 training_loss 3.228027539253235 test_loss: 3.3015415191650392
epoch: 39 training_loss 3.1995050430297853 test_loss: 3.2116493225097655
epoch: 40 training_loss 3.213554546833038 test_loss: 3.2535755157470705
epoch: 41 training_loss 3.131041798591614 test_loss: 3.030419921875
epoch: 42 training_loss 3.1585742735862734 test_loss: 3.0230955123901366
epoch: 43 training_loss 3.0863729190826414 test_loss: 3.121462821960449
epoch: 44 training_loss 3.0980902314186096 test_loss: 3.069346809387207
epoch: 45 training_loss 3.120103166103363 test_loss: 3.2082054138183596
epoch: 46 training_loss 3.059219992160797 test_loss: 3.0517587661743164
epoch: 47 training_loss 3.0322320747375486 test_loss: 2.967472267150879
epoch: 48 training_loss 3.031049039363861 test_loss: 3.205046844482422
epoch: 49 training_loss 2.9242850708961488 test_loss: 2.8864648818969725
epoch: 50 training_loss 2.999345006942749 test_loss: 3.0208362579345702
epoch: 51 training_loss 2.8963560175895693 test_loss: 2.844422149658203
epoch: 52 training_loss 2.8341105508804323 test_loss: 2.9713157653808593
epoch: 53 training_loss 2.902365810871124 test_loss: 2.880283546447754
epoch: 54 training_loss 2.8589545583724973 test_loss: 2.8819252014160157
epoch: 55 training_loss 2.8219829297065733 test_loss: 2.965401840209961
epoch: 56 training_loss 2.8167438650131227 test_loss: 2.9143560409545897
epoch: 57 training_loss 2.8207980227470397 test_loss: 2.796174430847168
epoch: 58 training_loss 2.7590609622001647 test_loss: 2.7703453063964845
epoch: 59 training_loss 2.7774122738838196 test_loss: 2.732512283325195
epoch: 60 training_loss 2.733390488624573 test_loss: 2.7284172058105467
epoch: 61 training_loss 2.7613818764686586 test_loss: 2.7412298202514647
epoch: 62 training_loss 2.682652025222778 test_loss: 2.73400993347168
epoch: 63 training_loss 2.695838295221329 test_loss: 2.720328521728516
epoch: 64 training_loss 2.6040081119537355 test_loss: 2.7352136611938476
epoch: 65 training_loss 2.677241005897522 test_loss: 2.6454734802246094
epoch: 66 training_loss 2.7335306549072267 test_loss: 2.7875364303588865
epoch: 67 training_loss 2.639290418624878 test_loss: 2.6788854598999023
epoch: 68 training_loss 2.638456791639328 test_loss: 2.5686065673828127
epoch: 69 training_loss 2.622343327999115 test_loss: 2.642390251159668
epoch: 70 training_loss 2.6630888891220095 test_loss: 2.642172431945801
epoch: 71 training_loss 2.5529759347438814 test_loss: 2.7501815795898437
epoch: 72 training_loss 2.5564589858055116 test_loss: 2.6013755798339844
epoch: 73 training_loss 2.573649277687073 test_loss: 2.452344512939453
epoch: 74 training_loss 2.5748315942287445 test_loss: 2.639605712890625
epoch: 75 training_loss 2.5589074766635895 test_loss: 2.602787971496582
epoch: 76 training_loss 2.553639979362488 test_loss: 2.580514335632324
epoch: 77 training_loss 2.5380675745010377 test_loss: 2.5363815307617186
epoch: 78 training_loss 2.5275253319740294 test_loss: 2.5426284790039064
epoch: 79 training_loss 2.4810036182403565 test_loss: 2.5628677368164063
epoch: 80 training_loss 2.4970692372322083 test_loss: 2.5028839111328125
epoch: 81 training_loss 2.553976306915283 test_loss: 2.588310432434082
epoch: 82 training_loss 2.50040700674057 test_loss: 2.44576358795166
epoch: 83 training_loss 2.5337053716182707 test_loss: 2.4310985565185548
epoch: 84 training_loss 2.480334584712982 test_loss: 2.564270782470703
epoch: 85 training_loss 2.458980462551117 test_loss: 2.4425838470458983
epoch: 86 training_loss 2.403598769903183 test_loss: 2.5757108688354493
epoch: 87 training_loss 2.4317310297489168 test_loss: 2.520656394958496
epoch: 88 training_loss 2.4749561762809753 test_loss: 2.5870058059692385
epoch: 89 training_loss 2.443812825679779 test_loss: 2.585487174987793
epoch: 90 training_loss 2.4459255039691925 test_loss: 2.500595474243164
epoch: 91 training_loss 2.3955291223526003 test_loss: 2.362331008911133
epoch: 92 training_loss 2.408426351547241 test_loss: 2.485794258117676
epoch: 93 training_loss 2.3676659190654754 test_loss: 2.4671537399291994
epoch: 94 training_loss 2.333266543149948 test_loss: 2.535011863708496
epoch: 95 training_loss 2.3271377658843995 test_loss: 2.5127058029174805
epoch: 96 training_loss 2.402799265384674 test_loss: 2.456973838806152
epoch: 97 training_loss 2.353786073923111 test_loss: 2.440295600891113
epoch: 98 training_loss 2.3332877099514007 test_loss: 2.5904483795166016
epoch: 99 training_loss 2.347731245756149 test_loss: 2.364655876159668
epoch: 100 training_loss 2.353424211740494 test_loss: 2.4620040893554687
epoch: 101 training_loss 2.378969988822937 test_loss: 2.357130241394043
epoch: 102 training_loss 2.4104256629943848 test_loss: 2.326045608520508
epoch: 103 training_loss 2.3484905910491944 test_loss: 2.3340131759643556
epoch: 104 training_loss 2.3250180208683013 test_loss: 2.4474620819091797
epoch: 105 training_loss 2.29012632727623 test_loss: 2.349355125427246
epoch: 106 training_loss 2.3323114109039307 test_loss: 2.3057071685791017
epoch: 107 training_loss 2.322336263656616 test_loss: 2.4049808502197267
epoch: 108 training_loss 2.311936708688736 test_loss: 2.3151506423950194
epoch: 109 training_loss 2.302230055332184 test_loss: 2.3639703750610352
epoch: 110 training_loss 2.2926373732089997 test_loss: 2.3724592208862303
epoch: 111 training_loss 2.3008424615859986 test_loss: 2.3699453353881834
epoch: 112 training_loss 2.3286287903785707 test_loss: 2.3570695877075196
epoch: 113 training_loss 2.275905190706253 test_loss: 2.2906976699829102
epoch: 114 training_loss 2.2821694684028624 test_loss: 2.364584541320801
epoch: 115 training_loss 2.33033371090889 test_loss: 2.442042922973633
epoch: 116 training_loss 2.268738616704941 test_loss: 2.3251651763916015
epoch: 117 training_loss 2.2366184222698213 test_loss: 2.291141700744629
epoch: 118 training_loss 2.2397342431545257 test_loss: 2.36116886138916
epoch: 119 training_loss 2.2928856897354124 test_loss: 2.2665754318237306
epoch: 120 training_loss 2.307921773195267 test_loss: 2.401814270019531
epoch: 121 training_loss 2.296959798336029 test_loss: 2.3171817779541017
epoch: 122 training_loss 2.24112429857254 test_loss: 2.2914819717407227
epoch: 123 training_loss 2.2589131343364715 test_loss: 2.3954195022583007
epoch: 124 training_loss 2.2439286398887632 test_loss: 2.2685388565063476
epoch: 125 training_loss 2.2004122459888458 test_loss: 2.35889892578125
epoch: 126 training_loss 2.180457818508148 test_loss: 2.3351402282714844
epoch: 127 training_loss 2.2007863187789916 test_loss: 2.2294845581054688
epoch: 128 training_loss 2.222744184732437 test_loss: 2.2025136947631836
epoch: 129 training_loss 2.2159274363517762 test_loss: 2.3370361328125
epoch: 130 training_loss 2.2312635457515717 test_loss: 2.235535430908203
epoch: 131 training_loss 2.1801275098323822 test_loss: 2.3181024551391602
epoch: 132 training_loss 2.247228239774704 test_loss: 2.213673973083496
epoch: 133 training_loss 2.1889026284217836 test_loss: 2.3444252014160156
epoch: 134 training_loss 2.2314292824268342 test_loss: 2.1989553451538084
epoch: 135 training_loss 2.1802924847602845 test_loss: 2.3381380081176757
epoch: 136 training_loss 2.158512514829636 test_loss: 2.221281814575195
epoch: 137 training_loss 2.1491219222545626 test_loss: 2.350325012207031
epoch: 138 training_loss 2.171876652240753 test_loss: 2.270899772644043
epoch: 139 training_loss 2.1742470908164977 test_loss: 2.2426151275634765
epoch: 140 training_loss 2.1806098902225495 test_loss: 2.159683609008789
epoch: 141 training_loss 2.161345511674881 test_loss: 2.251552772521973
epoch: 142 training_loss 2.1537113320827483 test_loss: 2.1997421264648436
epoch: 143 training_loss 2.1793180096149443 test_loss: 2.2418365478515625
epoch: 144 training_loss 2.207621146440506 test_loss: 2.100005531311035
epoch: 145 training_loss 2.1181538128852844 test_loss: 2.3496007919311523
epoch: 146 training_loss 2.178839520215988 test_loss: 2.230526161193848
epoch: 147 training_loss 2.1516198074817656 test_loss: 2.212207794189453
epoch: 148 training_loss 2.156779783964157 test_loss: 2.218774604797363
epoch: 149 training_loss 2.154695200920105 test_loss: 2.307189178466797
3002.146879238324
episode: 0 training return: tensor(225.5510, device='cuda:0')
episode: 1 training return: tensor(199.8962, device='cuda:0')
episode: 2 training return: tensor(259.3779, device='cuda:0')
episode: 3 training return: tensor(202.7379, device='cuda:0')
epoch: 1 test_true_pfm: 3277.925605097362 sim_pfm: 70.29037281345033
episode: 4 training return: tensor(158.5828, device='cuda:0')
episode: 5 training return: tensor(125.7242, device='cuda:0')
episode: 6 training return: tensor(127.7759, device='cuda:0')
episode: 7 training return: tensor(251.7279, device='cuda:0')
epoch: 2 test_true_pfm: 2764.8020897180995 sim_pfm: 187.57929849799257
episode: 8 training return: tensor(-170.7465, device='cuda:0')
episode: 9 training return: tensor(180.1312, device='cuda:0')
episode: 10 training return: tensor(259.6725, device='cuda:0')
episode: 11 training return: tensor(180.0926, device='cuda:0')
epoch: 3 test_true_pfm: 3246.077464236796 sim_pfm: 127.90917685085635
episode: 12 training return: tensor(162.1953, device='cuda:0')
episode: 13 training return: tensor(-78.2842, device='cuda:0')
episode: 14 training return: tensor(203.6880, device='cuda:0')
episode: 15 training return: tensor(215.6232, device='cuda:0')
epoch: 4 test_true_pfm: 3091.6940218284144 sim_pfm: 213.8165136569684
episode: 16 training return: tensor(70.2789, device='cuda:0')
episode: 17 training return: tensor(174.4946, device='cuda:0')
episode: 18 training return: tensor(-26.8385, device='cuda:0')
episode: 19 training return: tensor(223.3696, device='cuda:0')
epoch: 5 test_true_pfm: 3202.5076940056865 sim_pfm: 141.71497230847794
episode: 20 training return: tensor(197.2428, device='cuda:0')
episode: 21 training return: tensor(177.8897, device='cuda:0')
episode: 22 training return: tensor(214.3740, device='cuda:0')
episode: 23 training return: tensor(171.4694, device='cuda:0')
epoch: 6 test_true_pfm: 3112.3901379017284 sim_pfm: 162.47380610528248
episode: 24 training return: tensor(167.4683, device='cuda:0')
episode: 25 training return: tensor(-22.0198, device='cuda:0')
episode: 26 training return: tensor(15.0777, device='cuda:0')
episode: 27 training return: tensor(243.5535, device='cuda:0')
epoch: 7 test_true_pfm: 3138.3175993070677 sim_pfm: 217.66762084292714
episode: 28 training return: tensor(84.8099, device='cuda:0')
episode: 29 training return: tensor(273.4793, device='cuda:0')
episode: 30 training return: tensor(-124.2764, device='cuda:0')
episode: 31 training return: tensor(141.6010, device='cuda:0')
epoch: 8 test_true_pfm: 3249.5920754380677 sim_pfm: 254.12187582300976
episode: 32 training return: tensor(245.7106, device='cuda:0')
episode: 33 training return: tensor(12.5882, device='cuda:0')
episode: 34 training return: tensor(-60.1634, device='cuda:0')
episode: 35 training return: tensor(156.2804, device='cuda:0')
epoch: 9 test_true_pfm: 3209.0808457754515 sim_pfm: 202.18376711533833
episode: 36 training return: tensor(165.9980, device='cuda:0')
episode: 37 training return: tensor(256.9482, device='cuda:0')
episode: 38 training return: tensor(209.6303, device='cuda:0')
episode: 39 training return: tensor(298.8951, device='cuda:0')
epoch: 10 test_true_pfm: 3238.208089019879 sim_pfm: 241.8764108371494
episode: 40 training return: tensor(-304.3056, device='cuda:0')
episode: 41 training return: tensor(137.1609, device='cuda:0')
episode: 42 training return: tensor(248.4137, device='cuda:0')
episode: 43 training return: tensor(188.0037, device='cuda:0')
epoch: 11 test_true_pfm: 2824.8589625934455 sim_pfm: 268.1164236344436
episode: 44 training return: tensor(167.7144, device='cuda:0')
episode: 45 training return: tensor(113.1061, device='cuda:0')
episode: 46 training return: tensor(257.9311, device='cuda:0')
episode: 47 training return: tensor(172.4095, device='cuda:0')
epoch: 12 test_true_pfm: 3220.667725228055 sim_pfm: 131.489717737985
episode: 48 training return: tensor(-335.3618, device='cuda:0')
episode: 49 training return: tensor(228.4084, device='cuda:0')
episode: 50 training return: tensor(71.2938, device='cuda:0')
episode: 51 training return: tensor(261.8341, device='cuda:0')
epoch: 13 test_true_pfm: 3348.7321705967624 sim_pfm: 278.0619926475629
episode: 52 training return: tensor(298.7238, device='cuda:0')
episode: 53 training return: tensor(153.0699, device='cuda:0')
episode: 54 training return: tensor(324.4883, device='cuda:0')
episode: 55 training return: tensor(193.2829, device='cuda:0')
epoch: 14 test_true_pfm: 3206.165820428528 sim_pfm: 161.87651286256732
episode: 56 training return: tensor(239.8810, device='cuda:0')
episode: 57 training return: tensor(-400.7535, device='cuda:0')
episode: 58 training return: tensor(296.2685, device='cuda:0')
episode: 59 training return: tensor(-113.7868, device='cuda:0')
epoch: 15 test_true_pfm: 3291.3028693091333 sim_pfm: 101.77886112002307
episode: 60 training return: tensor(162.8748, device='cuda:0')
episode: 61 training return: tensor(73.1834, device='cuda:0')
episode: 62 training return: tensor(226.9702, device='cuda:0')
episode: 63 training return: tensor(223.2518, device='cuda:0')
epoch: 16 test_true_pfm: 3296.9423014169347 sim_pfm: 179.04721563336594
episode: 64 training return: tensor(210.5759, device='cuda:0')
episode: 65 training return: tensor(128.0582, device='cuda:0')
episode: 66 training return: tensor(273.2451, device='cuda:0')
episode: 67 training return: tensor(-93.3655, device='cuda:0')
epoch: 17 test_true_pfm: 3234.6170612845744 sim_pfm: 277.4084443829779
episode: 68 training return: tensor(141.8645, device='cuda:0')
episode: 69 training return: tensor(-223.4666, device='cuda:0')
episode: 70 training return: tensor(259.0623, device='cuda:0')
episode: 71 training return: tensor(257.3802, device='cuda:0')
epoch: 18 test_true_pfm: 3231.3188804086044 sim_pfm: 282.84417994045845
episode: 72 training return: tensor(247.0135, device='cuda:0')
episode: 73 training return: tensor(318.1147, device='cuda:0')
episode: 74 training return: tensor(245.2343, device='cuda:0')
episode: 75 training return: tensor(232.6043, device='cuda:0')
epoch: 19 test_true_pfm: 3300.4953793595955 sim_pfm: 300.9927115549799
episode: 76 training return: tensor(250.0945, device='cuda:0')
episode: 77 training return: tensor(223.8537, device='cuda:0')
episode: 78 training return: tensor(155.2340, device='cuda:0')
episode: 79 training return: tensor(304.7986, device='cuda:0')
epoch: 20 test_true_pfm: 3343.638827363023 sim_pfm: 296.5231827555593
episode: 80 training return: tensor(280.7910, device='cuda:0')
episode: 81 training return: tensor(69.4882, device='cuda:0')
episode: 82 training return: tensor(330.3942, device='cuda:0')
episode: 83 training return: tensor(284.9853, device='cuda:0')
epoch: 21 test_true_pfm: 3255.271620866053 sim_pfm: 206.83845343372863
episode: 84 training return: tensor(290.4810, device='cuda:0')
episode: 85 training return: tensor(268.9561, device='cuda:0')
episode: 86 training return: tensor(-43.9547, device='cuda:0')
episode: 87 training return: tensor(224.8918, device='cuda:0')
epoch: 22 test_true_pfm: 3373.9459361393892 sim_pfm: 337.80387288015726
episode: 88 training return: tensor(289.0720, device='cuda:0')
episode: 89 training return: tensor(216.7048, device='cuda:0')
episode: 90 training return: tensor(272.8080, device='cuda:0')
episode: 91 training return: tensor(249.3708, device='cuda:0')
epoch: 23 test_true_pfm: 3402.2007503895425 sim_pfm: 319.43878670253133
episode: 92 training return: tensor(192.9501, device='cuda:0')
episode: 93 training return: tensor(274.0685, device='cuda:0')
episode: 94 training return: tensor(272.8997, device='cuda:0')
episode: 95 training return: tensor(258.0715, device='cuda:0')
epoch: 24 test_true_pfm: 3378.0714795312865 sim_pfm: 365.105234653883
episode: 96 training return: tensor(281.1147, device='cuda:0')
episode: 97 training return: tensor(294.9680, device='cuda:0')
episode: 98 training return: tensor(339.3169, device='cuda:0')
episode: 99 training return: tensor(209.3467, device='cuda:0')
epoch: 25 test_true_pfm: 3306.562988730579 sim_pfm: 320.7359498082563
episode: 100 training return: tensor(270.8926, device='cuda:0')
episode: 101 training return: tensor(294.1868, device='cuda:0')
episode: 102 training return: tensor(296.5692, device='cuda:0')
episode: 103 training return: tensor(318.7776, device='cuda:0')
epoch: 26 test_true_pfm: 3393.709316743623 sim_pfm: 352.23012407981633
episode: 104 training return: tensor(290.3633, device='cuda:0')
episode: 105 training return: tensor(303.3818, device='cuda:0')
episode: 106 training return: tensor(269.9467, device='cuda:0')
episode: 107 training return: tensor(350.9513, device='cuda:0')
epoch: 27 test_true_pfm: 3393.870167446838 sim_pfm: 369.0592174145374
episode: 108 training return: tensor(299.4777, device='cuda:0')
episode: 109 training return: tensor(274.7003, device='cuda:0')
episode: 110 training return: tensor(341.6470, device='cuda:0')
episode: 111 training return: tensor(234.3231, device='cuda:0')
epoch: 28 test_true_pfm: 3433.3730574634483 sim_pfm: 326.5228570286902
episode: 112 training return: tensor(311.7856, device='cuda:0')
episode: 113 training return: tensor(318.2454, device='cuda:0')
episode: 114 training return: tensor(317.2898, device='cuda:0')
episode: 115 training return: tensor(242.7874, device='cuda:0')
epoch: 29 test_true_pfm: 3416.364806762311 sim_pfm: 378.2463843078197
episode: 116 training return: tensor(268.4508, device='cuda:0')
episode: 117 training return: tensor(212.5986, device='cuda:0')
episode: 118 training return: tensor(308.2901, device='cuda:0')
episode: 119 training return: tensor(373.6851, device='cuda:0')
epoch: 30 test_true_pfm: 3399.297954097394 sim_pfm: 363.4462889407102
episode: 120 training return: tensor(264.3368, device='cuda:0')
episode: 121 training return: tensor(231.2684, device='cuda:0')
episode: 122 training return: tensor(287.5853, device='cuda:0')
episode: 123 training return: tensor(-72.0109, device='cuda:0')
epoch: 31 test_true_pfm: 3410.9262702311985 sim_pfm: 340.2068451636199
episode: 124 training return: tensor(314.6335, device='cuda:0')
episode: 125 training return: tensor(339.6864, device='cuda:0')
episode: 126 training return: tensor(-45.5083, device='cuda:0')
episode: 127 training return: tensor(320.9543, device='cuda:0')
epoch: 32 test_true_pfm: 3408.5283267197933 sim_pfm: 330.59032015433576
episode: 128 training return: tensor(243.4539, device='cuda:0')
episode: 129 training return: tensor(314.7835, device='cuda:0')
episode: 130 training return: tensor(348.7843, device='cuda:0')
episode: 131 training return: tensor(315.7539, device='cuda:0')
epoch: 33 test_true_pfm: 3418.34283707871 sim_pfm: 341.81359774110996
episode: 132 training return: tensor(343.6412, device='cuda:0')
episode: 133 training return: tensor(309.6509, device='cuda:0')
episode: 134 training return: tensor(270.3890, device='cuda:0')
episode: 135 training return: tensor(271.5900, device='cuda:0')
epoch: 34 test_true_pfm: 3382.9962052928145 sim_pfm: 327.7052449460219
episode: 136 training return: tensor(284.1796, device='cuda:0')
episode: 137 training return: tensor(313.1513, device='cuda:0')
episode: 138 training return: tensor(337.7389, device='cuda:0')
episode: 139 training return: tensor(313.9990, device='cuda:0')
epoch: 35 test_true_pfm: 3423.0463913204726 sim_pfm: 346.32622993002104
episode: 140 training return: tensor(287.9150, device='cuda:0')
episode: 141 training return: tensor(175.9241, device='cuda:0')
episode: 142 training return: tensor(291.6544, device='cuda:0')
episode: 143 training return: tensor(323.0327, device='cuda:0')
epoch: 36 test_true_pfm: 3464.817215514365 sim_pfm: 342.51873345450923
episode: 144 training return: tensor(295.5507, device='cuda:0')
episode: 145 training return: tensor(144.5038, device='cuda:0')
episode: 146 training return: tensor(307.9874, device='cuda:0')
episode: 147 training return: tensor(285.7897, device='cuda:0')
epoch: 37 test_true_pfm: 3425.77233624313 sim_pfm: 276.51894080343965
episode: 148 training return: tensor(307.1142, device='cuda:0')
episode: 149 training return: tensor(326.8170, device='cuda:0')
episode: 150 training return: tensor(230.1650, device='cuda:0')
episode: 151 training return: tensor(307.9768, device='cuda:0')
epoch: 38 test_true_pfm: 3413.6459634451858 sim_pfm: 310.57909254332
episode: 152 training return: tensor(277.1057, device='cuda:0')
episode: 153 training return: tensor(286.7030, device='cuda:0')
episode: 154 training return: tensor(304.5996, device='cuda:0')
episode: 155 training return: tensor(229.7584, device='cuda:0')
epoch: 39 test_true_pfm: 3366.831617709928 sim_pfm: 319.06730058024795
episode: 156 training return: tensor(310.1115, device='cuda:0')
episode: 157 training return: tensor(285.6275, device='cuda:0')
episode: 158 training return: tensor(307.6460, device='cuda:0')
episode: 159 training return: tensor(244.7137, device='cuda:0')
epoch: 40 test_true_pfm: 3378.380221863969 sim_pfm: 327.71977376108407
episode: 160 training return: tensor(282.8333, device='cuda:0')
episode: 161 training return: tensor(287.4202, device='cuda:0')
episode: 162 training return: tensor(308.8802, device='cuda:0')
episode: 163 training return: tensor(316.4370, device='cuda:0')
epoch: 41 test_true_pfm: 3409.6705747335786 sim_pfm: 322.90583829108317
episode: 164 training return: tensor(284.3374, device='cuda:0')
episode: 165 training return: tensor(376.5374, device='cuda:0')
episode: 166 training return: tensor(261.0067, device='cuda:0')
episode: 167 training return: tensor(309.8533, device='cuda:0')
epoch: 42 test_true_pfm: 3419.5897275207535 sim_pfm: 331.0593680675277
episode: 168 training return: tensor(324.8373, device='cuda:0')
episode: 169 training return: tensor(246.0484, device='cuda:0')
episode: 170 training return: tensor(275.2032, device='cuda:0')
episode: 171 training return: tensor(305.0016, device='cuda:0')
epoch: 43 test_true_pfm: 3378.6666864121016 sim_pfm: 315.31633794112713
episode: 172 training return: tensor(249.5199, device='cuda:0')
episode: 173 training return: tensor(190.6917, device='cuda:0')
episode: 174 training return: tensor(338.3632, device='cuda:0')
episode: 175 training return: tensor(269.8813, device='cuda:0')
epoch: 44 test_true_pfm: 3414.977944203045 sim_pfm: 322.8398668448014
episode: 176 training return: tensor(267.8164, device='cuda:0')
episode: 177 training return: tensor(291.4785, device='cuda:0')
episode: 178 training return: tensor(298.3698, device='cuda:0')
episode: 179 training return: tensor(350.8745, device='cuda:0')
epoch: 45 test_true_pfm: 3346.98045126659 sim_pfm: 343.87147702871397
episode: 180 training return: tensor(305.9579, device='cuda:0')
episode: 181 training return: tensor(284.4141, device='cuda:0')
episode: 182 training return: tensor(325.2622, device='cuda:0')
episode: 183 training return: tensor(279.5271, device='cuda:0')
epoch: 46 test_true_pfm: 3413.3417679629 sim_pfm: 350.0940890298807
episode: 184 training return: tensor(254.7684, device='cuda:0')
episode: 185 training return: tensor(244.1696, device='cuda:0')
episode: 186 training return: tensor(283.7036, device='cuda:0')
episode: 187 training return: tensor(265.9246, device='cuda:0')
epoch: 47 test_true_pfm: 3431.0796407691328 sim_pfm: 341.164677373056
episode: 188 training return: tensor(345.2069, device='cuda:0')
episode: 189 training return: tensor(284.7039, device='cuda:0')
episode: 190 training return: tensor(318.7606, device='cuda:0')
episode: 191 training return: tensor(347.4214, device='cuda:0')
epoch: 48 test_true_pfm: 3288.28211744977 sim_pfm: 325.4740768110787
episode: 192 training return: tensor(269.4072, device='cuda:0')
episode: 193 training return: tensor(335.9340, device='cuda:0')
episode: 194 training return: tensor(67.1715, device='cuda:0')
episode: 195 training return: tensor(323.2557, device='cuda:0')
epoch: 49 test_true_pfm: 3377.540974722129 sim_pfm: 310.7555757885178
episode: 196 training return: tensor(296.3138, device='cuda:0')
episode: 197 training return: tensor(305.3890, device='cuda:0')
episode: 198 training return: tensor(232.8457, device='cuda:0')
episode: 199 training return: tensor(301.9767, device='cuda:0')
epoch: 50 test_true_pfm: 3344.074103216586 sim_pfm: 293.8392884106336
episode: 200 training return: tensor(324.4478, device='cuda:0')
episode: 201 training return: tensor(-218.2050, device='cuda:0')
episode: 202 training return: tensor(341.1696, device='cuda:0')
episode: 203 training return: tensor(326.4362, device='cuda:0')
epoch: 51 test_true_pfm: 3375.0051295226235 sim_pfm: 293.4334865643177
episode: 204 training return: tensor(340.2818, device='cuda:0')
episode: 205 training return: tensor(317.3892, device='cuda:0')
episode: 206 training return: tensor(348.2147, device='cuda:0')
episode: 207 training return: tensor(263.4880, device='cuda:0')
epoch: 52 test_true_pfm: 3447.4418079089264 sim_pfm: 349.06236710729235
episode: 208 training return: tensor(330.3716, device='cuda:0')
episode: 209 training return: tensor(261.8943, device='cuda:0')
episode: 210 training return: tensor(352.7182, device='cuda:0')
episode: 211 training return: tensor(302.2376, device='cuda:0')
epoch: 53 test_true_pfm: 3414.2443262318775 sim_pfm: 305.94483449996915
episode: 212 training return: tensor(290.5692, device='cuda:0')
episode: 213 training return: tensor(133.3546, device='cuda:0')
episode: 214 training return: tensor(320.2535, device='cuda:0')
episode: 215 training return: tensor(330.2739, device='cuda:0')
epoch: 54 test_true_pfm: 3413.8719408001834 sim_pfm: 306.56656850451446
episode: 216 training return: tensor(310.2598, device='cuda:0')
episode: 217 training return: tensor(288.4642, device='cuda:0')
episode: 218 training return: tensor(270.9141, device='cuda:0')
episode: 219 training return: tensor(273.3311, device='cuda:0')
epoch: 55 test_true_pfm: 3381.819156273034 sim_pfm: 292.17280765250325
episode: 220 training return: tensor(-93.9487, device='cuda:0')
episode: 221 training return: tensor(287.8135, device='cuda:0')
episode: 222 training return: tensor(282.1638, device='cuda:0')
episode: 223 training return: tensor(301.0694, device='cuda:0')
epoch: 56 test_true_pfm: 3418.092485210194 sim_pfm: 321.46096845627955
episode: 224 training return: tensor(331.4550, device='cuda:0')
episode: 225 training return: tensor(274.2557, device='cuda:0')
episode: 226 training return: tensor(375.5583, device='cuda:0')
episode: 227 training return: tensor(325.8961, device='cuda:0')
epoch: 57 test_true_pfm: 3407.08651688476 sim_pfm: 325.936937121364
episode: 228 training return: tensor(292.4163, device='cuda:0')
episode: 229 training return: tensor(339.7036, device='cuda:0')
episode: 230 training return: tensor(291.1882, device='cuda:0')
episode: 231 training return: tensor(392.2220, device='cuda:0')
epoch: 58 test_true_pfm: 3379.7870543716167 sim_pfm: 277.2435945335795
episode: 232 training return: tensor(356.9155, device='cuda:0')
episode: 233 training return: tensor(290.6716, device='cuda:0')
episode: 234 training return: tensor(275.9509, device='cuda:0')
episode: 235 training return: tensor(318.3999, device='cuda:0')
epoch: 59 test_true_pfm: 3366.587973094946 sim_pfm: 279.9949815844787
episode: 236 training return: tensor(308.1291, device='cuda:0')
episode: 237 training return: tensor(312.7891, device='cuda:0')
episode: 238 training return: tensor(256.1870, device='cuda:0')
episode: 239 training return: tensor(325.1948, device='cuda:0')
epoch: 60 test_true_pfm: 3379.5037836074066 sim_pfm: 299.58115750103025
episode: 240 training return: tensor(330.8871, device='cuda:0')
episode: 241 training return: tensor(196.4209, device='cuda:0')
episode: 242 training return: tensor(278.3450, device='cuda:0')
episode: 243 training return: tensor(345.5455, device='cuda:0')
epoch: 61 test_true_pfm: 3369.436943025077 sim_pfm: 356.9041481112072
episode: 244 training return: tensor(268.0246, device='cuda:0')
episode: 245 training return: tensor(261.5413, device='cuda:0')
episode: 246 training return: tensor(275.3115, device='cuda:0')
episode: 247 training return: tensor(340.5876, device='cuda:0')
epoch: 62 test_true_pfm: 3414.2056943416333 sim_pfm: 325.4356599613675
episode: 248 training return: tensor(334.0389, device='cuda:0')
episode: 249 training return: tensor(266.1628, device='cuda:0')
episode: 250 training return: tensor(276.2720, device='cuda:0')
episode: 251 training return: tensor(296.5663, device='cuda:0')
epoch: 63 test_true_pfm: 3409.7863990113015 sim_pfm: 295.5776853182858
episode: 252 training return: tensor(286.5739, device='cuda:0')
episode: 253 training return: tensor(325.1646, device='cuda:0')
episode: 254 training return: tensor(238.8768, device='cuda:0')
episode: 255 training return: tensor(161.6392, device='cuda:0')
epoch: 64 test_true_pfm: 3422.4452102863615 sim_pfm: 348.0128952778953
episode: 256 training return: tensor(360.9512, device='cuda:0')
episode: 257 training return: tensor(358.9354, device='cuda:0')
episode: 258 training return: tensor(304.3115, device='cuda:0')
episode: 259 training return: tensor(269.1464, device='cuda:0')
epoch: 65 test_true_pfm: 3390.2537025203187 sim_pfm: 335.72252771480515
episode: 260 training return: tensor(309.4870, device='cuda:0')
episode: 261 training return: tensor(337.6380, device='cuda:0')
episode: 262 training return: tensor(360.0563, device='cuda:0')
episode: 263 training return: tensor(223.5494, device='cuda:0')
epoch: 66 test_true_pfm: 3215.631671890249 sim_pfm: 300.98923199380323
episode: 264 training return: tensor(287.7888, device='cuda:0')
episode: 265 training return: tensor(308.8255, device='cuda:0')
episode: 266 training return: tensor(331.5219, device='cuda:0')
episode: 267 training return: tensor(335.5563, device='cuda:0')
epoch: 67 test_true_pfm: 3325.416961921567 sim_pfm: 333.69772627139656
episode: 268 training return: tensor(-40.1005, device='cuda:0')
episode: 269 training return: tensor(314.3101, device='cuda:0')
episode: 270 training return: tensor(348.9436, device='cuda:0')
episode: 271 training return: tensor(153.7245, device='cuda:0')
epoch: 68 test_true_pfm: 3379.0948523024103 sim_pfm: 325.75880405605614
episode: 272 training return: tensor(-56.3359, device='cuda:0')
episode: 273 training return: tensor(316.3228, device='cuda:0')
episode: 274 training return: tensor(321.1152, device='cuda:0')
episode: 275 training return: tensor(325.7443, device='cuda:0')
epoch: 69 test_true_pfm: 3392.230438975201 sim_pfm: 334.32162412741064
episode: 276 training return: tensor(296.6181, device='cuda:0')
episode: 277 training return: tensor(324.5485, device='cuda:0')
episode: 278 training return: tensor(177.4826, device='cuda:0')
episode: 279 training return: tensor(278.8981, device='cuda:0')
epoch: 70 test_true_pfm: 3392.201687679721 sim_pfm: 351.07448961688596
episode: 280 training return: tensor(353.0097, device='cuda:0')
episode: 281 training return: tensor(288.4259, device='cuda:0')
episode: 282 training return: tensor(335.2312, device='cuda:0')
episode: 283 training return: tensor(298.9516, device='cuda:0')
epoch: 71 test_true_pfm: 3394.0778508376548 sim_pfm: 311.9406353668989
episode: 284 training return: tensor(346.2084, device='cuda:0')
episode: 285 training return: tensor(328.3229, device='cuda:0')
episode: 286 training return: tensor(327.6736, device='cuda:0')
episode: 287 training return: tensor(327.3539, device='cuda:0')
epoch: 72 test_true_pfm: 3327.6142166504465 sim_pfm: 343.81289255508455
episode: 288 training return: tensor(269.9272, device='cuda:0')
episode: 289 training return: tensor(-70.9036, device='cuda:0')
episode: 290 training return: tensor(326.1535, device='cuda:0')
episode: 291 training return: tensor(388.7540, device='cuda:0')
epoch: 73 test_true_pfm: 3406.636501074568 sim_pfm: 325.9336044632364
episode: 292 training return: tensor(306.6459, device='cuda:0')
episode: 293 training return: tensor(347.1718, device='cuda:0')
episode: 294 training return: tensor(359.0453, device='cuda:0')
episode: 295 training return: tensor(284.1389, device='cuda:0')
epoch: 74 test_true_pfm: 3461.106364448638 sim_pfm: 323.33284069717047
episode: 296 training return: tensor(239.9632, device='cuda:0')
episode: 297 training return: tensor(286.0264, device='cuda:0')
episode: 298 training return: tensor(321.3005, device='cuda:0')
episode: 299 training return: tensor(-22.9881, device='cuda:0')
epoch: 75 test_true_pfm: 3436.168045603287 sim_pfm: 360.57869554470136
episode: 300 training return: tensor(315.7135, device='cuda:0')
episode: 301 training return: tensor(-55.5814, device='cuda:0')
episode: 302 training return: tensor(309.5688, device='cuda:0')
episode: 303 training return: tensor(305.7885, device='cuda:0')
epoch: 76 test_true_pfm: 3375.478426398256 sim_pfm: 337.4060220522612
episode: 304 training return: tensor(310.1434, device='cuda:0')
episode: 305 training return: tensor(334.7879, device='cuda:0')
episode: 306 training return: tensor(326.0856, device='cuda:0')
episode: 307 training return: tensor(316.8226, device='cuda:0')
epoch: 77 test_true_pfm: 3347.6722821238777 sim_pfm: 319.7395605788236
episode: 308 training return: tensor(355.2661, device='cuda:0')
episode: 309 training return: tensor(249.9647, device='cuda:0')
episode: 310 training return: tensor(343.8986, device='cuda:0')
episode: 311 training return: tensor(317.4890, device='cuda:0')
epoch: 78 test_true_pfm: 3383.438774710053 sim_pfm: 359.0744883904311
episode: 312 training return: tensor(280.8699, device='cuda:0')
episode: 313 training return: tensor(290.8293, device='cuda:0')
episode: 314 training return: tensor(310.6120, device='cuda:0')
episode: 315 training return: tensor(352.5866, device='cuda:0')
epoch: 79 test_true_pfm: 3369.4534828741175 sim_pfm: 332.2360242823585
episode: 316 training return: tensor(266.2061, device='cuda:0')
episode: 317 training return: tensor(309.2187, device='cuda:0')
episode: 318 training return: tensor(303.3062, device='cuda:0')
episode: 319 training return: tensor(332.0963, device='cuda:0')
epoch: 80 test_true_pfm: 3424.1216517996995 sim_pfm: 340.3310722275831
episode: 320 training return: tensor(301.1979, device='cuda:0')
episode: 321 training return: tensor(215.6681, device='cuda:0')
episode: 322 training return: tensor(301.0296, device='cuda:0')
episode: 323 training return: tensor(143.5748, device='cuda:0')
epoch: 81 test_true_pfm: 3392.165291189665 sim_pfm: 334.2622661283337
episode: 324 training return: tensor(304.3655, device='cuda:0')
episode: 325 training return: tensor(-410.4779, device='cuda:0')
episode: 326 training return: tensor(325.4263, device='cuda:0')
episode: 327 training return: tensor(298.1749, device='cuda:0')
epoch: 82 test_true_pfm: 3352.4920280886267 sim_pfm: 331.49102017534705
episode: 328 training return: tensor(282.5256, device='cuda:0')
episode: 329 training return: tensor(196.7678, device='cuda:0')
episode: 330 training return: tensor(320.4982, device='cuda:0')
episode: 331 training return: tensor(302.2051, device='cuda:0')
epoch: 83 test_true_pfm: 3428.915969033847 sim_pfm: 353.30262888686656
episode: 332 training return: tensor(326.9198, device='cuda:0')
episode: 333 training return: tensor(264.7793, device='cuda:0')
episode: 334 training return: tensor(348.6313, device='cuda:0')
episode: 335 training return: tensor(338.6656, device='cuda:0')
epoch: 84 test_true_pfm: 3439.776437600792 sim_pfm: 359.6818667000334
episode: 336 training return: tensor(313.0119, device='cuda:0')
episode: 337 training return: tensor(154.0194, device='cuda:0')
episode: 338 training return: tensor(387.7025, device='cuda:0')
episode: 339 training return: tensor(359.5393, device='cuda:0')
epoch: 85 test_true_pfm: 3412.19336416216 sim_pfm: 364.4539631952066
episode: 340 training return: tensor(308.5342, device='cuda:0')
episode: 341 training return: tensor(314.6112, device='cuda:0')
episode: 342 training return: tensor(401.3326, device='cuda:0')
episode: 343 training return: tensor(321.7582, device='cuda:0')
epoch: 86 test_true_pfm: 3417.045511344666 sim_pfm: 313.1339398613588
episode: 344 training return: tensor(-1.6138, device='cuda:0')
episode: 345 training return: tensor(161.8681, device='cuda:0')
episode: 346 training return: tensor(329.8992, device='cuda:0')
episode: 347 training return: tensor(314.9016, device='cuda:0')
epoch: 87 test_true_pfm: 3400.8758397451243 sim_pfm: 319.5791228961995
episode: 348 training return: tensor(351.0023, device='cuda:0')
episode: 349 training return: tensor(367.6860, device='cuda:0')
episode: 350 training return: tensor(310.2644, device='cuda:0')
episode: 351 training return: tensor(330.0664, device='cuda:0')
epoch: 88 test_true_pfm: 3419.1583073347333 sim_pfm: 300.32999572781654
episode: 352 training return: tensor(296.3837, device='cuda:0')
episode: 353 training return: tensor(340.0105, device='cuda:0')
episode: 354 training return: tensor(331.6479, device='cuda:0')
episode: 355 training return: tensor(341.9748, device='cuda:0')
epoch: 89 test_true_pfm: 3424.4276162603182 sim_pfm: 339.4623240058815
episode: 356 training return: tensor(348.2003, device='cuda:0')
episode: 357 training return: tensor(271.4005, device='cuda:0')
episode: 358 training return: tensor(327.9485, device='cuda:0')
episode: 359 training return: tensor(292.7060, device='cuda:0')
epoch: 90 test_true_pfm: 3384.6119019993675 sim_pfm: 357.3680392000436
episode: 360 training return: tensor(314.5081, device='cuda:0')
episode: 361 training return: tensor(286.4025, device='cuda:0')
episode: 362 training return: tensor(362.6501, device='cuda:0')
episode: 363 training return: tensor(357.0705, device='cuda:0')
epoch: 91 test_true_pfm: 3423.4563951904533 sim_pfm: 350.94334877627745
episode: 364 training return: tensor(373.7753, device='cuda:0')
episode: 365 training return: tensor(313.9951, device='cuda:0')
episode: 366 training return: tensor(278.3517, device='cuda:0')
episode: 367 training return: tensor(365.5962, device='cuda:0')
epoch: 92 test_true_pfm: 3461.3526612241476 sim_pfm: 374.4807974083524
episode: 368 training return: tensor(389.3926, device='cuda:0')
episode: 369 training return: tensor(345.2386, device='cuda:0')
episode: 370 training return: tensor(346.6987, device='cuda:0')
episode: 371 training return: tensor(334.3283, device='cuda:0')
epoch: 93 test_true_pfm: 3410.667177034109 sim_pfm: 325.21922936756164
episode: 372 training return: tensor(353.3509, device='cuda:0')
episode: 373 training return: tensor(343.5021, device='cuda:0')
episode: 374 training return: tensor(278.0683, device='cuda:0')
episode: 375 training return: tensor(147.0294, device='cuda:0')
epoch: 94 test_true_pfm: 3349.277652872692 sim_pfm: 340.61276428187074
episode: 376 training return: tensor(377.3018, device='cuda:0')
episode: 377 training return: tensor(335.3637, device='cuda:0')
episode: 378 training return: tensor(371.3152, device='cuda:0')
episode: 379 training return: tensor(316.7809, device='cuda:0')
epoch: 95 test_true_pfm: 3440.9447134527218 sim_pfm: 366.08958390149445
episode: 380 training return: tensor(325.7737, device='cuda:0')
episode: 381 training return: tensor(286.6191, device='cuda:0')
episode: 382 training return: tensor(384.9260, device='cuda:0')
episode: 383 training return: tensor(352.4500, device='cuda:0')
epoch: 96 test_true_pfm: 3388.2621109900865 sim_pfm: 320.8227379801198
episode: 384 training return: tensor(324.6063, device='cuda:0')
episode: 385 training return: tensor(196.4749, device='cuda:0')
episode: 386 training return: tensor(177.5007, device='cuda:0')
episode: 387 training return: tensor(206.0083, device='cuda:0')
epoch: 97 test_true_pfm: 3508.8929775489974 sim_pfm: 376.5240470105782
episode: 388 training return: tensor(345.3377, device='cuda:0')
episode: 389 training return: tensor(302.8694, device='cuda:0')
episode: 390 training return: tensor(350.1152, device='cuda:0')
episode: 391 training return: tensor(325.1140, device='cuda:0')
epoch: 98 test_true_pfm: 3436.2815035440544 sim_pfm: 346.863901903717
episode: 392 training return: tensor(302.7057, device='cuda:0')
episode: 393 training return: tensor(378.9573, device='cuda:0')
episode: 394 training return: tensor(326.3697, device='cuda:0')
episode: 395 training return: tensor(348.3987, device='cuda:0')
epoch: 99 test_true_pfm: 3406.591700559687 sim_pfm: 335.0032449627761
episode: 396 training return: tensor(333.3589, device='cuda:0')
episode: 397 training return: tensor(362.2791, device='cuda:0')
episode: 398 training return: tensor(356.2631, device='cuda:0')
episode: 399 training return: tensor(297.9152, device='cuda:0')
epoch: 100 test_true_pfm: 3427.202989239753 sim_pfm: 336.9826009608223
episode: 400 training return: tensor(307.0233, device='cuda:0')
episode: 401 training return: tensor(384.6451, device='cuda:0')
episode: 402 training return: tensor(190.4938, device='cuda:0')
episode: 403 training return: tensor(-109.3007, device='cuda:0')
epoch: 101 test_true_pfm: 3398.6131734205846 sim_pfm: 346.45321772330016
episode: 404 training return: tensor(342.1578, device='cuda:0')
episode: 405 training return: tensor(294.7133, device='cuda:0')
episode: 406 training return: tensor(317.0643, device='cuda:0')
episode: 407 training return: tensor(306.2343, device='cuda:0')
epoch: 102 test_true_pfm: 3385.4063675166544 sim_pfm: 318.7349767651079
episode: 408 training return: tensor(300.8289, device='cuda:0')
episode: 409 training return: tensor(345.1740, device='cuda:0')
episode: 410 training return: tensor(331.3857, device='cuda:0')
episode: 411 training return: tensor(323.5651, device='cuda:0')
epoch: 103 test_true_pfm: 3358.2209034408265 sim_pfm: 337.87350865717355
episode: 412 training return: tensor(342.5182, device='cuda:0')
episode: 413 training return: tensor(269.8519, device='cuda:0')
episode: 414 training return: tensor(345.5991, device='cuda:0')
episode: 415 training return: tensor(266.5173, device='cuda:0')
epoch: 104 test_true_pfm: 3425.045670089052 sim_pfm: 353.02481908292003
episode: 416 training return: tensor(314.2586, device='cuda:0')
episode: 417 training return: tensor(335.7125, device='cuda:0')
episode: 418 training return: tensor(370.2174, device='cuda:0')
episode: 419 training return: tensor(337.7975, device='cuda:0')
epoch: 105 test_true_pfm: 3452.0564572759104 sim_pfm: 366.1035170376611
episode: 420 training return: tensor(320.1130, device='cuda:0')
episode: 421 training return: tensor(360.4919, device='cuda:0')
episode: 422 training return: tensor(341.0367, device='cuda:0')
episode: 423 training return: tensor(388.9453, device='cuda:0')
epoch: 106 test_true_pfm: 3399.018215851092 sim_pfm: 349.7734930656152
episode: 424 training return: tensor(340.9803, device='cuda:0')
episode: 425 training return: tensor(279.3537, device='cuda:0')
episode: 426 training return: tensor(340.3494, device='cuda:0')
episode: 427 training return: tensor(309.4395, device='cuda:0')
epoch: 107 test_true_pfm: 3433.6481499042734 sim_pfm: 384.7158453098964
episode: 428 training return: tensor(342.3380, device='cuda:0')
episode: 429 training return: tensor(329.9587, device='cuda:0')
episode: 430 training return: tensor(268.3666, device='cuda:0')
episode: 431 training return: tensor(318.7695, device='cuda:0')
epoch: 108 test_true_pfm: 3391.419255127724 sim_pfm: 348.112919695947
episode: 432 training return: tensor(51.9489, device='cuda:0')
episode: 433 training return: tensor(383.4918, device='cuda:0')
episode: 434 training return: tensor(309.6863, device='cuda:0')
episode: 435 training return: tensor(290.2181, device='cuda:0')
epoch: 109 test_true_pfm: 3429.465486979256 sim_pfm: 353.50258189501864
episode: 436 training return: tensor(235.2716, device='cuda:0')
episode: 437 training return: tensor(283.5432, device='cuda:0')
episode: 438 training return: tensor(349.5651, device='cuda:0')
episode: 439 training return: tensor(344.5220, device='cuda:0')
epoch: 110 test_true_pfm: 3438.0274821046555 sim_pfm: 331.1409103217981
episode: 440 training return: tensor(361.8375, device='cuda:0')
episode: 441 training return: tensor(309.3783, device='cuda:0')
episode: 442 training return: tensor(309.6337, device='cuda:0')
episode: 443 training return: tensor(328.9482, device='cuda:0')
epoch: 111 test_true_pfm: 3475.6166517949605 sim_pfm: 374.6969914196816
episode: 444 training return: tensor(341.8167, device='cuda:0')
episode: 445 training return: tensor(387.8607, device='cuda:0')
episode: 446 training return: tensor(121.3656, device='cuda:0')
episode: 447 training return: tensor(276.5531, device='cuda:0')
epoch: 112 test_true_pfm: 3423.529294345233 sim_pfm: 341.3549589559746
episode: 448 training return: tensor(296.1286, device='cuda:0')
episode: 449 training return: tensor(361.7083, device='cuda:0')
episode: 450 training return: tensor(102.0003, device='cuda:0')
episode: 451 training return: tensor(333.7937, device='cuda:0')
epoch: 113 test_true_pfm: 3398.2143197001265 sim_pfm: 349.11399784430006
episode: 452 training return: tensor(356.0519, device='cuda:0')
episode: 453 training return: tensor(377.8162, device='cuda:0')
episode: 454 training return: tensor(349.1776, device='cuda:0')
episode: 455 training return: tensor(352.1035, device='cuda:0')
epoch: 114 test_true_pfm: 3469.8363148670396 sim_pfm: 377.9871309612645
episode: 456 training return: tensor(330.1769, device='cuda:0')
episode: 457 training return: tensor(340.0271, device='cuda:0')
episode: 458 training return: tensor(345.9798, device='cuda:0')
episode: 459 training return: tensor(315.3003, device='cuda:0')
epoch: 115 test_true_pfm: 3440.93135800892 sim_pfm: 380.73703771154396
episode: 460 training return: tensor(354.7758, device='cuda:0')
episode: 461 training return: tensor(376.5505, device='cuda:0')
episode: 462 training return: tensor(266.9543, device='cuda:0')
episode: 463 training return: tensor(388.1611, device='cuda:0')
epoch: 116 test_true_pfm: 3494.62471026294 sim_pfm: 394.38130304496735
episode: 464 training return: tensor(327.2524, device='cuda:0')
episode: 465 training return: tensor(299.4361, device='cuda:0')
episode: 466 training return: tensor(336.7419, device='cuda:0')
episode: 467 training return: tensor(327.8331, device='cuda:0')
epoch: 117 test_true_pfm: 3427.5140160132555 sim_pfm: 344.93883823847864
episode: 468 training return: tensor(314.1142, device='cuda:0')
episode: 469 training return: tensor(331.9663, device='cuda:0')
episode: 470 training return: tensor(297.5825, device='cuda:0')
episode: 471 training return: tensor(292.9993, device='cuda:0')
epoch: 118 test_true_pfm: 3406.0650834431926 sim_pfm: 325.94738321123685
episode: 472 training return: tensor(376.9506, device='cuda:0')
episode: 473 training return: tensor(275.5558, device='cuda:0')
episode: 474 training return: tensor(243.3650, device='cuda:0')
episode: 475 training return: tensor(340.2132, device='cuda:0')
epoch: 119 test_true_pfm: 3401.6465574829926 sim_pfm: 382.1913146202084
episode: 476 training return: tensor(364.8769, device='cuda:0')
episode: 477 training return: tensor(340.9441, device='cuda:0')
episode: 478 training return: tensor(345.6216, device='cuda:0')
episode: 479 training return: tensor(374.6583, device='cuda:0')
epoch: 120 test_true_pfm: 3423.0587511367735 sim_pfm: 361.7068869350478
episode: 480 training return: tensor(358.9395, device='cuda:0')
episode: 481 training return: tensor(307.3436, device='cuda:0')
episode: 482 training return: tensor(300.8893, device='cuda:0')
episode: 483 training return: tensor(338.5736, device='cuda:0')
epoch: 121 test_true_pfm: 3462.9704982391013 sim_pfm: 354.37226368995226
episode: 484 training return: tensor(414.1280, device='cuda:0')
episode: 485 training return: tensor(340.4763, device='cuda:0')
episode: 486 training return: tensor(237.9037, device='cuda:0')
episode: 487 training return: tensor(327.0594, device='cuda:0')
epoch: 122 test_true_pfm: 3457.834943092935 sim_pfm: 350.94187370923464
episode: 488 training return: tensor(344.7386, device='cuda:0')
episode: 489 training return: tensor(323.2654, device='cuda:0')
episode: 490 training return: tensor(334.6868, device='cuda:0')
episode: 491 training return: tensor(324.1291, device='cuda:0')
epoch: 123 test_true_pfm: 3454.3503348509653 sim_pfm: 372.2200803554927
episode: 492 training return: tensor(316.1973, device='cuda:0')
episode: 493 training return: tensor(367.7076, device='cuda:0')
episode: 494 training return: tensor(341.1744, device='cuda:0')
episode: 495 training return: tensor(272.9754, device='cuda:0')
epoch: 124 test_true_pfm: 3448.311259745699 sim_pfm: 350.37940694814705
episode: 496 training return: tensor(384.5798, device='cuda:0')
episode: 497 training return: tensor(349.1126, device='cuda:0')
episode: 498 training return: tensor(330.3782, device='cuda:0')
episode: 499 training return: tensor(362.8339, device='cuda:0')
epoch: 125 test_true_pfm: 2731.9630655514816 sim_pfm: 383.86403690988664
episode: 500 training return: tensor(315.0948, device='cuda:0')
episode: 501 training return: tensor(349.3110, device='cuda:0')
episode: 502 training return: tensor(368.9156, device='cuda:0')
episode: 503 training return: tensor(373.8340, device='cuda:0')
epoch: 126 test_true_pfm: 3430.7237450846947 sim_pfm: 344.071221177311
episode: 504 training return: tensor(342.1771, device='cuda:0')
episode: 505 training return: tensor(299.3752, device='cuda:0')
episode: 506 training return: tensor(292.6324, device='cuda:0')
episode: 507 training return: tensor(285.5915, device='cuda:0')
epoch: 127 test_true_pfm: 3448.2591394493434 sim_pfm: 341.2500973595791
episode: 508 training return: tensor(311.7015, device='cuda:0')
episode: 509 training return: tensor(370.2951, device='cuda:0')
episode: 510 training return: tensor(321.1870, device='cuda:0')
episode: 511 training return: tensor(251.2901, device='cuda:0')
epoch: 128 test_true_pfm: 3445.612160836688 sim_pfm: 354.97279827153153
episode: 512 training return: tensor(73.6784, device='cuda:0')
episode: 513 training return: tensor(343.8230, device='cuda:0')
episode: 514 training return: tensor(338.1734, device='cuda:0')
episode: 515 training return: tensor(359.4466, device='cuda:0')
epoch: 129 test_true_pfm: 3394.9379335433473 sim_pfm: 315.9545677119556
episode: 516 training return: tensor(293.6116, device='cuda:0')
episode: 517 training return: tensor(311.3286, device='cuda:0')
episode: 518 training return: tensor(323.2131, device='cuda:0')
episode: 519 training return: tensor(338.2538, device='cuda:0')
epoch: 130 test_true_pfm: 3489.093982822991 sim_pfm: 390.37004958376446
episode: 520 training return: tensor(339.9781, device='cuda:0')
episode: 521 training return: tensor(324.1461, device='cuda:0')
episode: 522 training return: tensor(360.1295, device='cuda:0')
episode: 523 training return: tensor(350.1353, device='cuda:0')
epoch: 131 test_true_pfm: 3416.0918917142885 sim_pfm: 346.02774659661617
episode: 524 training return: tensor(311.5513, device='cuda:0')
episode: 525 training return: tensor(335.3943, device='cuda:0')
episode: 526 training return: tensor(354.8262, device='cuda:0')
episode: 527 training return: tensor(297.8632, device='cuda:0')
epoch: 132 test_true_pfm: 3407.3257966924134 sim_pfm: 366.93096949660685
episode: 528 training return: tensor(328.8473, device='cuda:0')
episode: 529 training return: tensor(315.8436, device='cuda:0')
episode: 530 training return: tensor(401.9601, device='cuda:0')
episode: 531 training return: tensor(336.4323, device='cuda:0')
epoch: 133 test_true_pfm: 3502.4596118178742 sim_pfm: 385.4104434567077
episode: 532 training return: tensor(362.9474, device='cuda:0')
episode: 533 training return: tensor(321.5144, device='cuda:0')
episode: 534 training return: tensor(336.4066, device='cuda:0')
episode: 535 training return: tensor(344.3539, device='cuda:0')
epoch: 134 test_true_pfm: 3424.474889839723 sim_pfm: 372.60704985017463
episode: 536 training return: tensor(320.9274, device='cuda:0')
episode: 537 training return: tensor(332.6095, device='cuda:0')
episode: 538 training return: tensor(345.1289, device='cuda:0')
episode: 539 training return: tensor(304.1915, device='cuda:0')
epoch: 135 test_true_pfm: 3435.8645064612883 sim_pfm: 346.45834005228244
episode: 540 training return: tensor(373.5409, device='cuda:0')
episode: 541 training return: tensor(303.2000, device='cuda:0')
episode: 542 training return: tensor(342.2664, device='cuda:0')
episode: 543 training return: tensor(294.3583, device='cuda:0')
epoch: 136 test_true_pfm: 3429.1498777587767 sim_pfm: 364.9491210036601
episode: 544 training return: tensor(356.3915, device='cuda:0')
episode: 545 training return: tensor(312.4486, device='cuda:0')
episode: 546 training return: tensor(379.4135, device='cuda:0')
episode: 547 training return: tensor(329.8111, device='cuda:0')
epoch: 137 test_true_pfm: 3424.9553746521565 sim_pfm: 369.3656972468986
episode: 548 training return: tensor(347.8008, device='cuda:0')
episode: 549 training return: tensor(358.2964, device='cuda:0')
episode: 550 training return: tensor(350.1820, device='cuda:0')
episode: 551 training return: tensor(351.4809, device='cuda:0')
epoch: 138 test_true_pfm: 3400.305405039846 sim_pfm: 342.2754982113256
episode: 552 training return: tensor(353.0073, device='cuda:0')
episode: 553 training return: tensor(328.2285, device='cuda:0')
episode: 554 training return: tensor(288.1784, device='cuda:0')
episode: 555 training return: tensor(328.6227, device='cuda:0')
epoch: 139 test_true_pfm: 3454.4818983098794 sim_pfm: 362.7651262911968
episode: 556 training return: tensor(287.7356, device='cuda:0')
episode: 557 training return: tensor(329.5752, device='cuda:0')
episode: 558 training return: tensor(309.1038, device='cuda:0')
episode: 559 training return: tensor(342.7981, device='cuda:0')
epoch: 140 test_true_pfm: 3410.973691185826 sim_pfm: 344.3136560227916
episode: 560 training return: tensor(335.5044, device='cuda:0')
episode: 561 training return: tensor(350.5708, device='cuda:0')
episode: 562 training return: tensor(365.8813, device='cuda:0')
episode: 563 training return: tensor(418.5534, device='cuda:0')
epoch: 141 test_true_pfm: 3430.2233909733845 sim_pfm: 376.9011651951393
episode: 564 training return: tensor(336.2702, device='cuda:0')
episode: 565 training return: tensor(327.2009, device='cuda:0')
episode: 566 training return: tensor(388.4466, device='cuda:0')
episode: 567 training return: tensor(325.7788, device='cuda:0')
epoch: 142 test_true_pfm: 3443.740813925804 sim_pfm: 359.3665634251859
episode: 568 training return: tensor(301.9830, device='cuda:0')
episode: 569 training return: tensor(347.2396, device='cuda:0')
episode: 570 training return: tensor(339.0791, device='cuda:0')
episode: 571 training return: tensor(378.2250, device='cuda:0')
epoch: 143 test_true_pfm: 3427.4559804968612 sim_pfm: 389.32025658791343
episode: 572 training return: tensor(379.7801, device='cuda:0')
episode: 573 training return: tensor(302.7409, device='cuda:0')
episode: 574 training return: tensor(364.6254, device='cuda:0')
episode: 575 training return: tensor(358.0741, device='cuda:0')
epoch: 144 test_true_pfm: 3451.511638355941 sim_pfm: 372.4032569850776
episode: 576 training return: tensor(365.0979, device='cuda:0')
episode: 577 training return: tensor(267.2285, device='cuda:0')
episode: 578 training return: tensor(321.4663, device='cuda:0')
episode: 579 training return: tensor(325.3091, device='cuda:0')
epoch: 145 test_true_pfm: 3414.618057073825 sim_pfm: 346.784544728968
episode: 580 training return: tensor(369.1801, device='cuda:0')
episode: 581 training return: tensor(352.3004, device='cuda:0')
episode: 582 training return: tensor(345.6376, device='cuda:0')
episode: 583 training return: tensor(333.3394, device='cuda:0')
epoch: 146 test_true_pfm: 3460.850970899051 sim_pfm: 380.6593333010581
episode: 584 training return: tensor(296.9874, device='cuda:0')
episode: 585 training return: tensor(354.7000, device='cuda:0')
episode: 586 training return: tensor(313.8788, device='cuda:0')
episode: 587 training return: tensor(335.9421, device='cuda:0')
epoch: 147 test_true_pfm: 3424.4390952916124 sim_pfm: 358.2505939610419
episode: 588 training return: tensor(309.3235, device='cuda:0')
episode: 589 training return: tensor(351.1928, device='cuda:0')
episode: 590 training return: tensor(360.7317, device='cuda:0')
episode: 591 training return: tensor(300.5559, device='cuda:0')
epoch: 148 test_true_pfm: 3443.5204330929932 sim_pfm: 362.723356137935
episode: 592 training return: tensor(320.3640, device='cuda:0')
episode: 593 training return: tensor(337.6253, device='cuda:0')
episode: 594 training return: tensor(345.3893, device='cuda:0')
episode: 595 training return: tensor(338.0770, device='cuda:0')
epoch: 149 test_true_pfm: 3463.8053052411174 sim_pfm: 366.3503296216174
episode: 596 training return: tensor(335.2306, device='cuda:0')
episode: 597 training return: tensor(313.7447, device='cuda:0')
episode: 598 training return: tensor(353.9335, device='cuda:0')
episode: 599 training return: tensor(326.0413, device='cuda:0')
epoch: 150 test_true_pfm: 3456.6303622867854 sim_pfm: 396.6801017993809
