['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '3000', '--sub']
epoch: 0 training_loss 0.2683287401497364 test_loss: 0.08445010781288147
epoch: 1 training_loss 0.15122542075812817 test_loss: 0.08243948817253113
epoch: 2 training_loss 0.11937694720923901 test_loss: 0.07243677377700805
epoch: 3 training_loss 0.1115473004616797 test_loss: 0.07545973062515259
epoch: 4 training_loss 0.09797471687197686 test_loss: 0.06687098741531372
epoch: 5 training_loss 0.10822092700749636 test_loss: 0.06380265355110168
epoch: 6 training_loss 0.09687685992568731 test_loss: 0.07063581347465515
epoch: 7 training_loss 0.09361238177865744 test_loss: 0.06568676829338074
epoch: 8 training_loss 0.09129955433309078 test_loss: 0.06153032183647156
epoch: 9 training_loss 0.08386925864964724 test_loss: 0.06262483596801757
epoch: 10 training_loss 0.0875715883821249 test_loss: 0.07668909430503845
epoch: 11 training_loss 0.07959868105128408 test_loss: 0.06429938077926636
epoch: 12 training_loss 0.07499462863430381 test_loss: 0.06682555079460144
epoch: 13 training_loss 0.08288286008872092 test_loss: 0.06902240514755249
epoch: 14 training_loss 0.07562970781698823 test_loss: 0.0702441692352295
epoch: 15 training_loss 0.07826906686648727 test_loss: 0.0689759910106659
epoch: 16 training_loss 0.07075680242851376 test_loss: 0.07050459384918213
epoch: 17 training_loss 0.06829371551051736 test_loss: 0.06664619445800782
epoch: 18 training_loss 0.07385830909013748 test_loss: 0.06947259902954102
epoch: 19 training_loss 0.06925535055808724 test_loss: 0.06539577841758729
epoch: 20 training_loss 0.06729285703971982 test_loss: 0.06563026905059814
epoch: 21 training_loss 0.068102733977139 test_loss: 0.0718231201171875
epoch: 22 training_loss 0.06445951018482447 test_loss: 0.0740475594997406
epoch: 23 training_loss 0.06505972480401397 test_loss: 0.06719436049461365
epoch: 24 training_loss 0.05489178331568837 test_loss: 0.06670576333999634
epoch: 25 training_loss 0.05611785295419395 test_loss: 0.07200773954391479
epoch: 26 training_loss 0.05821589204482734 test_loss: 0.07182668447494507
epoch: 27 training_loss 0.057447507213801145 test_loss: 0.06765751838684082
epoch: 28 training_loss 0.060490179480984806 test_loss: 0.08828514218330383
epoch: 29 training_loss 0.06133931821212173 test_loss: 0.07413744926452637
epoch: 30 training_loss 0.05269999925047159 test_loss: 0.0742956280708313
epoch: 31 training_loss 0.052973863193765285 test_loss: 0.07283040285110473
epoch: 32 training_loss 0.05019664184190333 test_loss: 0.07693362236022949
epoch: 33 training_loss 0.051358270752243695 test_loss: 0.08130428194999695
epoch: 34 training_loss 0.052602728586643936 test_loss: 0.08695350289344787
epoch: 35 training_loss 0.04969597788527608 test_loss: 0.08101316690444946
epoch: 36 training_loss 0.04276660954579711 test_loss: 0.07832546234130859
epoch: 37 training_loss 0.04383169944398105 test_loss: 0.08141334652900696
epoch: 38 training_loss 0.04679040286689997 test_loss: 0.08050971031188965
epoch: 39 training_loss 0.051031771628186104 test_loss: 0.07740598917007446
epoch: 40 training_loss 0.04700053442269564 test_loss: 0.10070978403091431
epoch: 41 training_loss 0.04579020139761269 test_loss: 0.09208890199661254
epoch: 42 training_loss 0.048304555956274274 test_loss: 0.10582566261291504
epoch: 43 training_loss 0.051497983266599476 test_loss: 0.08442662954330445
epoch: 44 training_loss 0.046732507720589636 test_loss: 0.08316378593444824
epoch: 45 training_loss 0.04906917593907565 test_loss: 0.07337820529937744
epoch: 46 training_loss 0.039794588284567 test_loss: 0.0803219735622406
epoch: 47 training_loss 0.042193749267607925 test_loss: 0.07389870882034302
epoch: 48 training_loss 0.04152498187497258 test_loss: 0.08416063189506531
epoch: 49 training_loss 0.040110995788127185 test_loss: 0.0805768072605133
epoch: 50 training_loss 0.04028628895059228 test_loss: 0.07636905908584594
epoch: 51 training_loss 0.037887182673439386 test_loss: 0.08151673078536988
epoch: 52 training_loss 0.0392453177087009 test_loss: 0.08134644031524658
epoch: 53 training_loss 0.03487661484628916 test_loss: 0.08396151065826415
epoch: 54 training_loss 0.04011716529261321 test_loss: 0.07923292517662048
epoch: 55 training_loss 0.038533943318761886 test_loss: 0.08200007081031799
epoch: 56 training_loss 0.03777065018657595 test_loss: 0.08875939846038819
epoch: 57 training_loss 0.03826820673653856 test_loss: 0.0757591962814331
epoch: 58 training_loss 0.033925178777426485 test_loss: 0.08514995574951172
epoch: 59 training_loss 0.030768089415505528 test_loss: 0.08465647101402282
epoch: 60 training_loss 0.03241315156687051 test_loss: 0.08680051565170288
epoch: 61 training_loss 0.03473964275792241 test_loss: 0.09822441935539246
epoch: 62 training_loss 0.0295927015831694 test_loss: 0.09011611938476563
epoch: 63 training_loss 0.03401991250924766 test_loss: 0.10397253036499024
epoch: 64 training_loss 0.033475201036781074 test_loss: 0.08564433455467224
epoch: 65 training_loss 0.03058454383397475 test_loss: 0.09459555149078369
epoch: 66 training_loss 0.03202246666885913 test_loss: 0.08494728207588195
epoch: 67 training_loss 0.03056930810213089 test_loss: 0.09774702191352844
epoch: 68 training_loss 0.03762384930625558 test_loss: 0.09420177936553956
epoch: 69 training_loss 0.031050544111058117 test_loss: 0.08266152739524842
epoch: 70 training_loss 0.03132176887476817 test_loss: 0.08337181806564331
epoch: 71 training_loss 0.030117287975735964 test_loss: 0.09717203974723816
epoch: 72 training_loss 0.02566897899378091 test_loss: 0.10133097171783448
epoch: 73 training_loss 0.02860182066448033 test_loss: 0.08391306400299073
epoch: 74 training_loss 0.029864158856216818 test_loss: 0.09051057696342468
epoch: 75 training_loss 0.028669816930778323 test_loss: 0.09255035519599915
epoch: 76 training_loss 0.025361243369989097 test_loss: 0.09641668796539307
epoch: 77 training_loss 0.028082251898013055 test_loss: 0.08562365770339966
epoch: 78 training_loss 0.025380483381450176 test_loss: 0.09185912609100341
epoch: 79 training_loss 0.021908258888870477 test_loss: 0.08952709436416625
epoch: 80 training_loss 0.02737900771200657 test_loss: 0.09083210825920104
epoch: 81 training_loss 0.023727540816180406 test_loss: 0.08724401593208313
epoch: 82 training_loss 0.020365830063819885 test_loss: 0.0959233283996582
epoch: 83 training_loss 0.021887220062781126 test_loss: 0.09233648777008056
epoch: 84 training_loss 0.026083763083443047 test_loss: 0.09486817717552185
epoch: 85 training_loss 0.026653555212542413 test_loss: 0.09748769998550415
epoch: 86 training_loss 0.019320272435434164 test_loss: 0.0961637794971466
epoch: 87 training_loss 0.022514186582993714 test_loss: 0.09639328718185425
epoch: 88 training_loss 0.02339759175432846 test_loss: 0.09472227692604065
epoch: 89 training_loss 0.017274118361528965 test_loss: 0.09571475982666015
epoch: 90 training_loss 0.018967178743332626 test_loss: 0.09567140340805054
epoch: 91 training_loss 0.026261202753521503 test_loss: 0.12239633798599243
epoch: 92 training_loss 0.02578918809071183 test_loss: 0.09050742387771607
epoch: 93 training_loss 0.017228047298267483 test_loss: 0.0944103717803955
epoch: 94 training_loss 0.019598229681141676 test_loss: 0.10555776357650756
epoch: 95 training_loss 0.016626372863538564 test_loss: 0.10109280347824097
epoch: 96 training_loss 0.017056967592798174 test_loss: 0.09895139932632446
epoch: 97 training_loss 0.029289579442702232 test_loss: 0.09926409125328065
epoch: 98 training_loss 0.02325237247161567 test_loss: 0.10420533418655395
epoch: 99 training_loss 0.0193130332743749 test_loss: 0.1027742862701416
epoch: 100 training_loss 0.01650957716163248 test_loss: 0.08963283896446228
epoch: 101 training_loss 0.01920825716573745 test_loss: 0.0956373929977417
epoch: 102 training_loss 0.01834035010077059 test_loss: 0.10754055976867676
epoch: 103 training_loss 0.013491575515363365 test_loss: 0.10199173688888549
epoch: 104 training_loss 0.015583237735554575 test_loss: 0.09841351509094239
epoch: 105 training_loss 0.01237483988981694 test_loss: 0.10361402034759522
epoch: 106 training_loss 0.019589512129314243 test_loss: 0.1037736177444458
epoch: 107 training_loss 0.019027235070243478 test_loss: 0.10346485376358032
epoch: 108 training_loss 0.033913129911525175 test_loss: 0.10919437408447266
epoch: 109 training_loss 0.028752052020281554 test_loss: 0.10369864702224732
epoch: 110 training_loss 0.014500037911348044 test_loss: 0.10164412260055541
epoch: 111 training_loss 0.012883460049051791 test_loss: 0.10391802787780761
epoch: 112 training_loss 0.011419398677535355 test_loss: 0.09176402688026428
epoch: 113 training_loss 0.011748583745211363 test_loss: 0.10222233533859253
epoch: 114 training_loss 0.013564379347953945 test_loss: 0.11292208433151245
epoch: 115 training_loss 0.01334845436969772 test_loss: 0.10415931940078735
epoch: 116 training_loss 0.011358083807863295 test_loss: 0.10718454122543335
epoch: 117 training_loss 0.009295522514730693 test_loss: 0.10163419246673584
epoch: 118 training_loss 0.009725205516442657 test_loss: 0.0972456693649292
epoch: 119 training_loss 0.01091699417331256 test_loss: 0.10095789432525634
epoch: 120 training_loss 0.014055493855848908 test_loss: 0.10552082061767579
epoch: 121 training_loss 0.011838595712324604 test_loss: 0.1083789587020874
epoch: 122 training_loss 0.012318993932567537 test_loss: 0.10728509426116943
epoch: 123 training_loss 0.013441675167996436 test_loss: 0.105598783493042
epoch: 124 training_loss 0.0114682037406601 test_loss: 0.10094932317733765
epoch: 125 training_loss 0.009698862622026355 test_loss: 0.10646220445632934
epoch: 126 training_loss 0.01014087749645114 test_loss: 0.10483787059783936
epoch: 127 training_loss 0.011120969038456678 test_loss: 0.10657765865325927
epoch: 128 training_loss 0.007734274682588875 test_loss: 0.11828764677047729
epoch: 129 training_loss 0.008518517321208492 test_loss: 0.11154526472091675
epoch: 130 training_loss 0.01144540750188753 test_loss: 0.10580410957336425
epoch: 131 training_loss 0.00921894753118977 test_loss: 0.1095012068748474
epoch: 132 training_loss 0.008030258873477579 test_loss: 0.11430909633636474
epoch: 133 training_loss 0.009574749395251275 test_loss: 0.11431150436401367
epoch: 134 training_loss 0.008226434969110415 test_loss: 0.113775634765625
epoch: 135 training_loss 0.006806351597188041 test_loss: 0.11432520151138306
epoch: 136 training_loss 0.007754764133132994 test_loss: 0.1115530252456665
epoch: 137 training_loss 0.009153741094050928 test_loss: 0.11486649513244629
epoch: 138 training_loss 0.012772488172631711 test_loss: 0.11542295217514038
epoch: 139 training_loss 0.04588525042287074 test_loss: 0.1003071904182434
epoch: 140 training_loss 0.03954333921661601 test_loss: 0.11481989622116089
epoch: 141 training_loss 0.017952872395981104 test_loss: 0.09590741395950317
epoch: 142 training_loss 0.01169767635408789 test_loss: 0.11133828163146972
epoch: 143 training_loss 0.00784698965959251 test_loss: 0.10371519327163696
epoch: 144 training_loss 0.006717322649783455 test_loss: 0.1214533805847168
epoch: 145 training_loss 0.007235012822784484 test_loss: 0.11160789728164673
epoch: 146 training_loss 0.007197820848086849 test_loss: 0.12459995746612548
epoch: 147 training_loss 0.006835799885448068 test_loss: 0.12150387763977051
epoch: 148 training_loss 0.006470750067383051 test_loss: 0.11534401178359985
epoch: 149 training_loss 0.00674003227497451 test_loss: 0.12376776933670045
epoch: 0 training_loss 39.41450077056885 test_loss: 9.908866882324219
epoch: 1 training_loss 16.633805112838743 test_loss: 6.50113525390625
epoch: 2 training_loss 12.02817045211792 test_loss: 5.2352336883544925
epoch: 3 training_loss 9.916667656898499 test_loss: 4.518639373779297
epoch: 4 training_loss 8.796783061027527 test_loss: 4.046787261962891
epoch: 5 training_loss 7.897425131797791 test_loss: 3.728944778442383
epoch: 6 training_loss 7.2351675748825075 test_loss: 3.4573841094970703
epoch: 7 training_loss 6.793970308303833 test_loss: 3.269661712646484
epoch: 8 training_loss 6.384638776779175 test_loss: 3.162167549133301
epoch: 9 training_loss 6.147271537780762 test_loss: 2.9353044509887694
epoch: 10 training_loss 5.776637105941773 test_loss: 2.8402856826782226
epoch: 11 training_loss 5.334091849327088 test_loss: 2.704641914367676
epoch: 12 training_loss 5.282446060180664 test_loss: 2.5955657958984375
epoch: 13 training_loss 5.091940937042236 test_loss: 2.5129064559936523
epoch: 14 training_loss 4.82521874666214 test_loss: 2.4343164443969725
epoch: 15 training_loss 4.806081283092499 test_loss: 2.3680742263793944
epoch: 16 training_loss 4.570836448669434 test_loss: 2.3223588943481444
epoch: 17 training_loss 4.426919536590576 test_loss: 2.2748573303222654
epoch: 18 training_loss 4.3732568287849425 test_loss: 2.2829872131347657
epoch: 19 training_loss 4.220014386177063 test_loss: 2.1447582244873047
epoch: 20 training_loss 4.165957238674164 test_loss: 2.1260448455810548
epoch: 21 training_loss 4.0043280529975895 test_loss: 2.1050788879394533
epoch: 22 training_loss 3.9642796397209166 test_loss: 2.0360618591308595
epoch: 23 training_loss 3.903114025592804 test_loss: 2.009347915649414
epoch: 24 training_loss 3.846031913757324 test_loss: 2.0150270462036133
epoch: 25 training_loss 3.674175567626953 test_loss: 1.9428749084472656
epoch: 26 training_loss 3.659855923652649 test_loss: 1.9468912124633788
epoch: 27 training_loss 3.532041645050049 test_loss: 1.912973976135254
epoch: 28 training_loss 3.61296049118042 test_loss: 1.9273103713989257
epoch: 29 training_loss 3.4576895403862 test_loss: 1.8459281921386719
epoch: 30 training_loss 3.456291444301605 test_loss: 1.8439252853393555
epoch: 31 training_loss 3.3715596342086793 test_loss: 1.8128582000732423
epoch: 32 training_loss 3.372370271682739 test_loss: 1.8086790084838866
epoch: 33 training_loss 3.3501301765441895 test_loss: 1.7826396942138671
epoch: 34 training_loss 3.3053453826904295 test_loss: 1.7680456161499023
epoch: 35 training_loss 3.2769037771224974 test_loss: 1.7721803665161133
epoch: 36 training_loss 3.191172602176666 test_loss: 1.742569923400879
epoch: 37 training_loss 3.1490891289711 test_loss: 1.7186777114868164
epoch: 38 training_loss 3.113326313495636 test_loss: 1.6904373168945312
epoch: 39 training_loss 3.096329913139343 test_loss: 1.6960199356079102
epoch: 40 training_loss 2.9979844999313356 test_loss: 1.672511672973633
epoch: 41 training_loss 3.0053521013259887 test_loss: 1.6631420135498047
epoch: 42 training_loss 2.9584768056869506 test_loss: 1.643747901916504
epoch: 43 training_loss 2.9988877081871035 test_loss: 1.6237634658813476
epoch: 44 training_loss 2.9008284187316895 test_loss: 1.617093276977539
epoch: 45 training_loss 2.879636344909668 test_loss: 1.633127784729004
epoch: 46 training_loss 2.8467338490486145 test_loss: 1.5834546089172363
epoch: 47 training_loss 2.825432958602905 test_loss: 1.5780668258666992
epoch: 48 training_loss 2.801125717163086 test_loss: 1.5720973014831543
epoch: 49 training_loss 2.806889207363129 test_loss: 1.5728952407836914
epoch: 50 training_loss 2.7792460203170775 test_loss: 1.550215721130371
epoch: 51 training_loss 2.7619079399108886 test_loss: 1.570274257659912
epoch: 52 training_loss 2.6773777174949647 test_loss: 1.5332229614257813
epoch: 53 training_loss 2.755593466758728 test_loss: 1.5180331230163575
epoch: 54 training_loss 2.6579073762893675 test_loss: 1.5278719902038573
epoch: 55 training_loss 2.686124472618103 test_loss: 1.54221773147583
epoch: 56 training_loss 2.6450066590309143 test_loss: 1.521290111541748
epoch: 57 training_loss 2.6052839374542236 test_loss: 1.5065781593322753
epoch: 58 training_loss 2.6183632969856263 test_loss: 1.4981063842773437
epoch: 59 training_loss 2.6172835493087767 test_loss: 1.4633495330810546
epoch: 60 training_loss 2.5677659630775453 test_loss: 1.4727446556091308
epoch: 61 training_loss 2.577576358318329 test_loss: 1.4801800727844239
epoch: 62 training_loss 2.543233563899994 test_loss: 1.4595458984375
epoch: 63 training_loss 2.576638355255127 test_loss: 1.4647239685058593
epoch: 64 training_loss 2.5091340911388396 test_loss: 1.4531967163085937
epoch: 65 training_loss 2.518833656311035 test_loss: 1.4360466003417969
epoch: 66 training_loss 2.532320249080658 test_loss: 1.4386957168579102
epoch: 67 training_loss 2.5392329788208006 test_loss: 1.4211441040039063
epoch: 68 training_loss 2.4926869320869445 test_loss: 1.4762937545776367
epoch: 69 training_loss 2.463221930265427 test_loss: 1.4071651458740235
epoch: 70 training_loss 2.4749046671390533 test_loss: 1.4388287544250489
epoch: 71 training_loss 2.4900387787818907 test_loss: 1.4160951614379882
epoch: 72 training_loss 2.444428811073303 test_loss: 1.4198122024536133
epoch: 73 training_loss 2.436228892803192 test_loss: 1.394934844970703
epoch: 74 training_loss 2.351744688749313 test_loss: 1.3970495223999024
epoch: 75 training_loss 2.394645417928696 test_loss: 1.3785626411437988
epoch: 76 training_loss 2.349298813343048 test_loss: 1.4162200927734374
epoch: 77 training_loss 2.4047124457359312 test_loss: 1.3824331283569335
epoch: 78 training_loss 2.3608948862552643 test_loss: 1.3894951820373536
epoch: 79 training_loss 2.384448273181915 test_loss: 1.3557515144348145
epoch: 80 training_loss 2.335812040567398 test_loss: 1.3847712516784667
epoch: 81 training_loss 2.3389047849178315 test_loss: 1.36697998046875
epoch: 82 training_loss 2.298502650260925 test_loss: 1.3701488494873046
epoch: 83 training_loss 2.3220906221866606 test_loss: 1.3613374710083008
epoch: 84 training_loss 2.275894201993942 test_loss: 1.3524563789367676
epoch: 85 training_loss 2.352974096536636 test_loss: 1.3481420516967773
epoch: 86 training_loss 2.2943140029907227 test_loss: 1.3614299774169922
epoch: 87 training_loss 2.2979390597343445 test_loss: 1.3370915412902833
epoch: 88 training_loss 2.2949810683727265 test_loss: 1.3402793884277344
epoch: 89 training_loss 2.267296335697174 test_loss: 1.335592555999756
epoch: 90 training_loss 2.2937499594688417 test_loss: 1.3405937194824218
epoch: 91 training_loss 2.249816709756851 test_loss: 1.3371170997619628
epoch: 92 training_loss 2.240014811754227 test_loss: 1.3525670051574707
epoch: 93 training_loss 2.25288237452507 test_loss: 1.339416790008545
epoch: 94 training_loss 2.2410058224201204 test_loss: 1.3323967933654786
epoch: 95 training_loss 2.2357617616653442 test_loss: 1.3268343925476074
epoch: 96 training_loss 2.2272839379310607 test_loss: 1.3179137229919433
epoch: 97 training_loss 2.202809064388275 test_loss: 1.3062260627746582
epoch: 98 training_loss 2.251490309238434 test_loss: 1.294937515258789
epoch: 99 training_loss 2.2161484372615816 test_loss: 1.302910614013672
epoch: 100 training_loss 2.229052563905716 test_loss: 1.313899040222168
epoch: 101 training_loss 2.1667325854301454 test_loss: 1.3127434730529786
epoch: 102 training_loss 2.1671227335929872 test_loss: 1.314937210083008
epoch: 103 training_loss 2.198628101348877 test_loss: 1.3060710906982422
epoch: 104 training_loss 2.150233987569809 test_loss: 1.2967376708984375
epoch: 105 training_loss 2.1519627463817597 test_loss: 1.283674430847168
epoch: 106 training_loss 2.169076682329178 test_loss: 1.2886963844299317
epoch: 107 training_loss 2.163138039112091 test_loss: 1.296457004547119
epoch: 108 training_loss 2.132280778884888 test_loss: 1.283888816833496
epoch: 109 training_loss 2.1519171631336214 test_loss: 1.3072357177734375
epoch: 110 training_loss 2.1657979452610014 test_loss: 1.3645658493041992
epoch: 111 training_loss 2.1398022162914274 test_loss: 1.262076473236084
epoch: 112 training_loss 2.1434196150302887 test_loss: 1.2797968864440918
epoch: 113 training_loss 2.1253011095523835 test_loss: 1.273359775543213
epoch: 114 training_loss 2.1359810864925386 test_loss: 1.2599390983581542
epoch: 115 training_loss 2.0810635828971864 test_loss: 1.2794017791748047
epoch: 116 training_loss 2.1363082695007325 test_loss: 1.2983850479125976
epoch: 117 training_loss 2.1135168993473052 test_loss: 1.2692543983459472
epoch: 118 training_loss 2.143616291284561 test_loss: 1.262152099609375
epoch: 119 training_loss 2.077433776855469 test_loss: 1.2587371826171876
epoch: 120 training_loss 2.052414982318878 test_loss: 1.2414097785949707
epoch: 121 training_loss 2.0381954419612884 test_loss: 1.2488250732421875
epoch: 122 training_loss 2.1300557446479798 test_loss: 1.261117935180664
epoch: 123 training_loss 2.079974853992462 test_loss: 1.2490636825561523
epoch: 124 training_loss 2.085702842473984 test_loss: 1.26436128616333
epoch: 125 training_loss 2.027944713830948 test_loss: 1.2420946121215821
epoch: 126 training_loss 2.049553607702255 test_loss: 1.2352962493896484
epoch: 127 training_loss 2.055524673461914 test_loss: 1.2540937423706056
epoch: 128 training_loss 2.0728327906131745 test_loss: 1.240996742248535
epoch: 129 training_loss 2.052867559194565 test_loss: 1.2259199142456054
epoch: 130 training_loss 2.061711587905884 test_loss: 1.2377918243408204
epoch: 131 training_loss 2.001191953420639 test_loss: 1.2462678909301759
epoch: 132 training_loss 2.012108329534531 test_loss: 1.2244604110717774
epoch: 133 training_loss 2.0241454887390136 test_loss: 1.237189483642578
epoch: 134 training_loss 2.0456850230693817 test_loss: 1.222167682647705
epoch: 135 training_loss 1.9940528523921968 test_loss: 1.24223051071167
epoch: 136 training_loss 2.031784049272537 test_loss: 1.2125158309936523
epoch: 137 training_loss 1.9764250576496125 test_loss: 1.219446563720703
epoch: 138 training_loss 2.019700245857239 test_loss: 1.224284839630127
epoch: 139 training_loss 1.992724862098694 test_loss: 1.2372372627258301
epoch: 140 training_loss 2.024732449054718 test_loss: 1.2298181533813477
epoch: 141 training_loss 2.0311304104328154 test_loss: 1.2327728271484375
epoch: 142 training_loss 2.048171429634094 test_loss: 1.2286154747009277
epoch: 143 training_loss 1.976505870819092 test_loss: 1.2105509757995605
epoch: 144 training_loss 1.985110070705414 test_loss: 1.2674912452697753
epoch: 145 training_loss 2.026571280956268 test_loss: 1.2305921554565429
epoch: 146 training_loss 2.029273512363434 test_loss: 1.2304449081420898
epoch: 147 training_loss 1.9972257649898528 test_loss: 1.2433737754821776
epoch: 148 training_loss 1.9810092341899872 test_loss: 1.2153680801391602
epoch: 149 training_loss 1.9791405260562898 test_loss: 1.2049410820007325
2805.3428137389383
episode: 0 training return: tensor(74.1376, device='cuda:0')
episode: 1 training return: tensor(29.8693, device='cuda:0')
episode: 2 training return: tensor(-246.1591, device='cuda:0')
episode: 3 training return: tensor(-405.4452, device='cuda:0')
epoch: 1 test_true_pfm: 3317.975740910639 sim_pfm: -131.1011257802214
episode: 4 training return: tensor(-131.5234, device='cuda:0')
episode: 5 training return: tensor(-71.9546, device='cuda:0')
episode: 6 training return: tensor(87.1018, device='cuda:0')
episode: 7 training return: tensor(195.4570, device='cuda:0')
epoch: 2 test_true_pfm: 3339.5115824670625 sim_pfm: 84.77640103570108
episode: 8 training return: tensor(-324.2694, device='cuda:0')
episode: 9 training return: tensor(-160.2509, device='cuda:0')
episode: 10 training return: tensor(81.8210, device='cuda:0')
episode: 11 training return: tensor(205.6248, device='cuda:0')
epoch: 3 test_true_pfm: 3422.3724276875655 sim_pfm: 175.81959297818443
episode: 12 training return: tensor(147.1972, device='cuda:0')
episode: 13 training return: tensor(93.6540, device='cuda:0')
episode: 14 training return: tensor(-438.2271, device='cuda:0')
episode: 15 training return: tensor(105.2520, device='cuda:0')
epoch: 4 test_true_pfm: 2922.4323500973546 sim_pfm: 100.3694634626833
episode: 16 training return: tensor(141.4434, device='cuda:0')
episode: 17 training return: tensor(91.1956, device='cuda:0')
episode: 18 training return: tensor(131.1918, device='cuda:0')
episode: 19 training return: tensor(-32.5348, device='cuda:0')
epoch: 5 test_true_pfm: 3373.3977735955814 sim_pfm: -77.48905918052576
episode: 20 training return: tensor(-238.0194, device='cuda:0')
episode: 21 training return: tensor(67.8100, device='cuda:0')
episode: 22 training return: tensor(172.2690, device='cuda:0')
episode: 23 training return: tensor(-171.8573, device='cuda:0')
epoch: 6 test_true_pfm: 3343.1095847271877 sim_pfm: 131.35919812351736
episode: 24 training return: tensor(71.2486, device='cuda:0')
episode: 25 training return: tensor(-315.3078, device='cuda:0')
episode: 26 training return: tensor(-107.4114, device='cuda:0')
episode: 27 training return: tensor(-290.7422, device='cuda:0')
epoch: 7 test_true_pfm: 2941.102207498814 sim_pfm: 26.79681034688838
episode: 28 training return: tensor(-294.8467, device='cuda:0')
episode: 29 training return: tensor(186.9489, device='cuda:0')
episode: 30 training return: tensor(-39.4378, device='cuda:0')
episode: 31 training return: tensor(55.4708, device='cuda:0')
epoch: 8 test_true_pfm: 2439.448063280668 sim_pfm: -127.24301890392478
episode: 32 training return: tensor(-188.4963, device='cuda:0')
episode: 33 training return: tensor(-282.5388, device='cuda:0')
episode: 34 training return: tensor(-262.9195, device='cuda:0')
episode: 35 training return: tensor(105.7281, device='cuda:0')
epoch: 9 test_true_pfm: 2965.8048983108365 sim_pfm: -167.4421298300537
episode: 36 training return: tensor(168.7156, device='cuda:0')
episode: 37 training return: tensor(57.1625, device='cuda:0')
episode: 38 training return: tensor(303.0870, device='cuda:0')
episode: 39 training return: tensor(-312.8704, device='cuda:0')
epoch: 10 test_true_pfm: 3325.4362158327353 sim_pfm: 51.59709446275762
episode: 40 training return: tensor(74.4057, device='cuda:0')
episode: 41 training return: tensor(-235.0764, device='cuda:0')
episode: 42 training return: tensor(214.6552, device='cuda:0')
episode: 43 training return: tensor(101.0882, device='cuda:0')
epoch: 11 test_true_pfm: 2893.8902805497905 sim_pfm: -186.7779716963414
episode: 44 training return: tensor(152.2133, device='cuda:0')
episode: 45 training return: tensor(-277.7686, device='cuda:0')
episode: 46 training return: tensor(53.9082, device='cuda:0')
episode: 47 training return: tensor(6.3949, device='cuda:0')
epoch: 12 test_true_pfm: 2757.496424186857 sim_pfm: 102.43759940977907
episode: 48 training return: tensor(-351.8018, device='cuda:0')
episode: 49 training return: tensor(-140.8482, device='cuda:0')
episode: 50 training return: tensor(43.0979, device='cuda:0')
episode: 51 training return: tensor(-257.5603, device='cuda:0')
epoch: 13 test_true_pfm: 3184.1650728074897 sim_pfm: 90.30518888651083
episode: 52 training return: tensor(56.8438, device='cuda:0')
episode: 53 training return: tensor(233.5257, device='cuda:0')
episode: 54 training return: tensor(-169.1469, device='cuda:0')
episode: 55 training return: tensor(107.2159, device='cuda:0')
epoch: 14 test_true_pfm: 2980.9135297454814 sim_pfm: 116.68266762549563
episode: 56 training return: tensor(52.3153, device='cuda:0')
episode: 57 training return: tensor(-100.3613, device='cuda:0')
episode: 58 training return: tensor(-388.0764, device='cuda:0')
episode: 59 training return: tensor(181.7160, device='cuda:0')
epoch: 15 test_true_pfm: 2397.3104407726837 sim_pfm: -39.15067329032657
episode: 60 training return: tensor(219.5143, device='cuda:0')
episode: 61 training return: tensor(44.5292, device='cuda:0')
episode: 62 training return: tensor(-366.1007, device='cuda:0')
episode: 63 training return: tensor(-274.2220, device='cuda:0')
epoch: 16 test_true_pfm: 2925.277429058395 sim_pfm: 30.66437731089536
episode: 64 training return: tensor(-298.5612, device='cuda:0')
episode: 65 training return: tensor(-66.7080, device='cuda:0')
episode: 66 training return: tensor(-182.4365, device='cuda:0')
episode: 67 training return: tensor(17.6885, device='cuda:0')
epoch: 17 test_true_pfm: 3119.2954761224137 sim_pfm: -72.202671111833
episode: 68 training return: tensor(38.9649, device='cuda:0')
episode: 69 training return: tensor(-384.0157, device='cuda:0')
episode: 70 training return: tensor(33.5996, device='cuda:0')
episode: 71 training return: tensor(-71.7240, device='cuda:0')
epoch: 18 test_true_pfm: 3210.014463256315 sim_pfm: 91.61805080548704
episode: 72 training return: tensor(109.5349, device='cuda:0')
episode: 73 training return: tensor(197.5333, device='cuda:0')
episode: 74 training return: tensor(-76.4898, device='cuda:0')
episode: 75 training return: tensor(-176.5542, device='cuda:0')
epoch: 19 test_true_pfm: 2919.321140711591 sim_pfm: 95.33055496863865
episode: 76 training return: tensor(-132.6545, device='cuda:0')
episode: 77 training return: tensor(184.0112, device='cuda:0')
episode: 78 training return: tensor(-163.4065, device='cuda:0')
episode: 79 training return: tensor(172.7389, device='cuda:0')
epoch: 20 test_true_pfm: 3360.146524297774 sim_pfm: 50.10577067352521
episode: 80 training return: tensor(106.8427, device='cuda:0')
episode: 81 training return: tensor(114.1434, device='cuda:0')
episode: 82 training return: tensor(-254.5603, device='cuda:0')
episode: 83 training return: tensor(140.2159, device='cuda:0')
epoch: 21 test_true_pfm: 3390.7730637717254 sim_pfm: 148.96306807225724
episode: 84 training return: tensor(-47.9205, device='cuda:0')
episode: 85 training return: tensor(-1.6877, device='cuda:0')
episode: 86 training return: tensor(-275.8096, device='cuda:0')
episode: 87 training return: tensor(72.7724, device='cuda:0')
epoch: 22 test_true_pfm: 2782.483985626585 sim_pfm: 21.452211709392333
episode: 88 training return: tensor(156.4763, device='cuda:0')
episode: 89 training return: tensor(44.4651, device='cuda:0')
episode: 90 training return: tensor(123.8292, device='cuda:0')
episode: 91 training return: tensor(178.3901, device='cuda:0')
epoch: 23 test_true_pfm: 3387.6640273864746 sim_pfm: 179.54564368593856
episode: 92 training return: tensor(147.6758, device='cuda:0')
episode: 93 training return: tensor(-97.5279, device='cuda:0')
episode: 94 training return: tensor(-254.5528, device='cuda:0')
episode: 95 training return: tensor(242.6320, device='cuda:0')
epoch: 24 test_true_pfm: 3181.7261513150515 sim_pfm: -9.960594529053196
episode: 96 training return: tensor(55.2953, device='cuda:0')
episode: 97 training return: tensor(-482.7865, device='cuda:0')
episode: 98 training return: tensor(68.2922, device='cuda:0')
episode: 99 training return: tensor(88.6161, device='cuda:0')
epoch: 25 test_true_pfm: 3233.4552515792807 sim_pfm: -136.31860203753845
episode: 100 training return: tensor(84.8076, device='cuda:0')
episode: 101 training return: tensor(150.8698, device='cuda:0')
episode: 102 training return: tensor(86.4743, device='cuda:0')
episode: 103 training return: tensor(73.3507, device='cuda:0')
epoch: 26 test_true_pfm: 3386.9424576297956 sim_pfm: 65.38309508057621
episode: 104 training return: tensor(-314.9896, device='cuda:0')
episode: 105 training return: tensor(129.8961, device='cuda:0')
episode: 106 training return: tensor(147.5749, device='cuda:0')
episode: 107 training return: tensor(64.7346, device='cuda:0')
epoch: 27 test_true_pfm: 3458.898212954671 sim_pfm: 202.13540152997788
episode: 108 training return: tensor(94.1073, device='cuda:0')
episode: 109 training return: tensor(-182.0657, device='cuda:0')
episode: 110 training return: tensor(-19.7015, device='cuda:0')
episode: 111 training return: tensor(139.0742, device='cuda:0')
epoch: 28 test_true_pfm: 2763.9159623466535 sim_pfm: 172.87751310646613
episode: 112 training return: tensor(-38.8234, device='cuda:0')
episode: 113 training return: tensor(131.3428, device='cuda:0')
episode: 114 training return: tensor(227.7141, device='cuda:0')
episode: 115 training return: tensor(-198.6158, device='cuda:0')
epoch: 29 test_true_pfm: 2828.0667777475805 sim_pfm: 201.36883553897496
episode: 116 training return: tensor(285.4991, device='cuda:0')
episode: 117 training return: tensor(153.9897, device='cuda:0')
episode: 118 training return: tensor(226.6332, device='cuda:0')
episode: 119 training return: tensor(-235.6731, device='cuda:0')
epoch: 30 test_true_pfm: 3278.3252225100277 sim_pfm: 85.26709210664073
episode: 120 training return: tensor(186.6509, device='cuda:0')
episode: 121 training return: tensor(213.3813, device='cuda:0')
episode: 122 training return: tensor(259.5655, device='cuda:0')
episode: 123 training return: tensor(146.1472, device='cuda:0')
epoch: 31 test_true_pfm: 3257.324675851892 sim_pfm: 189.53554182811058
episode: 124 training return: tensor(28.9155, device='cuda:0')
episode: 125 training return: tensor(267.0795, device='cuda:0')
episode: 126 training return: tensor(144.9094, device='cuda:0')
episode: 127 training return: tensor(-113.2445, device='cuda:0')
epoch: 32 test_true_pfm: 3399.760936074507 sim_pfm: 131.89341036917176
episode: 128 training return: tensor(133.0577, device='cuda:0')
episode: 129 training return: tensor(94.1201, device='cuda:0')
episode: 130 training return: tensor(-259.6536, device='cuda:0')
episode: 131 training return: tensor(174.6801, device='cuda:0')
epoch: 33 test_true_pfm: 3379.883172369242 sim_pfm: -3.8762779002427123
episode: 132 training return: tensor(-252.5321, device='cuda:0')
episode: 133 training return: tensor(37.3719, device='cuda:0')
episode: 134 training return: tensor(202.8058, device='cuda:0')
episode: 135 training return: tensor(150.8114, device='cuda:0')
epoch: 34 test_true_pfm: 3435.3275908107403 sim_pfm: 151.55720131643466
episode: 136 training return: tensor(60.6093, device='cuda:0')
episode: 137 training return: tensor(248.4074, device='cuda:0')
episode: 138 training return: tensor(-193.8948, device='cuda:0')
episode: 139 training return: tensor(194.7372, device='cuda:0')
epoch: 35 test_true_pfm: 3077.9644372421594 sim_pfm: -97.77807258581743
episode: 140 training return: tensor(171.8765, device='cuda:0')
episode: 141 training return: tensor(95.7056, device='cuda:0')
episode: 142 training return: tensor(106.2091, device='cuda:0')
episode: 143 training return: tensor(274.5423, device='cuda:0')
epoch: 36 test_true_pfm: 3463.365693489485 sim_pfm: 90.38745948016488
episode: 144 training return: tensor(135.1843, device='cuda:0')
episode: 145 training return: tensor(143.0127, device='cuda:0')
episode: 146 training return: tensor(32.2345, device='cuda:0')
episode: 147 training return: tensor(207.1841, device='cuda:0')
epoch: 37 test_true_pfm: 3415.727558596029 sim_pfm: 209.89041799749248
episode: 148 training return: tensor(28.4506, device='cuda:0')
episode: 149 training return: tensor(265.3514, device='cuda:0')
episode: 150 training return: tensor(204.5028, device='cuda:0')
episode: 151 training return: tensor(179.6251, device='cuda:0')
epoch: 38 test_true_pfm: 3494.7070995314803 sim_pfm: 158.8514210610107
episode: 152 training return: tensor(167.0423, device='cuda:0')
episode: 153 training return: tensor(15.7315, device='cuda:0')
episode: 154 training return: tensor(210.8393, device='cuda:0')
episode: 155 training return: tensor(190.7038, device='cuda:0')
epoch: 39 test_true_pfm: 3237.7442136588033 sim_pfm: 176.86376807861961
episode: 156 training return: tensor(177.8877, device='cuda:0')
episode: 157 training return: tensor(148.4539, device='cuda:0')
episode: 158 training return: tensor(157.3151, device='cuda:0')
episode: 159 training return: tensor(86.7663, device='cuda:0')
epoch: 40 test_true_pfm: 3431.889551309882 sim_pfm: 183.11351307005194
episode: 160 training return: tensor(144.1373, device='cuda:0')
episode: 161 training return: tensor(201.5862, device='cuda:0')
episode: 162 training return: tensor(178.0714, device='cuda:0')
episode: 163 training return: tensor(-43.3106, device='cuda:0')
epoch: 41 test_true_pfm: 3397.913080938659 sim_pfm: 93.65033361575722
episode: 164 training return: tensor(26.0262, device='cuda:0')
episode: 165 training return: tensor(120.8841, device='cuda:0')
episode: 166 training return: tensor(-39.2290, device='cuda:0')
episode: 167 training return: tensor(111.5053, device='cuda:0')
epoch: 42 test_true_pfm: 3435.2630836277335 sim_pfm: 216.31134422192312
episode: 168 training return: tensor(86.0500, device='cuda:0')
episode: 169 training return: tensor(145.9251, device='cuda:0')
episode: 170 training return: tensor(148.6393, device='cuda:0')
episode: 171 training return: tensor(160.9031, device='cuda:0')
epoch: 43 test_true_pfm: 3388.5219632606277 sim_pfm: 222.078826896303
episode: 172 training return: tensor(155.3801, device='cuda:0')
episode: 173 training return: tensor(39.9275, device='cuda:0')
episode: 174 training return: tensor(157.7898, device='cuda:0')
episode: 175 training return: tensor(-218.8468, device='cuda:0')
epoch: 44 test_true_pfm: 2899.0613579428536 sim_pfm: 151.59556454919706
episode: 176 training return: tensor(160.9270, device='cuda:0')
episode: 177 training return: tensor(-182.5093, device='cuda:0')
episode: 178 training return: tensor(135.0400, device='cuda:0')
episode: 179 training return: tensor(203.9798, device='cuda:0')
epoch: 45 test_true_pfm: 3416.973414131908 sim_pfm: 46.238496560254134
episode: 180 training return: tensor(204.3278, device='cuda:0')
episode: 181 training return: tensor(187.1947, device='cuda:0')
episode: 182 training return: tensor(62.9741, device='cuda:0')
episode: 183 training return: tensor(128.1188, device='cuda:0')
epoch: 46 test_true_pfm: 3455.675307144265 sim_pfm: 260.04809239538736
episode: 184 training return: tensor(-237.6174, device='cuda:0')
episode: 185 training return: tensor(66.9067, device='cuda:0')
episode: 186 training return: tensor(28.6428, device='cuda:0')
episode: 187 training return: tensor(171.5615, device='cuda:0')
epoch: 47 test_true_pfm: 3397.6741722975326 sim_pfm: 224.59111892843308
episode: 188 training return: tensor(30.1793, device='cuda:0')
episode: 189 training return: tensor(230.4729, device='cuda:0')
episode: 190 training return: tensor(-74.1928, device='cuda:0')
episode: 191 training return: tensor(270.1512, device='cuda:0')
epoch: 48 test_true_pfm: 3287.254015936575 sim_pfm: 258.2769394776163
episode: 192 training return: tensor(218.2947, device='cuda:0')
episode: 193 training return: tensor(141.1429, device='cuda:0')
episode: 194 training return: tensor(138.9084, device='cuda:0')
episode: 195 training return: tensor(86.9754, device='cuda:0')
epoch: 49 test_true_pfm: 3071.7958005175005 sim_pfm: 247.08156271985112
episode: 196 training return: tensor(-92.3473, device='cuda:0')
episode: 197 training return: tensor(156.5112, device='cuda:0')
episode: 198 training return: tensor(28.3762, device='cuda:0')
episode: 199 training return: tensor(192.1914, device='cuda:0')
epoch: 50 test_true_pfm: 3367.425004306469 sim_pfm: 157.2147666278761
episode: 200 training return: tensor(150.0965, device='cuda:0')
episode: 201 training return: tensor(184.5954, device='cuda:0')
episode: 202 training return: tensor(218.2458, device='cuda:0')
episode: 203 training return: tensor(-121.3362, device='cuda:0')
epoch: 51 test_true_pfm: 2785.4417156707345 sim_pfm: -46.49818020065626
episode: 204 training return: tensor(168.8225, device='cuda:0')
episode: 205 training return: tensor(-78.7827, device='cuda:0')
episode: 206 training return: tensor(-323.0439, device='cuda:0')
episode: 207 training return: tensor(219.3289, device='cuda:0')
epoch: 52 test_true_pfm: 3167.471695864013 sim_pfm: 188.38490853497447
episode: 208 training return: tensor(201.9014, device='cuda:0')
episode: 209 training return: tensor(-477.7300, device='cuda:0')
episode: 210 training return: tensor(85.5857, device='cuda:0')
episode: 211 training return: tensor(130.2237, device='cuda:0')
epoch: 53 test_true_pfm: 3405.041463287335 sim_pfm: 158.15227287606103
episode: 212 training return: tensor(-237.7408, device='cuda:0')
episode: 213 training return: tensor(-152.1941, device='cuda:0')
episode: 214 training return: tensor(253.5077, device='cuda:0')
episode: 215 training return: tensor(-150.9202, device='cuda:0')
epoch: 54 test_true_pfm: 3425.2789417640815 sim_pfm: 217.42247778436285
episode: 216 training return: tensor(191.2152, device='cuda:0')
episode: 217 training return: tensor(163.6021, device='cuda:0')
episode: 218 training return: tensor(64.7998, device='cuda:0')
episode: 219 training return: tensor(206.2955, device='cuda:0')
epoch: 55 test_true_pfm: 3188.787294803553 sim_pfm: 120.50625987900032
episode: 220 training return: tensor(79.2418, device='cuda:0')
episode: 221 training return: tensor(169.5089, device='cuda:0')
episode: 222 training return: tensor(1.8076, device='cuda:0')
episode: 223 training return: tensor(200.0464, device='cuda:0')
epoch: 56 test_true_pfm: 3453.4198855939726 sim_pfm: 216.89139144235136
episode: 224 training return: tensor(196.5671, device='cuda:0')
episode: 225 training return: tensor(-6.9241, device='cuda:0')
episode: 226 training return: tensor(174.5932, device='cuda:0')
episode: 227 training return: tensor(222.3709, device='cuda:0')
epoch: 57 test_true_pfm: 3395.8815940654113 sim_pfm: 93.47497954910311
episode: 228 training return: tensor(-120.6639, device='cuda:0')
episode: 229 training return: tensor(-66.5641, device='cuda:0')
episode: 230 training return: tensor(-32.4861, device='cuda:0')
episode: 231 training return: tensor(271.2671, device='cuda:0')
epoch: 58 test_true_pfm: 3446.4268788548084 sim_pfm: 233.63744861616092
episode: 232 training return: tensor(103.4594, device='cuda:0')
episode: 233 training return: tensor(168.0679, device='cuda:0')
episode: 234 training return: tensor(194.0395, device='cuda:0')
episode: 235 training return: tensor(69.7040, device='cuda:0')
epoch: 59 test_true_pfm: 3206.985210888922 sim_pfm: 231.10349083761685
episode: 236 training return: tensor(96.1119, device='cuda:0')
episode: 237 training return: tensor(178.4070, device='cuda:0')
episode: 238 training return: tensor(170.6190, device='cuda:0')
episode: 239 training return: tensor(153.5635, device='cuda:0')
epoch: 60 test_true_pfm: 3399.2634172855182 sim_pfm: 255.5002533045578
episode: 240 training return: tensor(173.6447, device='cuda:0')
episode: 241 training return: tensor(238.8743, device='cuda:0')
episode: 242 training return: tensor(217.4221, device='cuda:0')
episode: 243 training return: tensor(207.9642, device='cuda:0')
epoch: 61 test_true_pfm: 3417.866149965105 sim_pfm: 272.78710496085114
episode: 244 training return: tensor(185.1108, device='cuda:0')
episode: 245 training return: tensor(167.9157, device='cuda:0')
episode: 246 training return: tensor(-416.0353, device='cuda:0')
episode: 247 training return: tensor(152.8473, device='cuda:0')
epoch: 62 test_true_pfm: 3450.305215322067 sim_pfm: 235.35898049432822
episode: 248 training return: tensor(60.0515, device='cuda:0')
episode: 249 training return: tensor(176.2352, device='cuda:0')
episode: 250 training return: tensor(142.9846, device='cuda:0')
episode: 251 training return: tensor(212.4357, device='cuda:0')
epoch: 63 test_true_pfm: 3427.550300786741 sim_pfm: 174.28501890960615
episode: 252 training return: tensor(-243.4795, device='cuda:0')
episode: 253 training return: tensor(153.5244, device='cuda:0')
episode: 254 training return: tensor(203.7458, device='cuda:0')
episode: 255 training return: tensor(-260.1494, device='cuda:0')
epoch: 64 test_true_pfm: 3201.0274019067692 sim_pfm: 236.32522634556517
episode: 256 training return: tensor(261.7244, device='cuda:0')
episode: 257 training return: tensor(174.7992, device='cuda:0')
episode: 258 training return: tensor(187.0380, device='cuda:0')
episode: 259 training return: tensor(249.0583, device='cuda:0')
epoch: 65 test_true_pfm: 2664.7646265221647 sim_pfm: 250.34114821697585
episode: 260 training return: tensor(96.6169, device='cuda:0')
episode: 261 training return: tensor(63.1343, device='cuda:0')
episode: 262 training return: tensor(186.5543, device='cuda:0')
episode: 263 training return: tensor(195.2473, device='cuda:0')
epoch: 66 test_true_pfm: 3431.0928359939808 sim_pfm: 167.89080773011665
episode: 264 training return: tensor(175.6256, device='cuda:0')
episode: 265 training return: tensor(170.4001, device='cuda:0')
episode: 266 training return: tensor(-271.5879, device='cuda:0')
episode: 267 training return: tensor(132.9921, device='cuda:0')
epoch: 67 test_true_pfm: 3393.4933495948812 sim_pfm: 230.38307567714946
episode: 268 training return: tensor(183.8347, device='cuda:0')
episode: 269 training return: tensor(140.9044, device='cuda:0')
episode: 270 training return: tensor(187.9454, device='cuda:0')
episode: 271 training return: tensor(75.7729, device='cuda:0')
epoch: 68 test_true_pfm: 3417.494441834182 sim_pfm: 136.48395198846507
episode: 272 training return: tensor(157.5723, device='cuda:0')
episode: 273 training return: tensor(118.2498, device='cuda:0')
episode: 274 training return: tensor(-158.2203, device='cuda:0')
episode: 275 training return: tensor(189.9193, device='cuda:0')
epoch: 69 test_true_pfm: 3429.516815852215 sim_pfm: 103.34880035248352
episode: 276 training return: tensor(197.0109, device='cuda:0')
episode: 277 training return: tensor(153.0013, device='cuda:0')
episode: 278 training return: tensor(176.5827, device='cuda:0')
episode: 279 training return: tensor(-99.7113, device='cuda:0')
epoch: 70 test_true_pfm: 3061.506172430252 sim_pfm: 212.45808631591112
episode: 280 training return: tensor(224.1532, device='cuda:0')
episode: 281 training return: tensor(-215.8982, device='cuda:0')
episode: 282 training return: tensor(165.6703, device='cuda:0')
episode: 283 training return: tensor(208.0781, device='cuda:0')
epoch: 71 test_true_pfm: 3467.3303141641445 sim_pfm: 253.60669441744298
episode: 284 training return: tensor(-155.3467, device='cuda:0')
episode: 285 training return: tensor(116.3838, device='cuda:0')
episode: 286 training return: tensor(160.9938, device='cuda:0')
episode: 287 training return: tensor(271.7997, device='cuda:0')
epoch: 72 test_true_pfm: 3488.6662873111018 sim_pfm: 228.6779283110518
episode: 288 training return: tensor(171.7520, device='cuda:0')
episode: 289 training return: tensor(22.1628, device='cuda:0')
episode: 290 training return: tensor(188.2421, device='cuda:0')
episode: 291 training return: tensor(153.6020, device='cuda:0')
epoch: 73 test_true_pfm: 3449.235791249404 sim_pfm: 211.63313584126686
episode: 292 training return: tensor(186.5812, device='cuda:0')
episode: 293 training return: tensor(166.6526, device='cuda:0')
episode: 294 training return: tensor(162.0164, device='cuda:0')
episode: 295 training return: tensor(244.5948, device='cuda:0')
epoch: 74 test_true_pfm: 3422.898223445241 sim_pfm: 230.32333632958276
episode: 296 training return: tensor(100.9803, device='cuda:0')
episode: 297 training return: tensor(-154.0941, device='cuda:0')
episode: 298 training return: tensor(218.9530, device='cuda:0')
episode: 299 training return: tensor(-439.9053, device='cuda:0')
epoch: 75 test_true_pfm: 3201.5492546398054 sim_pfm: 278.26183901079156
episode: 300 training return: tensor(202.2181, device='cuda:0')
episode: 301 training return: tensor(161.7947, device='cuda:0')
episode: 302 training return: tensor(211.0799, device='cuda:0')
episode: 303 training return: tensor(141.2649, device='cuda:0')
epoch: 76 test_true_pfm: 3477.851381384607 sim_pfm: 221.81408542505233
episode: 304 training return: tensor(165.5046, device='cuda:0')
episode: 305 training return: tensor(60.2129, device='cuda:0')
episode: 306 training return: tensor(181.6862, device='cuda:0')
episode: 307 training return: tensor(217.8993, device='cuda:0')
epoch: 77 test_true_pfm: 3113.8719090859468 sim_pfm: 202.84686415370865
episode: 308 training return: tensor(-180.0028, device='cuda:0')
episode: 309 training return: tensor(-187.2453, device='cuda:0')
episode: 310 training return: tensor(232.4606, device='cuda:0')
episode: 311 training return: tensor(292.0612, device='cuda:0')
epoch: 78 test_true_pfm: 3437.9519538226505 sim_pfm: 234.05348338374947
episode: 312 training return: tensor(-250.5543, device='cuda:0')
episode: 313 training return: tensor(183.6880, device='cuda:0')
episode: 314 training return: tensor(208.7558, device='cuda:0')
episode: 315 training return: tensor(152.7808, device='cuda:0')
epoch: 79 test_true_pfm: 3428.9358813303825 sim_pfm: 247.58492284691116
episode: 316 training return: tensor(136.7570, device='cuda:0')
episode: 317 training return: tensor(171.3479, device='cuda:0')
episode: 318 training return: tensor(233.3403, device='cuda:0')
episode: 319 training return: tensor(-352.3477, device='cuda:0')
epoch: 80 test_true_pfm: 3435.836330955453 sim_pfm: 199.31001051464895
episode: 320 training return: tensor(152.8909, device='cuda:0')
episode: 321 training return: tensor(160.8105, device='cuda:0')
episode: 322 training return: tensor(195.9327, device='cuda:0')
episode: 323 training return: tensor(178.9021, device='cuda:0')
epoch: 81 test_true_pfm: 3408.4190729900765 sim_pfm: 265.2463662484952
episode: 324 training return: tensor(221.7730, device='cuda:0')
episode: 325 training return: tensor(237.8503, device='cuda:0')
episode: 326 training return: tensor(165.1320, device='cuda:0')
episode: 327 training return: tensor(203.1553, device='cuda:0')
epoch: 82 test_true_pfm: 3067.847046346876 sim_pfm: 233.99300577044292
episode: 328 training return: tensor(203.3626, device='cuda:0')
episode: 329 training return: tensor(-73.8249, device='cuda:0')
episode: 330 training return: tensor(114.7174, device='cuda:0')
episode: 331 training return: tensor(165.2193, device='cuda:0')
epoch: 83 test_true_pfm: 3429.126119279809 sim_pfm: 197.23254136362812
episode: 332 training return: tensor(241.4750, device='cuda:0')
episode: 333 training return: tensor(214.2753, device='cuda:0')
episode: 334 training return: tensor(54.3131, device='cuda:0')
episode: 335 training return: tensor(226.4693, device='cuda:0')
epoch: 84 test_true_pfm: 3403.838802592394 sim_pfm: 215.03598352030772
episode: 336 training return: tensor(259.0984, device='cuda:0')
episode: 337 training return: tensor(194.6925, device='cuda:0')
episode: 338 training return: tensor(-174.8309, device='cuda:0')
episode: 339 training return: tensor(219.7520, device='cuda:0')
epoch: 85 test_true_pfm: 3424.921470446066 sim_pfm: 205.57914151680112
episode: 340 training return: tensor(183.0131, device='cuda:0')
episode: 341 training return: tensor(183.8538, device='cuda:0')
episode: 342 training return: tensor(234.8600, device='cuda:0')
episode: 343 training return: tensor(207.4697, device='cuda:0')
epoch: 86 test_true_pfm: 2938.2867665967406 sim_pfm: -33.94450790434106
episode: 344 training return: tensor(196.4376, device='cuda:0')
episode: 345 training return: tensor(173.0594, device='cuda:0')
episode: 346 training return: tensor(174.5593, device='cuda:0')
episode: 347 training return: tensor(237.9251, device='cuda:0')
epoch: 87 test_true_pfm: 3417.619493416141 sim_pfm: 211.78196728316834
episode: 348 training return: tensor(143.6816, device='cuda:0')
episode: 349 training return: tensor(108.2635, device='cuda:0')
episode: 350 training return: tensor(257.4926, device='cuda:0')
episode: 351 training return: tensor(168.8040, device='cuda:0')
epoch: 88 test_true_pfm: 3460.129879097774 sim_pfm: 202.5189584696394
episode: 352 training return: tensor(-302.7760, device='cuda:0')
episode: 353 training return: tensor(144.7051, device='cuda:0')
episode: 354 training return: tensor(-12.2868, device='cuda:0')
episode: 355 training return: tensor(20.0247, device='cuda:0')
epoch: 89 test_true_pfm: 3243.8954697174063 sim_pfm: 231.34348823382365
episode: 356 training return: tensor(144.6261, device='cuda:0')
episode: 357 training return: tensor(242.2247, device='cuda:0')
episode: 358 training return: tensor(-241.5707, device='cuda:0')
episode: 359 training return: tensor(252.4654, device='cuda:0')
epoch: 90 test_true_pfm: 3452.95808531707 sim_pfm: 228.93966363238482
episode: 360 training return: tensor(78.8004, device='cuda:0')
episode: 361 training return: tensor(191.2425, device='cuda:0')
episode: 362 training return: tensor(228.5980, device='cuda:0')
episode: 363 training return: tensor(148.8659, device='cuda:0')
epoch: 91 test_true_pfm: 3444.642844037575 sim_pfm: 222.2386871004128
episode: 364 training return: tensor(235.0891, device='cuda:0')
episode: 365 training return: tensor(108.5470, device='cuda:0')
episode: 366 training return: tensor(52.3542, device='cuda:0')
episode: 367 training return: tensor(254.0025, device='cuda:0')
epoch: 92 test_true_pfm: 2369.192901738989 sim_pfm: 216.43469068898898
episode: 368 training return: tensor(-327.1334, device='cuda:0')
episode: 369 training return: tensor(220.6031, device='cuda:0')
episode: 370 training return: tensor(251.3289, device='cuda:0')
episode: 371 training return: tensor(187.7627, device='cuda:0')
epoch: 93 test_true_pfm: 3132.4712313560594 sim_pfm: 263.2408347430949
episode: 372 training return: tensor(240.5639, device='cuda:0')
episode: 373 training return: tensor(70.7201, device='cuda:0')
episode: 374 training return: tensor(189.4819, device='cuda:0')
episode: 375 training return: tensor(205.4048, device='cuda:0')
epoch: 94 test_true_pfm: 3395.4715184731867 sim_pfm: 124.1123004516315
episode: 376 training return: tensor(210.7538, device='cuda:0')
episode: 377 training return: tensor(264.3326, device='cuda:0')
episode: 378 training return: tensor(162.2626, device='cuda:0')
episode: 379 training return: tensor(200.9410, device='cuda:0')
epoch: 95 test_true_pfm: 3445.2076881764283 sim_pfm: 201.9276542974791
episode: 380 training return: tensor(108.7232, device='cuda:0')
episode: 381 training return: tensor(178.7292, device='cuda:0')
episode: 382 training return: tensor(218.3686, device='cuda:0')
episode: 383 training return: tensor(205.0991, device='cuda:0')
epoch: 96 test_true_pfm: 3159.05027440527 sim_pfm: 184.24106202156204
episode: 384 training return: tensor(202.8674, device='cuda:0')
episode: 385 training return: tensor(140.5880, device='cuda:0')
episode: 386 training return: tensor(200.8409, device='cuda:0')
episode: 387 training return: tensor(247.4821, device='cuda:0')
epoch: 97 test_true_pfm: 3338.978645639227 sim_pfm: 192.31491537799593
episode: 388 training return: tensor(210.4525, device='cuda:0')
episode: 389 training return: tensor(273.8578, device='cuda:0')
episode: 390 training return: tensor(126.5492, device='cuda:0')
episode: 391 training return: tensor(-20.7292, device='cuda:0')
epoch: 98 test_true_pfm: 3462.9077345834376 sim_pfm: 112.54311408179153
episode: 392 training return: tensor(108.2187, device='cuda:0')
episode: 393 training return: tensor(-262.3459, device='cuda:0')
episode: 394 training return: tensor(219.7863, device='cuda:0')
episode: 395 training return: tensor(227.6904, device='cuda:0')
epoch: 99 test_true_pfm: 3070.679809493627 sim_pfm: 149.28384071901868
episode: 396 training return: tensor(244.9372, device='cuda:0')
episode: 397 training return: tensor(125.1687, device='cuda:0')
episode: 398 training return: tensor(-130.9350, device='cuda:0')
episode: 399 training return: tensor(172.9534, device='cuda:0')
epoch: 100 test_true_pfm: 3143.918099600822 sim_pfm: 277.2676383686873
episode: 400 training return: tensor(179.5927, device='cuda:0')
episode: 401 training return: tensor(215.4921, device='cuda:0')
episode: 402 training return: tensor(-301.8163, device='cuda:0')
episode: 403 training return: tensor(242.2702, device='cuda:0')
epoch: 101 test_true_pfm: 3449.4817252795124 sim_pfm: 218.57665831325963
episode: 404 training return: tensor(132.8846, device='cuda:0')
episode: 405 training return: tensor(172.9917, device='cuda:0')
episode: 406 training return: tensor(123.5904, device='cuda:0')
episode: 407 training return: tensor(135.3488, device='cuda:0')
epoch: 102 test_true_pfm: 3449.774118117999 sim_pfm: 224.10242081075558
episode: 408 training return: tensor(291.9234, device='cuda:0')
episode: 409 training return: tensor(97.8442, device='cuda:0')
episode: 410 training return: tensor(61.5346, device='cuda:0')
episode: 411 training return: tensor(188.4041, device='cuda:0')
epoch: 103 test_true_pfm: 3452.927021322666 sim_pfm: 218.43200024706312
episode: 412 training return: tensor(276.9404, device='cuda:0')
episode: 413 training return: tensor(244.1250, device='cuda:0')
episode: 414 training return: tensor(242.3307, device='cuda:0')
episode: 415 training return: tensor(213.2740, device='cuda:0')
epoch: 104 test_true_pfm: 3342.6172490890262 sim_pfm: 242.677980755611
episode: 416 training return: tensor(148.7112, device='cuda:0')
episode: 417 training return: tensor(261.9037, device='cuda:0')
episode: 418 training return: tensor(-103.1484, device='cuda:0')
episode: 419 training return: tensor(67.5773, device='cuda:0')
epoch: 105 test_true_pfm: 3465.0581426666345 sim_pfm: 226.77665949686585
episode: 420 training return: tensor(157.9148, device='cuda:0')
episode: 421 training return: tensor(280.9716, device='cuda:0')
episode: 422 training return: tensor(320.4574, device='cuda:0')
episode: 423 training return: tensor(199.8992, device='cuda:0')
epoch: 106 test_true_pfm: 3407.6664239873517 sim_pfm: 254.06507281957116
episode: 424 training return: tensor(230.2717, device='cuda:0')
episode: 425 training return: tensor(256.0093, device='cuda:0')
episode: 426 training return: tensor(118.2143, device='cuda:0')
episode: 427 training return: tensor(221.6740, device='cuda:0')
epoch: 107 test_true_pfm: 3430.0159807588875 sim_pfm: 207.1121767249424
episode: 428 training return: tensor(-153.7170, device='cuda:0')
episode: 429 training return: tensor(244.8319, device='cuda:0')
episode: 430 training return: tensor(208.3101, device='cuda:0')
episode: 431 training return: tensor(170.8716, device='cuda:0')
epoch: 108 test_true_pfm: 3442.8835003808053 sim_pfm: 187.13070742298927
episode: 432 training return: tensor(141.9473, device='cuda:0')
episode: 433 training return: tensor(164.2709, device='cuda:0')
episode: 434 training return: tensor(240.5246, device='cuda:0')
episode: 435 training return: tensor(289.7456, device='cuda:0')
epoch: 109 test_true_pfm: 3272.9502063010364 sim_pfm: 250.6005773744158
episode: 436 training return: tensor(223.4981, device='cuda:0')
episode: 437 training return: tensor(169.9978, device='cuda:0')
episode: 438 training return: tensor(221.5001, device='cuda:0')
episode: 439 training return: tensor(224.2194, device='cuda:0')
epoch: 110 test_true_pfm: 3154.391953878358 sim_pfm: 242.20010305566635
episode: 440 training return: tensor(238.5940, device='cuda:0')
episode: 441 training return: tensor(219.8218, device='cuda:0')
episode: 442 training return: tensor(208.6479, device='cuda:0')
episode: 443 training return: tensor(182.2045, device='cuda:0')
epoch: 111 test_true_pfm: 3428.483625503631 sim_pfm: 209.67060670991972
episode: 444 training return: tensor(170.5577, device='cuda:0')
episode: 445 training return: tensor(178.2577, device='cuda:0')
episode: 446 training return: tensor(150.7366, device='cuda:0')
episode: 447 training return: tensor(161.6876, device='cuda:0')
epoch: 112 test_true_pfm: 3487.453454940304 sim_pfm: 209.44153355960347
episode: 448 training return: tensor(280.4920, device='cuda:0')
episode: 449 training return: tensor(251.8973, device='cuda:0')
episode: 450 training return: tensor(216.9977, device='cuda:0')
episode: 451 training return: tensor(-30.5112, device='cuda:0')
epoch: 113 test_true_pfm: 3440.772315095934 sim_pfm: 192.50724230602887
episode: 452 training return: tensor(282.9843, device='cuda:0')
episode: 453 training return: tensor(231.2455, device='cuda:0')
episode: 454 training return: tensor(253.2936, device='cuda:0')
episode: 455 training return: tensor(-198.1887, device='cuda:0')
epoch: 114 test_true_pfm: 3465.442403768798 sim_pfm: 214.4083995226732
episode: 456 training return: tensor(-225.0651, device='cuda:0')
episode: 457 training return: tensor(268.5389, device='cuda:0')
episode: 458 training return: tensor(222.6615, device='cuda:0')
episode: 459 training return: tensor(207.2511, device='cuda:0')
epoch: 115 test_true_pfm: 3383.7119881539998 sim_pfm: 197.54229156052074
episode: 460 training return: tensor(266.8727, device='cuda:0')
episode: 461 training return: tensor(156.1357, device='cuda:0')
episode: 462 training return: tensor(168.3322, device='cuda:0')
episode: 463 training return: tensor(297.6785, device='cuda:0')
epoch: 116 test_true_pfm: 3427.2172566907448 sim_pfm: 192.16089554633558
episode: 464 training return: tensor(227.0879, device='cuda:0')
episode: 465 training return: tensor(193.3873, device='cuda:0')
episode: 466 training return: tensor(125.9076, device='cuda:0')
episode: 467 training return: tensor(245.5719, device='cuda:0')
epoch: 117 test_true_pfm: 3421.553428731753 sim_pfm: 207.1830280936459
episode: 468 training return: tensor(299.8495, device='cuda:0')
episode: 469 training return: tensor(-65.5379, device='cuda:0')
episode: 470 training return: tensor(273.2234, device='cuda:0')
episode: 471 training return: tensor(184.0171, device='cuda:0')
epoch: 118 test_true_pfm: 3434.680792716503 sim_pfm: 210.22225555189652
episode: 472 training return: tensor(284.1503, device='cuda:0')
episode: 473 training return: tensor(233.0490, device='cuda:0')
episode: 474 training return: tensor(228.4282, device='cuda:0')
episode: 475 training return: tensor(-373.5056, device='cuda:0')
epoch: 119 test_true_pfm: 3475.7534777643364 sim_pfm: 192.2572733972144
episode: 476 training return: tensor(110.8936, device='cuda:0')
episode: 477 training return: tensor(175.9660, device='cuda:0')
episode: 478 training return: tensor(229.7803, device='cuda:0')
episode: 479 training return: tensor(188.2143, device='cuda:0')
epoch: 120 test_true_pfm: 3494.4371575205714 sim_pfm: 240.46368305321084
episode: 480 training return: tensor(224.1738, device='cuda:0')
episode: 481 training return: tensor(-258.0481, device='cuda:0')
episode: 482 training return: tensor(144.8092, device='cuda:0')
episode: 483 training return: tensor(246.0878, device='cuda:0')
epoch: 121 test_true_pfm: 3462.7477754897795 sim_pfm: 229.35486516558254
episode: 484 training return: tensor(233.0158, device='cuda:0')
episode: 485 training return: tensor(192.5384, device='cuda:0')
episode: 486 training return: tensor(205.3599, device='cuda:0')
episode: 487 training return: tensor(143.1715, device='cuda:0')
epoch: 122 test_true_pfm: 3469.3063794053146 sim_pfm: 278.3667432858104
episode: 488 training return: tensor(163.3100, device='cuda:0')
episode: 489 training return: tensor(109.2758, device='cuda:0')
episode: 490 training return: tensor(169.1584, device='cuda:0')
episode: 491 training return: tensor(265.8638, device='cuda:0')
epoch: 123 test_true_pfm: 3443.2295398695737 sim_pfm: 180.34421783227785
episode: 492 training return: tensor(220.1421, device='cuda:0')
episode: 493 training return: tensor(-147.5367, device='cuda:0')
episode: 494 training return: tensor(175.7969, device='cuda:0')
episode: 495 training return: tensor(236.9656, device='cuda:0')
epoch: 124 test_true_pfm: 3413.576126522874 sim_pfm: 256.3817622173422
episode: 496 training return: tensor(201.1097, device='cuda:0')
episode: 497 training return: tensor(228.6018, device='cuda:0')
episode: 498 training return: tensor(270.3647, device='cuda:0')
episode: 499 training return: tensor(205.0398, device='cuda:0')
epoch: 125 test_true_pfm: 3472.141581361961 sim_pfm: 216.07498792989645
episode: 500 training return: tensor(-48.4964, device='cuda:0')
episode: 501 training return: tensor(149.5248, device='cuda:0')
episode: 502 training return: tensor(-273.4085, device='cuda:0')
episode: 503 training return: tensor(264.2608, device='cuda:0')
epoch: 126 test_true_pfm: 3366.5353654841056 sim_pfm: 191.56970707387276
episode: 504 training return: tensor(221.0873, device='cuda:0')
episode: 505 training return: tensor(242.1534, device='cuda:0')
episode: 506 training return: tensor(257.5419, device='cuda:0')
episode: 507 training return: tensor(251.0467, device='cuda:0')
epoch: 127 test_true_pfm: 3463.80212141413 sim_pfm: 248.3164876254741
episode: 508 training return: tensor(107.3873, device='cuda:0')
episode: 509 training return: tensor(287.5624, device='cuda:0')
episode: 510 training return: tensor(179.3300, device='cuda:0')
episode: 511 training return: tensor(130.7935, device='cuda:0')
epoch: 128 test_true_pfm: 3458.898700932042 sim_pfm: 248.19759860617341
episode: 512 training return: tensor(-152.7208, device='cuda:0')
episode: 513 training return: tensor(-399.1701, device='cuda:0')
episode: 514 training return: tensor(204.8203, device='cuda:0')
episode: 515 training return: tensor(319.4048, device='cuda:0')
epoch: 129 test_true_pfm: 3471.5867693112705 sim_pfm: 212.39666212381175
episode: 516 training return: tensor(249.8171, device='cuda:0')
episode: 517 training return: tensor(118.3036, device='cuda:0')
episode: 518 training return: tensor(228.0277, device='cuda:0')
episode: 519 training return: tensor(136.3601, device='cuda:0')
epoch: 130 test_true_pfm: 3497.4832711906747 sim_pfm: 247.94264271615734
episode: 520 training return: tensor(228.7768, device='cuda:0')
episode: 521 training return: tensor(193.5101, device='cuda:0')
episode: 522 training return: tensor(185.4472, device='cuda:0')
episode: 523 training return: tensor(156.4604, device='cuda:0')
epoch: 131 test_true_pfm: 3493.7173092912285 sim_pfm: 262.91968561088044
episode: 524 training return: tensor(236.7628, device='cuda:0')
episode: 525 training return: tensor(124.8292, device='cuda:0')
episode: 526 training return: tensor(93.9187, device='cuda:0')
episode: 527 training return: tensor(196.3746, device='cuda:0')
epoch: 132 test_true_pfm: 3447.765309234381 sim_pfm: 251.0037219125952
episode: 528 training return: tensor(191.3884, device='cuda:0')
episode: 529 training return: tensor(220.3889, device='cuda:0')
episode: 530 training return: tensor(211.4659, device='cuda:0')
episode: 531 training return: tensor(213.5333, device='cuda:0')
epoch: 133 test_true_pfm: 3524.578664393765 sim_pfm: 286.8915446602817
episode: 532 training return: tensor(15.6246, device='cuda:0')
episode: 533 training return: tensor(213.8277, device='cuda:0')
episode: 534 training return: tensor(200.5009, device='cuda:0')
episode: 535 training return: tensor(308.6911, device='cuda:0')
epoch: 134 test_true_pfm: 3450.9594593366965 sim_pfm: 224.16846701778317
episode: 536 training return: tensor(159.8865, device='cuda:0')
episode: 537 training return: tensor(154.9861, device='cuda:0')
episode: 538 training return: tensor(177.1313, device='cuda:0')
episode: 539 training return: tensor(-312.0546, device='cuda:0')
epoch: 135 test_true_pfm: 3436.5044206111575 sim_pfm: 187.20959699323672
episode: 540 training return: tensor(217.7404, device='cuda:0')
episode: 541 training return: tensor(227.3239, device='cuda:0')
episode: 542 training return: tensor(-346.7583, device='cuda:0')
episode: 543 training return: tensor(241.4977, device='cuda:0')
epoch: 136 test_true_pfm: 3443.830676383921 sim_pfm: 285.47182997025084
episode: 544 training return: tensor(211.4386, device='cuda:0')
episode: 545 training return: tensor(237.0486, device='cuda:0')
episode: 546 training return: tensor(139.4391, device='cuda:0')
episode: 547 training return: tensor(179.1984, device='cuda:0')
epoch: 137 test_true_pfm: 3016.9470355212393 sim_pfm: 248.93158029326392
episode: 548 training return: tensor(235.4471, device='cuda:0')
episode: 549 training return: tensor(252.9443, device='cuda:0')
episode: 550 training return: tensor(194.6407, device='cuda:0')
episode: 551 training return: tensor(160.5472, device='cuda:0')
epoch: 138 test_true_pfm: 3456.413533159703 sim_pfm: 97.4517351357111
episode: 552 training return: tensor(202.7588, device='cuda:0')
episode: 553 training return: tensor(127.7398, device='cuda:0')
episode: 554 training return: tensor(228.0199, device='cuda:0')
episode: 555 training return: tensor(248.2858, device='cuda:0')
epoch: 139 test_true_pfm: 3432.2228984669746 sim_pfm: 197.29846898515825
episode: 556 training return: tensor(-262.9710, device='cuda:0')
episode: 557 training return: tensor(240.2036, device='cuda:0')
episode: 558 training return: tensor(225.6317, device='cuda:0')
episode: 559 training return: tensor(174.3826, device='cuda:0')
epoch: 140 test_true_pfm: 3139.9435601423297 sim_pfm: 208.03582537721377
episode: 560 training return: tensor(39.4311, device='cuda:0')
episode: 561 training return: tensor(-129.0008, device='cuda:0')
episode: 562 training return: tensor(229.6484, device='cuda:0')
episode: 563 training return: tensor(140.5034, device='cuda:0')
epoch: 141 test_true_pfm: 3476.756988330572 sim_pfm: 218.71735705792284
episode: 564 training return: tensor(167.3213, device='cuda:0')
episode: 565 training return: tensor(213.2466, device='cuda:0')
episode: 566 training return: tensor(216.8040, device='cuda:0')
episode: 567 training return: tensor(179.7559, device='cuda:0')
epoch: 142 test_true_pfm: 3293.955755181056 sim_pfm: 252.0019115441149
episode: 568 training return: tensor(215.2528, device='cuda:0')
episode: 569 training return: tensor(229.0887, device='cuda:0')
episode: 570 training return: tensor(152.5860, device='cuda:0')
episode: 571 training return: tensor(250.7437, device='cuda:0')
epoch: 143 test_true_pfm: 3413.7486877974716 sim_pfm: 265.64012584106723
episode: 572 training return: tensor(205.3151, device='cuda:0')
episode: 573 training return: tensor(230.7514, device='cuda:0')
episode: 574 training return: tensor(272.9797, device='cuda:0')
episode: 575 training return: tensor(90.3153, device='cuda:0')
epoch: 144 test_true_pfm: 3442.7489877550215 sim_pfm: 265.01801725825254
episode: 576 training return: tensor(235.6754, device='cuda:0')
episode: 577 training return: tensor(123.7641, device='cuda:0')
episode: 578 training return: tensor(206.4390, device='cuda:0')
episode: 579 training return: tensor(293.2019, device='cuda:0')
epoch: 145 test_true_pfm: 3462.830150690728 sim_pfm: 231.379022830748
episode: 580 training return: tensor(-228.3763, device='cuda:0')
episode: 581 training return: tensor(175.5060, device='cuda:0')
episode: 582 training return: tensor(261.4367, device='cuda:0')
episode: 583 training return: tensor(111.5304, device='cuda:0')
epoch: 146 test_true_pfm: 3482.1783807662164 sim_pfm: 248.05132630028916
episode: 584 training return: tensor(199.8353, device='cuda:0')
episode: 585 training return: tensor(269.8820, device='cuda:0')
episode: 586 training return: tensor(277.3954, device='cuda:0')
episode: 587 training return: tensor(291.5398, device='cuda:0')
epoch: 147 test_true_pfm: 3437.200921511734 sim_pfm: 255.06614373877528
episode: 588 training return: tensor(235.3885, device='cuda:0')
episode: 589 training return: tensor(96.2348, device='cuda:0')
episode: 590 training return: tensor(184.5733, device='cuda:0')
episode: 591 training return: tensor(207.3014, device='cuda:0')
epoch: 148 test_true_pfm: 3499.294001678171 sim_pfm: 232.9111426989548
episode: 592 training return: tensor(-131.3446, device='cuda:0')
episode: 593 training return: tensor(250.3518, device='cuda:0')
episode: 594 training return: tensor(116.4857, device='cuda:0')
episode: 595 training return: tensor(147.4391, device='cuda:0')
epoch: 149 test_true_pfm: 3442.273989430652 sim_pfm: 282.1195884671761
episode: 596 training return: tensor(146.4149, device='cuda:0')
episode: 597 training return: tensor(234.3719, device='cuda:0')
episode: 598 training return: tensor(147.2453, device='cuda:0')
episode: 599 training return: tensor(195.8031, device='cuda:0')
epoch: 150 test_true_pfm: 3491.580072869667 sim_pfm: 263.4627343731311
