['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'mixed', '--seed', '0', '--data', '100000']
epoch: 0 training_loss 0.23926230028271675 test_loss: 0.1660148024559021
epoch: 1 training_loss 0.14978622056543828 test_loss: 0.1405916690826416
epoch: 2 training_loss 0.12849410075694323 test_loss: 0.12438560724258423
epoch: 3 training_loss 0.12036750998347998 test_loss: 0.14433296918869018
epoch: 4 training_loss 0.11446273870766163 test_loss: 0.11275522708892823
epoch: 5 training_loss 0.11193651143461465 test_loss: 0.11346633434295654
epoch: 6 training_loss 0.11236247602850198 test_loss: 0.11258704662322998
epoch: 7 training_loss 0.10670919723808765 test_loss: 0.1049538254737854
epoch: 8 training_loss 0.11113082263618708 test_loss: 0.10312106609344482
epoch: 9 training_loss 0.10697803411632777 test_loss: 0.11334877014160157
epoch: 10 training_loss 0.10692393008619547 test_loss: 0.10426417589187623
epoch: 11 training_loss 0.10321202836930751 test_loss: 0.10334964990615844
epoch: 12 training_loss 0.10111925240606069 test_loss: 0.10405576229095459
epoch: 13 training_loss 0.09910130321979523 test_loss: 0.09791728258132934
epoch: 14 training_loss 0.10294609377160668 test_loss: 0.10105671882629394
epoch: 15 training_loss 0.09976173039525747 test_loss: 0.09898852705955505
epoch: 16 training_loss 0.10138918466866016 test_loss: 0.10023277997970581
epoch: 17 training_loss 0.09313215220347047 test_loss: 0.10419619083404541
epoch: 18 training_loss 0.09297657266259193 test_loss: 0.09389851689338684
epoch: 19 training_loss 0.09539552170783282 test_loss: 0.0982765793800354
epoch: 20 training_loss 0.09118048716336488 test_loss: 0.09085586071014404
epoch: 21 training_loss 0.09073949411511421 test_loss: 0.09460701942443847
epoch: 22 training_loss 0.08477001227438449 test_loss: 0.08371334075927735
epoch: 23 training_loss 0.09552144486457109 test_loss: 0.09232527613639832
epoch: 24 training_loss 0.09051286293193699 test_loss: 0.09532521367073059
epoch: 25 training_loss 0.09248880747705698 test_loss: 0.09280693531036377
epoch: 26 training_loss 0.08813164435327053 test_loss: 0.08848943710327148
epoch: 27 training_loss 0.08646031491458415 test_loss: 0.08182932734489441
epoch: 28 training_loss 0.0805221520550549 test_loss: 0.08574061393737793
epoch: 29 training_loss 0.0807821417413652 test_loss: 0.0829380452632904
epoch: 30 training_loss 0.08335508910939098 test_loss: 0.09258695244789124
epoch: 31 training_loss 0.07640257742255926 test_loss: 0.0754385232925415
epoch: 32 training_loss 0.07921619141474366 test_loss: 0.08159027695655822
epoch: 33 training_loss 0.07975151427090169 test_loss: 0.08504069447517396
epoch: 34 training_loss 0.07889045093208552 test_loss: 0.0686185598373413
epoch: 35 training_loss 0.0731390942260623 test_loss: 0.0887769877910614
epoch: 36 training_loss 0.0781173058412969 test_loss: 0.07781392335891724
epoch: 37 training_loss 0.06877532564103603 test_loss: 0.07630163431167603
epoch: 38 training_loss 0.07653243778273464 test_loss: 0.08269407749176025
epoch: 39 training_loss 0.07347271449863911 test_loss: 0.08440799713134765
epoch: 40 training_loss 0.07496903954073787 test_loss: 0.08015412092208862
epoch: 41 training_loss 0.07863133703358471 test_loss: 0.07234901785850525
epoch: 42 training_loss 0.07808002894744277 test_loss: 0.0686242938041687
epoch: 43 training_loss 0.07116075120866298 test_loss: 0.05986492037773132
epoch: 44 training_loss 0.07567947445437312 test_loss: 0.07797749042510986
epoch: 45 training_loss 0.07458501962944865 test_loss: 0.07977395057678223
epoch: 46 training_loss 0.06648507500067354 test_loss: 0.06802114248275756
epoch: 47 training_loss 0.07049413286149502 test_loss: 0.08396872878074646
epoch: 48 training_loss 0.07412252197042107 test_loss: 0.06907526254653931
epoch: 49 training_loss 0.06510938430204988 test_loss: 0.07109351754188538
epoch: 50 training_loss 0.07484846509993076 test_loss: 0.07843844294548034
epoch: 51 training_loss 0.0710388765949756 test_loss: 0.07118942737579345
epoch: 52 training_loss 0.07004220802336932 test_loss: 0.06551904678344726
epoch: 53 training_loss 0.06906530376523733 test_loss: 0.07550817728042603
epoch: 54 training_loss 0.06747391190379858 test_loss: 0.07021133899688721
epoch: 55 training_loss 0.07336838517338037 test_loss: 0.07760566473007202
epoch: 56 training_loss 0.06600364997982978 test_loss: 0.06413763165473937
epoch: 57 training_loss 0.07163118923082948 test_loss: 0.0788514792919159
epoch: 58 training_loss 0.07056386534124613 test_loss: 0.06351395845413207
epoch: 59 training_loss 0.06897664006799459 test_loss: 0.07961588501930236
epoch: 60 training_loss 0.06717805054038763 test_loss: 0.06645920276641845
epoch: 61 training_loss 0.06904562283307314 test_loss: 0.07630079984664917
epoch: 62 training_loss 0.06621812531724572 test_loss: 0.07426068782806397
epoch: 63 training_loss 0.06812074368819594 test_loss: 0.06676023006439209
epoch: 64 training_loss 0.06943317422643304 test_loss: 0.07799304723739624
epoch: 65 training_loss 0.06797323435544968 test_loss: 0.07216206789016724
epoch: 66 training_loss 0.06736762991175055 test_loss: 0.08345943093299865
epoch: 67 training_loss 0.06535753162577748 test_loss: 0.07682470083236695
epoch: 68 training_loss 0.06870858563110233 test_loss: 0.07263404130935669
epoch: 69 training_loss 0.06627331763505935 test_loss: 0.06461887359619141
epoch: 70 training_loss 0.06622461579740048 test_loss: 0.06260085105895996
epoch: 71 training_loss 0.07152119243517518 test_loss: 0.07288269400596618
epoch: 72 training_loss 0.06881940972991288 test_loss: 0.061527210474014285
epoch: 73 training_loss 0.06562821563333272 test_loss: 0.06689659357070923
epoch: 74 training_loss 0.07022998464293778 test_loss: 0.062150591611862184
epoch: 75 training_loss 0.06586236383765937 test_loss: 0.06685447692871094
epoch: 76 training_loss 0.06808272825554013 test_loss: 0.08046863675117492
epoch: 77 training_loss 0.06709674872457981 test_loss: 0.06661618947982788
epoch: 78 training_loss 0.06559585250914096 test_loss: 0.06839591264724731
epoch: 79 training_loss 0.06841143658384681 test_loss: 0.0676269769668579
epoch: 80 training_loss 0.06355056012514979 test_loss: 0.07095913290977478
epoch: 81 training_loss 0.06795487134717405 test_loss: 0.07978296875953675
epoch: 82 training_loss 0.06977892935276031 test_loss: 0.057195842266082764
epoch: 83 training_loss 0.06673954397439957 test_loss: 0.06381494402885438
epoch: 84 training_loss 0.06718812794424593 test_loss: 0.07164384722709656
epoch: 85 training_loss 0.06571415543556214 test_loss: 0.07211664319038391
epoch: 86 training_loss 0.06254249597899615 test_loss: 0.0616873025894165
epoch: 87 training_loss 0.06385878706350923 test_loss: 0.06814957857131958
epoch: 88 training_loss 0.06553813208825886 test_loss: 0.0676790714263916
epoch: 89 training_loss 0.07201324597001076 test_loss: 0.07882803082466125
epoch: 90 training_loss 0.06203412554692477 test_loss: 0.061447107791900636
epoch: 91 training_loss 0.06452315201982856 test_loss: 0.0714725375175476
epoch: 92 training_loss 0.06598910111933946 test_loss: 0.06530499458312988
epoch: 93 training_loss 0.06792152918875217 test_loss: 0.07014803886413574
epoch: 94 training_loss 0.06589986495673657 test_loss: 0.06859026551246643
epoch: 95 training_loss 0.06386402446776629 test_loss: 0.07047865390777588
epoch: 96 training_loss 0.06186905073001981 test_loss: 0.07008411288261414
epoch: 97 training_loss 0.06439471794292331 test_loss: 0.06991223096847535
epoch: 98 training_loss 0.06664608940482139 test_loss: 0.07157273888587952
epoch: 99 training_loss 0.07353069821372628 test_loss: 0.06243535876274109
epoch: 100 training_loss 0.06805867772549391 test_loss: 0.061160719394683837
epoch: 101 training_loss 0.05991518447175622 test_loss: 0.06995294690132141
epoch: 102 training_loss 0.06712619546800852 test_loss: 0.06390770077705384
epoch: 103 training_loss 0.06795482419431209 test_loss: 0.06550373435020447
epoch: 104 training_loss 0.07069838754832744 test_loss: 0.07690908908843994
epoch: 105 training_loss 0.06132310587912798 test_loss: 0.07279828786849976
epoch: 106 training_loss 0.06880597244948149 test_loss: 0.06950386762619018
epoch: 107 training_loss 0.06680271446239203 test_loss: 0.07117166519165039
epoch: 108 training_loss 0.0668660469725728 test_loss: 0.07196086049079894
epoch: 109 training_loss 0.06352066995576024 test_loss: 0.07346546649932861
epoch: 110 training_loss 0.0668616545572877 test_loss: 0.06714311838150025
epoch: 111 training_loss 0.06499234959483147 test_loss: 0.0719055712223053
epoch: 112 training_loss 0.06191500960849226 test_loss: 0.06307747960090637
epoch: 113 training_loss 0.06595676057040692 test_loss: 0.07175467610359192
epoch: 114 training_loss 0.0730531381862238 test_loss: 0.06932303905487061
epoch: 115 training_loss 0.06178749805316329 test_loss: 0.06883321404457092
epoch: 116 training_loss 0.0681029411032796 test_loss: 0.07841982245445252
epoch: 117 training_loss 0.06945217986591161 test_loss: 0.06362371444702149
epoch: 118 training_loss 0.06825267696753144 test_loss: 0.06944875121116638
epoch: 119 training_loss 0.06455143958330155 test_loss: 0.061282283067703246
epoch: 120 training_loss 0.06279583340510726 test_loss: 0.07205446362495423
epoch: 121 training_loss 0.06467016946524382 test_loss: 0.06431356072425842
epoch: 122 training_loss 0.06746578216552734 test_loss: 0.06496657133102417
epoch: 123 training_loss 0.06624416418373585 test_loss: 0.0646823525428772
epoch: 124 training_loss 0.06596520575694739 test_loss: 0.06497028470039368
epoch: 125 training_loss 0.06277221001684666 test_loss: 0.06895039081573487
epoch: 126 training_loss 0.06744344199541956 test_loss: 0.07745715975761414
epoch: 127 training_loss 0.06296555638313293 test_loss: 0.05780596137046814
epoch: 128 training_loss 0.06598752322606742 test_loss: 0.06475746035575866
epoch: 129 training_loss 0.07197008840739727 test_loss: 0.06390237212181091
epoch: 130 training_loss 0.06359537532553076 test_loss: 0.06829763650894165
epoch: 131 training_loss 0.06579311529174448 test_loss: 0.0703236222267151
epoch: 132 training_loss 0.06430570995435118 test_loss: 0.0750741183757782
epoch: 133 training_loss 0.06493736742064357 test_loss: 0.0697118878364563
epoch: 134 training_loss 0.06817286841571331 test_loss: 0.06214691996574402
epoch: 135 training_loss 0.06576895111240447 test_loss: 0.07362411618232727
epoch: 136 training_loss 0.06302896801382303 test_loss: 0.051974755525588986
epoch: 137 training_loss 0.0645946685038507 test_loss: 0.07292140126228333
epoch: 138 training_loss 0.06251479687169194 test_loss: 0.07442120909690857
epoch: 139 training_loss 0.06325694827362895 test_loss: 0.06952708959579468
epoch: 140 training_loss 0.06481194712221622 test_loss: 0.06552118062973022
epoch: 141 training_loss 0.06453729905188084 test_loss: 0.06928690671920776
epoch: 142 training_loss 0.0653550806734711 test_loss: 0.06517564058303833
epoch: 143 training_loss 0.06562790123745799 test_loss: 0.06384323239326477
epoch: 144 training_loss 0.06220286631025374 test_loss: 0.06268510818481446
epoch: 145 training_loss 0.06252457749098539 test_loss: 0.05924510359764099
epoch: 146 training_loss 0.062337263906374576 test_loss: 0.07320655584335327
epoch: 147 training_loss 0.0682201424986124 test_loss: 0.06278643608093262
epoch: 148 training_loss 0.06163288544863463 test_loss: 0.05517475008964538
epoch: 149 training_loss 0.06685005630366504 test_loss: 0.07140676975250244
epoch: 0 training_loss 57.06504411697388 test_loss: 34.99703369140625
epoch: 1 training_loss 26.94844497680664 test_loss: 22.45589599609375
epoch: 2 training_loss 19.93019319534302 test_loss: 18.179586791992186
epoch: 3 training_loss 16.68056697845459 test_loss: 15.49794921875
epoch: 4 training_loss 14.636027011871338 test_loss: 13.934544372558594
epoch: 5 training_loss 13.41919900894165 test_loss: 12.486450958251954
epoch: 6 training_loss 12.212734842300415 test_loss: 11.7
epoch: 7 training_loss 11.078627653121949 test_loss: 10.726150512695312
epoch: 8 training_loss 10.246199960708617 test_loss: 9.858601379394532
epoch: 9 training_loss 9.652930784225465 test_loss: 9.158563995361328
epoch: 10 training_loss 9.033721356391906 test_loss: 8.72042236328125
epoch: 11 training_loss 8.665814142227173 test_loss: 8.467388916015626
epoch: 12 training_loss 8.174333634376525 test_loss: 7.996869659423828
epoch: 13 training_loss 7.860625343322754 test_loss: 7.440045166015625
epoch: 14 training_loss 7.4522886037826535 test_loss: 7.534618377685547
epoch: 15 training_loss 7.19052098274231 test_loss: 7.116422271728515
epoch: 16 training_loss 6.988440451622009 test_loss: 6.9468231201171875
epoch: 17 training_loss 6.664175443649292 test_loss: 6.603773498535157
epoch: 18 training_loss 6.50613429069519 test_loss: 6.569159698486328
epoch: 19 training_loss 6.404283838272095 test_loss: 6.585226440429688
epoch: 20 training_loss 6.306048502922058 test_loss: 6.365572357177735
epoch: 21 training_loss 6.118014273643493 test_loss: 6.176444244384766
epoch: 22 training_loss 6.127118754386902 test_loss: 5.978462600708008
epoch: 23 training_loss 5.939589657783508 test_loss: 5.572320938110352
epoch: 24 training_loss 5.789257836341858 test_loss: 5.800798797607422
epoch: 25 training_loss 5.688885369300842 test_loss: 5.61390495300293
epoch: 26 training_loss 5.577202072143555 test_loss: 5.468254089355469
epoch: 27 training_loss 5.5786192417144775 test_loss: 5.3507545471191404
epoch: 28 training_loss 5.454350528717041 test_loss: 5.2992103576660154
epoch: 29 training_loss 5.353209090232849 test_loss: 5.204288101196289
epoch: 30 training_loss 5.344235973358154 test_loss: 5.160463333129883
epoch: 31 training_loss 5.181635808944702 test_loss: 5.156469345092773
epoch: 32 training_loss 5.155827126502991 test_loss: 5.2758338928222654
epoch: 33 training_loss 5.1310627865791325 test_loss: 4.974837875366211
epoch: 34 training_loss 4.983061690330505 test_loss: 5.172872924804688
epoch: 35 training_loss 4.905894718170166 test_loss: 4.715639114379883
epoch: 36 training_loss 4.968607931137085 test_loss: 4.843975448608399
epoch: 37 training_loss 4.791425545215606 test_loss: 4.767219924926758
epoch: 38 training_loss 4.825303356647492 test_loss: 4.8935791015625
epoch: 39 training_loss 4.729560956954956 test_loss: 4.723723220825195
epoch: 40 training_loss 4.788355009555817 test_loss: 4.694062042236328
epoch: 41 training_loss 4.58900285243988 test_loss: 4.698305892944336
epoch: 42 training_loss 4.583536145687103 test_loss: 4.496098327636719
epoch: 43 training_loss 4.650472674369812 test_loss: 4.582220840454101
epoch: 44 training_loss 4.573823826313019 test_loss: 4.516553115844727
epoch: 45 training_loss 4.549073522090912 test_loss: 4.247774124145508
epoch: 46 training_loss 4.499750866889953 test_loss: 4.341856002807617
epoch: 47 training_loss 4.466562306880951 test_loss: 4.300431442260742
epoch: 48 training_loss 4.384504680633545 test_loss: 4.199941635131836
epoch: 49 training_loss 4.348699176311493 test_loss: 4.375830459594726
epoch: 50 training_loss 4.332612030506134 test_loss: 4.241046524047851
epoch: 51 training_loss 4.341996824741363 test_loss: 4.2820289611816404
epoch: 52 training_loss 4.314381754398346 test_loss: 4.394812774658203
epoch: 53 training_loss 4.157119801044464 test_loss: 4.149745178222656
epoch: 54 training_loss 4.151041476726532 test_loss: 4.264068603515625
epoch: 55 training_loss 4.092829215526581 test_loss: 4.068896484375
epoch: 56 training_loss 4.10313723564148 test_loss: 4.030649948120117
epoch: 57 training_loss 4.034536116123199 test_loss: 3.9810855865478514
epoch: 58 training_loss 4.047868633270264 test_loss: 3.991986846923828
epoch: 59 training_loss 4.070955529212951 test_loss: 4.17553939819336
epoch: 60 training_loss 3.9113487339019777 test_loss: 4.022664260864258
epoch: 61 training_loss 3.9092093110084534 test_loss: 3.9205032348632813
epoch: 62 training_loss 3.9437895727157595 test_loss: 4.020577239990234
epoch: 63 training_loss 3.9499609446525574 test_loss: 3.9489990234375
epoch: 64 training_loss 3.966606252193451 test_loss: 3.8967975616455077
epoch: 65 training_loss 3.9472207903861998 test_loss: 3.942378616333008
epoch: 66 training_loss 3.851351010799408 test_loss: 3.909855270385742
epoch: 67 training_loss 3.779260277748108 test_loss: 3.725537109375
epoch: 68 training_loss 3.9047573828697204 test_loss: 4.15606803894043
epoch: 69 training_loss 3.8063538336753844 test_loss: 3.835275650024414
epoch: 70 training_loss 3.782137906551361 test_loss: 3.6559803009033205
epoch: 71 training_loss 3.7217674803733827 test_loss: 3.937199020385742
epoch: 72 training_loss 3.7862172961235045 test_loss: 3.8373077392578123
epoch: 73 training_loss 3.6626587653160096 test_loss: 3.7183223724365235
epoch: 74 training_loss 3.7641866993904114 test_loss: 3.685421371459961
epoch: 75 training_loss 3.6531133961677553 test_loss: 3.668527603149414
epoch: 76 training_loss 3.7340814876556396 test_loss: 3.7608734130859376
epoch: 77 training_loss 3.598554513454437 test_loss: 3.536955642700195
epoch: 78 training_loss 3.7180298519134523 test_loss: 3.6054351806640623
epoch: 79 training_loss 3.6561002039909365 test_loss: 3.5416072845458983
epoch: 80 training_loss 3.611716446876526 test_loss: 3.76146240234375
epoch: 81 training_loss 3.6692022943496703 test_loss: 3.701917266845703
epoch: 82 training_loss 3.5490347003936766 test_loss: 3.613315963745117
epoch: 83 training_loss 3.69865752696991 test_loss: 3.4730396270751953
epoch: 84 training_loss 3.519445652961731 test_loss: 3.459482955932617
epoch: 85 training_loss 3.530592827796936 test_loss: 3.5264251708984373
epoch: 86 training_loss 3.517352623939514 test_loss: 3.4874534606933594
epoch: 87 training_loss 3.5321388912200926 test_loss: 3.4390220642089844
epoch: 88 training_loss 3.506139359474182 test_loss: 3.4522823333740233
epoch: 89 training_loss 3.4269180727005004 test_loss: 3.399686813354492
epoch: 90 training_loss 3.435853168964386 test_loss: 3.3805110931396483
epoch: 91 training_loss 3.5000357913970945 test_loss: 3.545259475708008
epoch: 92 training_loss 3.4800686287879943 test_loss: 3.4722454071044924
epoch: 93 training_loss 3.422084493637085 test_loss: 3.2432857513427735
epoch: 94 training_loss 3.472338309288025 test_loss: 3.275399017333984
epoch: 95 training_loss 3.4014582991600038 test_loss: 3.5287750244140623
epoch: 96 training_loss 3.387248990535736 test_loss: 3.3508010864257813
epoch: 97 training_loss 3.3770464706420897 test_loss: 3.350646209716797
epoch: 98 training_loss 3.425888116359711 test_loss: 3.2077938079833985
epoch: 99 training_loss 3.3971628975868224 test_loss: 3.6229583740234377
epoch: 100 training_loss 3.391587481498718 test_loss: 3.3496170043945312
epoch: 101 training_loss 3.3291185808181765 test_loss: 3.469820022583008
epoch: 102 training_loss 3.3345508408546447 test_loss: 3.268524169921875
epoch: 103 training_loss 3.2508848476409913 test_loss: 3.2576732635498047
epoch: 104 training_loss 3.247872061729431 test_loss: 3.198977470397949
epoch: 105 training_loss 3.281411862373352 test_loss: 3.3450344085693358
epoch: 106 training_loss 3.2557100462913513 test_loss: 3.192257118225098
epoch: 107 training_loss 3.2114626479148867 test_loss: 3.290439987182617
epoch: 108 training_loss 3.287007038593292 test_loss: 3.37042236328125
epoch: 109 training_loss 3.2683930683135984 test_loss: 3.225138473510742
epoch: 110 training_loss 3.292858848571777 test_loss: 3.24334831237793
epoch: 111 training_loss 3.287650632858276 test_loss: 3.209511947631836
epoch: 112 training_loss 3.222892026901245 test_loss: 3.2393157958984373
epoch: 113 training_loss 3.166660578250885 test_loss: 3.3329498291015627
epoch: 114 training_loss 3.3199510884284975 test_loss: 3.15478401184082
epoch: 115 training_loss 3.244854407310486 test_loss: 3.283578109741211
epoch: 116 training_loss 3.212224051952362 test_loss: 3.166026306152344
epoch: 117 training_loss 3.2157826733589174 test_loss: 3.176010322570801
epoch: 118 training_loss 3.145705177783966 test_loss: 3.014856147766113
epoch: 119 training_loss 3.1191881918907165 test_loss: 3.1840829849243164
epoch: 120 training_loss 3.268606252670288 test_loss: 3.146285057067871
epoch: 121 training_loss 3.1672508311271668 test_loss: 3.154262924194336
epoch: 122 training_loss 3.168203363418579 test_loss: 3.1079769134521484
epoch: 123 training_loss 3.171193292140961 test_loss: 3.0269472122192385
epoch: 124 training_loss 3.133891441822052 test_loss: 3.1951459884643554
epoch: 125 training_loss 3.0649435138702392 test_loss: 3.148616981506348
epoch: 126 training_loss 3.1078186511993406 test_loss: 3.2946041107177733
epoch: 127 training_loss 3.1509112334251403 test_loss: 3.059579277038574
epoch: 128 training_loss 3.061044526100159 test_loss: 3.066865921020508
epoch: 129 training_loss 3.132983512878418 test_loss: 3.072785758972168
epoch: 130 training_loss 3.111591863632202 test_loss: 3.083867645263672
epoch: 131 training_loss 3.117965536117554 test_loss: 3.05889778137207
epoch: 132 training_loss 3.04699405670166 test_loss: 3.1393150329589843
epoch: 133 training_loss 3.1465036773681643 test_loss: 3.0377578735351562
epoch: 134 training_loss 3.0422845911979675 test_loss: 3.0371423721313477
epoch: 135 training_loss 2.9286630868911745 test_loss: 2.9617382049560548
epoch: 136 training_loss 2.9980525016784667 test_loss: 3.0625905990600586
epoch: 137 training_loss 3.0556411075592043 test_loss: 2.9432687759399414
epoch: 138 training_loss 2.9860473132133483 test_loss: 2.955453872680664
epoch: 139 training_loss 3.014613676071167 test_loss: 2.889164161682129
epoch: 140 training_loss 2.9828523325920107 test_loss: 3.095756721496582
epoch: 141 training_loss 3.0461134338378906 test_loss: 3.0369150161743166
epoch: 142 training_loss 2.9693004584312437 test_loss: 2.981781005859375
epoch: 143 training_loss 2.9903970909118653 test_loss: 2.9304166793823243
epoch: 144 training_loss 2.975464954376221 test_loss: 3.009431838989258
epoch: 145 training_loss 2.983358917236328 test_loss: 2.9906200408935546
epoch: 146 training_loss 3.017710726261139 test_loss: 3.1015716552734376
epoch: 147 training_loss 2.9751614046096804 test_loss: 2.935447883605957
epoch: 148 training_loss 2.926843140125275 test_loss: 2.9296274185180664
epoch: 149 training_loss 2.9455553340911864 test_loss: 3.014829635620117
5175.58933392692
episode: 0 training return: tensor(-536.4761, device='cuda:0')
episode: 1 training return: tensor(-427.6129, device='cuda:0')
episode: 2 training return: tensor(-568.0882, device='cuda:0')
episode: 3 training return: tensor(-480.9160, device='cuda:0')
epoch: 1 test_true_pfm: 5190.96978834642 sim_pfm: -413.81368810206186
episode: 4 training return: tensor(-521.8722, device='cuda:0')
episode: 5 training return: tensor(-422.4854, device='cuda:0')
episode: 6 training return: tensor(-436.7009, device='cuda:0')
episode: 7 training return: tensor(-483.7281, device='cuda:0')
epoch: 2 test_true_pfm: 5268.420023525713 sim_pfm: -440.70893389574485
episode: 8 training return: tensor(-479.2177, device='cuda:0')
episode: 9 training return: tensor(-500.4268, device='cuda:0')
episode: 10 training return: tensor(-481.3505, device='cuda:0')
episode: 11 training return: tensor(-452.1988, device='cuda:0')
epoch: 3 test_true_pfm: 5291.046403627646 sim_pfm: -394.9455869857047
episode: 12 training return: tensor(-525.4446, device='cuda:0')
episode: 13 training return: tensor(-472.6342, device='cuda:0')
episode: 14 training return: tensor(-393.5290, device='cuda:0')
episode: 15 training return: tensor(-408.8719, device='cuda:0')
epoch: 4 test_true_pfm: 4836.37417518265 sim_pfm: -387.57182285337086
episode: 16 training return: tensor(-464.9927, device='cuda:0')
episode: 17 training return: tensor(-453.5665, device='cuda:0')
episode: 18 training return: tensor(-526.4723, device='cuda:0')
episode: 19 training return: tensor(-579.1031, device='cuda:0')
epoch: 5 test_true_pfm: 5076.143217625618 sim_pfm: -423.68039999335696
episode: 20 training return: tensor(-467.3224, device='cuda:0')
episode: 21 training return: tensor(-584.9096, device='cuda:0')
episode: 22 training return: tensor(-480.1558, device='cuda:0')
episode: 23 training return: tensor(-536.6564, device='cuda:0')
epoch: 6 test_true_pfm: 5231.819555460274 sim_pfm: -332.57074519094505
episode: 24 training return: tensor(-573.6360, device='cuda:0')
episode: 25 training return: tensor(-451.0425, device='cuda:0')
episode: 26 training return: tensor(-484.3450, device='cuda:0')
episode: 27 training return: tensor(-421.0464, device='cuda:0')
epoch: 7 test_true_pfm: 5271.464005486289 sim_pfm: -524.0053257765248
episode: 28 training return: tensor(-492.5385, device='cuda:0')
episode: 29 training return: tensor(-360.5648, device='cuda:0')
episode: 30 training return: tensor(-371.0757, device='cuda:0')
episode: 31 training return: tensor(-290.2825, device='cuda:0')
epoch: 8 test_true_pfm: 5291.326333301404 sim_pfm: -409.72474472183967
episode: 32 training return: tensor(-386.7768, device='cuda:0')
episode: 33 training return: tensor(-463.0981, device='cuda:0')
episode: 34 training return: tensor(-316.1786, device='cuda:0')
episode: 35 training return: tensor(-331.5848, device='cuda:0')
epoch: 9 test_true_pfm: 5226.24741760414 sim_pfm: -442.1035885179784
episode: 36 training return: tensor(-402.0323, device='cuda:0')
episode: 37 training return: tensor(-322.6869, device='cuda:0')
episode: 38 training return: tensor(-480.1660, device='cuda:0')
episode: 39 training return: tensor(-326.6304, device='cuda:0')
epoch: 10 test_true_pfm: 5129.588050513553 sim_pfm: -435.98660996131366
episode: 40 training return: tensor(-505.7368, device='cuda:0')
episode: 41 training return: tensor(-496.6178, device='cuda:0')
episode: 42 training return: tensor(-213.8188, device='cuda:0')
episode: 43 training return: tensor(-539.8128, device='cuda:0')
epoch: 11 test_true_pfm: 5298.149342065061 sim_pfm: -401.5326691534913
episode: 44 training return: tensor(-490.5525, device='cuda:0')
episode: 45 training return: tensor(-541.5658, device='cuda:0')
episode: 46 training return: tensor(-439.9795, device='cuda:0')
episode: 47 training return: tensor(-385.8147, device='cuda:0')
epoch: 12 test_true_pfm: 5402.2175438751865 sim_pfm: -383.72019439180923
episode: 48 training return: tensor(-457.6404, device='cuda:0')
episode: 49 training return: tensor(-342.8210, device='cuda:0')
episode: 50 training return: tensor(-393.9815, device='cuda:0')
episode: 51 training return: tensor(-491.3846, device='cuda:0')
epoch: 13 test_true_pfm: 5322.048337624271 sim_pfm: -401.26330595488736
episode: 52 training return: tensor(-374.0199, device='cuda:0')
episode: 53 training return: tensor(-480.0396, device='cuda:0')
episode: 54 training return: tensor(-408.4792, device='cuda:0')
episode: 55 training return: tensor(-432.6593, device='cuda:0')
epoch: 14 test_true_pfm: 5359.02347423229 sim_pfm: -350.9161221628504
episode: 56 training return: tensor(-476.6263, device='cuda:0')
episode: 57 training return: tensor(-475.7534, device='cuda:0')
episode: 58 training return: tensor(-423.9399, device='cuda:0')
episode: 59 training return: tensor(-389.2826, device='cuda:0')
epoch: 15 test_true_pfm: 5316.023552428836 sim_pfm: -326.9976618837488
episode: 60 training return: tensor(-367.1448, device='cuda:0')
episode: 61 training return: tensor(-450.9955, device='cuda:0')
episode: 62 training return: tensor(-339.5403, device='cuda:0')
episode: 63 training return: tensor(-458.7203, device='cuda:0')
epoch: 16 test_true_pfm: 5435.041253407316 sim_pfm: -334.6543140191061
episode: 64 training return: tensor(-382.6229, device='cuda:0')
episode: 65 training return: tensor(-491.4280, device='cuda:0')
episode: 66 training return: tensor(-388.2179, device='cuda:0')
episode: 67 training return: tensor(-337.4587, device='cuda:0')
epoch: 17 test_true_pfm: 5405.636366454775 sim_pfm: -336.88287120559835
episode: 68 training return: tensor(-390.7993, device='cuda:0')
episode: 69 training return: tensor(-427.1674, device='cuda:0')
episode: 70 training return: tensor(-352.0254, device='cuda:0')
episode: 71 training return: tensor(-460.2893, device='cuda:0')
epoch: 18 test_true_pfm: 5363.414079685709 sim_pfm: -312.71866632218007
episode: 72 training return: tensor(-461.4912, device='cuda:0')
episode: 73 training return: tensor(-399.0909, device='cuda:0')
episode: 74 training return: tensor(-432.1105, device='cuda:0')
episode: 75 training return: tensor(-411.8680, device='cuda:0')
epoch: 19 test_true_pfm: 5357.072429155677 sim_pfm: -300.7937725695762
episode: 76 training return: tensor(-398.9952, device='cuda:0')
episode: 77 training return: tensor(-450.2346, device='cuda:0')
episode: 78 training return: tensor(-395.9511, device='cuda:0')
episode: 79 training return: tensor(-424.5383, device='cuda:0')
epoch: 20 test_true_pfm: 5273.087709240728 sim_pfm: -323.18243539622443
episode: 80 training return: tensor(-383.1544, device='cuda:0')
episode: 81 training return: tensor(-473.4582, device='cuda:0')
episode: 82 training return: tensor(-316.2752, device='cuda:0')
episode: 83 training return: tensor(-287.1211, device='cuda:0')
epoch: 21 test_true_pfm: 5442.528715666454 sim_pfm: -347.4880127369058
episode: 84 training return: tensor(-304.9129, device='cuda:0')
episode: 85 training return: tensor(-441.4955, device='cuda:0')
episode: 86 training return: tensor(-444.0250, device='cuda:0')
episode: 87 training return: tensor(-419.5754, device='cuda:0')
epoch: 22 test_true_pfm: 5282.8608339153625 sim_pfm: -351.45560534416774
episode: 88 training return: tensor(-417.7096, device='cuda:0')
episode: 89 training return: tensor(-374.1951, device='cuda:0')
episode: 90 training return: tensor(-352.7744, device='cuda:0')
episode: 91 training return: tensor(-476.4322, device='cuda:0')
epoch: 23 test_true_pfm: 5380.992663515922 sim_pfm: -386.6495137240466
episode: 92 training return: tensor(-465.0221, device='cuda:0')
episode: 93 training return: tensor(-379.9336, device='cuda:0')
episode: 94 training return: tensor(-407.6118, device='cuda:0')
episode: 95 training return: tensor(-433.3141, device='cuda:0')
epoch: 24 test_true_pfm: 5430.121984393374 sim_pfm: -245.83281143801287
episode: 96 training return: tensor(-437.4590, device='cuda:0')
episode: 97 training return: tensor(-384.2119, device='cuda:0')
episode: 98 training return: tensor(-269.1499, device='cuda:0')
episode: 99 training return: tensor(-465.3954, device='cuda:0')
epoch: 25 test_true_pfm: 5359.290986137205 sim_pfm: -306.36848275764106
episode: 100 training return: tensor(-431.1722, device='cuda:0')
episode: 101 training return: tensor(-452.6856, device='cuda:0')
episode: 102 training return: tensor(-313.8349, device='cuda:0')
episode: 103 training return: tensor(-413.9576, device='cuda:0')
epoch: 26 test_true_pfm: 5455.204741148369 sim_pfm: -365.99750276936294
episode: 104 training return: tensor(-459.3939, device='cuda:0')
episode: 105 training return: tensor(-384.3191, device='cuda:0')
episode: 106 training return: tensor(-420.9284, device='cuda:0')
episode: 107 training return: tensor(-372.6510, device='cuda:0')
epoch: 27 test_true_pfm: 4175.780455512056 sim_pfm: -295.66275860924117
episode: 108 training return: tensor(-366.8363, device='cuda:0')
episode: 109 training return: tensor(-379.4584, device='cuda:0')
episode: 110 training return: tensor(-341.2302, device='cuda:0')
episode: 111 training return: tensor(-396.0564, device='cuda:0')
epoch: 28 test_true_pfm: 5468.159983497386 sim_pfm: -374.16119278850965
episode: 112 training return: tensor(-294.6271, device='cuda:0')
episode: 113 training return: tensor(-405.4236, device='cuda:0')
episode: 114 training return: tensor(-464.8174, device='cuda:0')
episode: 115 training return: tensor(-373.7002, device='cuda:0')
epoch: 29 test_true_pfm: 5415.804190079588 sim_pfm: -291.0637680848692
episode: 116 training return: tensor(-368.1119, device='cuda:0')
episode: 117 training return: tensor(-462.8329, device='cuda:0')
episode: 118 training return: tensor(-442.3181, device='cuda:0')
episode: 119 training return: tensor(-453.4666, device='cuda:0')
epoch: 30 test_true_pfm: 5409.252921125042 sim_pfm: -366.22331867391284
episode: 120 training return: tensor(-406.4735, device='cuda:0')
episode: 121 training return: tensor(-494.1353, device='cuda:0')
episode: 122 training return: tensor(-286.1739, device='cuda:0')
episode: 123 training return: tensor(-345.4125, device='cuda:0')
epoch: 31 test_true_pfm: 5425.200929474205 sim_pfm: -312.3273162978876
episode: 124 training return: tensor(-348.7444, device='cuda:0')
episode: 125 training return: tensor(-333.5739, device='cuda:0')
episode: 126 training return: tensor(-365.4544, device='cuda:0')
episode: 127 training return: tensor(-431.4626, device='cuda:0')
epoch: 32 test_true_pfm: 5429.046213836279 sim_pfm: -269.28261644939386
episode: 128 training return: tensor(-330.3167, device='cuda:0')
episode: 129 training return: tensor(-418.4914, device='cuda:0')
episode: 130 training return: tensor(-381.9482, device='cuda:0')
episode: 131 training return: tensor(-317.0386, device='cuda:0')
epoch: 33 test_true_pfm: 6343.074057041636 sim_pfm: -368.0805044204365
episode: 132 training return: tensor(-248.0189, device='cuda:0')
episode: 133 training return: tensor(-378.5269, device='cuda:0')
episode: 134 training return: tensor(-293.7004, device='cuda:0')
episode: 135 training return: tensor(-368.8174, device='cuda:0')
epoch: 34 test_true_pfm: 5954.06436106003 sim_pfm: -270.06173405671143
episode: 136 training return: tensor(-404.1240, device='cuda:0')
episode: 137 training return: tensor(-412.5436, device='cuda:0')
episode: 138 training return: tensor(-310.4560, device='cuda:0')
episode: 139 training return: tensor(-337.2379, device='cuda:0')
epoch: 35 test_true_pfm: 5402.419116614757 sim_pfm: -270.70711823234643
episode: 140 training return: tensor(-353.2387, device='cuda:0')
episode: 141 training return: tensor(-406.2808, device='cuda:0')
episode: 142 training return: tensor(-415.1413, device='cuda:0')
episode: 143 training return: tensor(-300.6321, device='cuda:0')
epoch: 36 test_true_pfm: 5448.967989451224 sim_pfm: -331.35769013889757
episode: 144 training return: tensor(-468.7204, device='cuda:0')
episode: 145 training return: tensor(-427.6969, device='cuda:0')
episode: 146 training return: tensor(-460.5759, device='cuda:0')
episode: 147 training return: tensor(-361.4811, device='cuda:0')
epoch: 37 test_true_pfm: 5490.29817750675 sim_pfm: -189.15030395934204
episode: 148 training return: tensor(-421.2302, device='cuda:0')
episode: 149 training return: tensor(-440.6970, device='cuda:0')
episode: 150 training return: tensor(-395.5174, device='cuda:0')
episode: 151 training return: tensor(-387.6241, device='cuda:0')
epoch: 38 test_true_pfm: 5385.246605589434 sim_pfm: -380.95753306226106
episode: 152 training return: tensor(-366.4706, device='cuda:0')
episode: 153 training return: tensor(-377.5172, device='cuda:0')
episode: 154 training return: tensor(-506.7992, device='cuda:0')
episode: 155 training return: tensor(-307.2566, device='cuda:0')
epoch: 39 test_true_pfm: 5505.749357621532 sim_pfm: -300.9430510694704
episode: 156 training return: tensor(-399.7820, device='cuda:0')
episode: 157 training return: tensor(-446.3607, device='cuda:0')
episode: 158 training return: tensor(-364.7624, device='cuda:0')
episode: 159 training return: tensor(-459.7545, device='cuda:0')
epoch: 40 test_true_pfm: 5301.371850028029 sim_pfm: -336.8169636095602
episode: 160 training return: tensor(-318.7281, device='cuda:0')
episode: 161 training return: tensor(-316.7644, device='cuda:0')
episode: 162 training return: tensor(-370.2518, device='cuda:0')
episode: 163 training return: tensor(-373.8676, device='cuda:0')
epoch: 41 test_true_pfm: 5751.3617089590925 sim_pfm: -245.1166693752845
episode: 164 training return: tensor(-335.6144, device='cuda:0')
episode: 165 training return: tensor(-299.8780, device='cuda:0')
episode: 166 training return: tensor(-526.5441, device='cuda:0')
episode: 167 training return: tensor(-337.0691, device='cuda:0')
epoch: 42 test_true_pfm: 5677.449667658225 sim_pfm: -262.9982839348765
episode: 168 training return: tensor(-400.3858, device='cuda:0')
episode: 169 training return: tensor(-383.1873, device='cuda:0')
episode: 170 training return: tensor(-460.7902, device='cuda:0')
episode: 171 training return: tensor(-433.3755, device='cuda:0')
epoch: 43 test_true_pfm: 5449.269965995748 sim_pfm: -243.09080426592845
episode: 172 training return: tensor(-289.7993, device='cuda:0')
episode: 173 training return: tensor(-324.7660, device='cuda:0')
episode: 174 training return: tensor(-327.2349, device='cuda:0')
episode: 175 training return: tensor(-395.1222, device='cuda:0')
epoch: 44 test_true_pfm: 5448.896551000474 sim_pfm: -268.3451702041978
episode: 176 training return: tensor(-344.3181, device='cuda:0')
episode: 177 training return: tensor(-316.5353, device='cuda:0')
episode: 178 training return: tensor(-368.5713, device='cuda:0')
episode: 179 training return: tensor(-422.7836, device='cuda:0')
epoch: 45 test_true_pfm: 5423.33151690698 sim_pfm: -376.0660405002321
episode: 180 training return: tensor(-341.4437, device='cuda:0')
episode: 181 training return: tensor(-326.5341, device='cuda:0')
episode: 182 training return: tensor(-326.8791, device='cuda:0')
episode: 183 training return: tensor(-360.0295, device='cuda:0')
epoch: 46 test_true_pfm: 5570.917050416444 sim_pfm: -203.06756901643044
episode: 184 training return: tensor(-310.1057, device='cuda:0')
episode: 185 training return: tensor(-320.2488, device='cuda:0')
episode: 186 training return: tensor(-383.5843, device='cuda:0')
episode: 187 training return: tensor(-303.1106, device='cuda:0')
epoch: 47 test_true_pfm: 5573.202146830464 sim_pfm: -258.83222985145403
episode: 188 training return: tensor(-252.8802, device='cuda:0')
episode: 189 training return: tensor(-297.5938, device='cuda:0')
episode: 190 training return: tensor(-356.9232, device='cuda:0')
episode: 191 training return: tensor(-424.5216, device='cuda:0')
epoch: 48 test_true_pfm: 5465.144444244113 sim_pfm: -284.3477212711102
episode: 192 training return: tensor(-313.1701, device='cuda:0')
episode: 193 training return: tensor(-361.0479, device='cuda:0')
episode: 194 training return: tensor(-319.0859, device='cuda:0')
episode: 195 training return: tensor(-432.0990, device='cuda:0')
epoch: 49 test_true_pfm: 5363.900265554391 sim_pfm: -237.1016931151098
episode: 196 training return: tensor(-292.3557, device='cuda:0')
episode: 197 training return: tensor(-278.0973, device='cuda:0')
episode: 198 training return: tensor(-407.6899, device='cuda:0')
episode: 199 training return: tensor(-536.5907, device='cuda:0')
epoch: 50 test_true_pfm: 5462.964802515683 sim_pfm: -337.08027431500767
episode: 200 training return: tensor(-434.3826, device='cuda:0')
episode: 201 training return: tensor(-379.8165, device='cuda:0')
episode: 202 training return: tensor(-348.7383, device='cuda:0')
episode: 203 training return: tensor(-361.7271, device='cuda:0')
epoch: 51 test_true_pfm: 5731.798772691422 sim_pfm: -176.08868096064543
episode: 204 training return: tensor(-313.6948, device='cuda:0')
episode: 205 training return: tensor(-369.5633, device='cuda:0')
episode: 206 training return: tensor(-370.6950, device='cuda:0')
episode: 207 training return: tensor(-359.1783, device='cuda:0')
epoch: 52 test_true_pfm: 5599.261731471875 sim_pfm: -302.5423578136445
episode: 208 training return: tensor(-382.1018, device='cuda:0')
episode: 209 training return: tensor(-388.9386, device='cuda:0')
episode: 210 training return: tensor(-271.8059, device='cuda:0')
episode: 211 training return: tensor(-280.8009, device='cuda:0')
epoch: 53 test_true_pfm: 5395.521856145747 sim_pfm: -249.4007880757175
episode: 212 training return: tensor(-421.6147, device='cuda:0')
episode: 213 training return: tensor(-361.5224, device='cuda:0')
episode: 214 training return: tensor(-324.6808, device='cuda:0')
episode: 215 training return: tensor(-437.2949, device='cuda:0')
epoch: 54 test_true_pfm: 5512.646320091141 sim_pfm: -233.72425842230828
episode: 216 training return: tensor(-334.4319, device='cuda:0')
episode: 217 training return: tensor(-400.7738, device='cuda:0')
episode: 218 training return: tensor(-330.3023, device='cuda:0')
episode: 219 training return: tensor(-296.5775, device='cuda:0')
epoch: 55 test_true_pfm: 5574.04298288258 sim_pfm: -282.4709697952494
episode: 220 training return: tensor(-406.9995, device='cuda:0')
episode: 221 training return: tensor(-380.7794, device='cuda:0')
episode: 222 training return: tensor(-319.3860, device='cuda:0')
episode: 223 training return: tensor(-387.3737, device='cuda:0')
epoch: 56 test_true_pfm: 5533.814290604571 sim_pfm: -456.5280915172577
episode: 224 training return: tensor(-359.9221, device='cuda:0')
episode: 225 training return: tensor(-435.0721, device='cuda:0')
episode: 226 training return: tensor(-262.9095, device='cuda:0')
episode: 227 training return: tensor(-361.5149, device='cuda:0')
epoch: 57 test_true_pfm: 3836.466752891372 sim_pfm: -245.97001442558636
episode: 228 training return: tensor(-358.7121, device='cuda:0')
episode: 229 training return: tensor(-282.5956, device='cuda:0')
episode: 230 training return: tensor(-333.1057, device='cuda:0')
episode: 231 training return: tensor(-335.4811, device='cuda:0')
epoch: 58 test_true_pfm: 5566.8157974820715 sim_pfm: -351.5201758711967
episode: 232 training return: tensor(-271.7958, device='cuda:0')
episode: 233 training return: tensor(-308.5480, device='cuda:0')
episode: 234 training return: tensor(-284.1479, device='cuda:0')
episode: 235 training return: tensor(-578.9552, device='cuda:0')
epoch: 59 test_true_pfm: 5439.530752017089 sim_pfm: -274.16734990394133
episode: 236 training return: tensor(-297.9834, device='cuda:0')
episode: 237 training return: tensor(-361.9071, device='cuda:0')
episode: 238 training return: tensor(-454.2194, device='cuda:0')
episode: 239 training return: tensor(-418.3486, device='cuda:0')
epoch: 60 test_true_pfm: 5427.229858206289 sim_pfm: -285.45398500326945
episode: 240 training return: tensor(-316.7995, device='cuda:0')
episode: 241 training return: tensor(-346.4897, device='cuda:0')
episode: 242 training return: tensor(-377.8504, device='cuda:0')
episode: 243 training return: tensor(-257.6908, device='cuda:0')
epoch: 61 test_true_pfm: 5555.026106141995 sim_pfm: -207.69404769113558
episode: 244 training return: tensor(-776.1606, device='cuda:0')
episode: 245 training return: tensor(-369.0842, device='cuda:0')
episode: 246 training return: tensor(-315.2794, device='cuda:0')
episode: 247 training return: tensor(-294.4093, device='cuda:0')
epoch: 62 test_true_pfm: 5575.398491623598 sim_pfm: -310.67407872969244
episode: 248 training return: tensor(-722.0336, device='cuda:0')
episode: 249 training return: tensor(-341.6265, device='cuda:0')
episode: 250 training return: tensor(-286.8429, device='cuda:0')
episode: 251 training return: tensor(-289.5437, device='cuda:0')
epoch: 63 test_true_pfm: 5525.698465981492 sim_pfm: -223.87865321831973
episode: 252 training return: tensor(-370.2726, device='cuda:0')
episode: 253 training return: tensor(-389.8536, device='cuda:0')
episode: 254 training return: tensor(-388.4160, device='cuda:0')
episode: 255 training return: tensor(-338.4716, device='cuda:0')
epoch: 64 test_true_pfm: 5527.9173249956 sim_pfm: -247.4938326947255
episode: 256 training return: tensor(-348.8083, device='cuda:0')
episode: 257 training return: tensor(-459.0101, device='cuda:0')
episode: 258 training return: tensor(-306.8872, device='cuda:0')
episode: 259 training return: tensor(-364.8590, device='cuda:0')
epoch: 65 test_true_pfm: 5496.34703421506 sim_pfm: -221.2263064282597
episode: 260 training return: tensor(-323.1949, device='cuda:0')
episode: 261 training return: tensor(-346.0464, device='cuda:0')
episode: 262 training return: tensor(-281.0418, device='cuda:0')
episode: 263 training return: tensor(-327.2019, device='cuda:0')
epoch: 66 test_true_pfm: 5494.493159105602 sim_pfm: -173.20487535597445
episode: 264 training return: tensor(-276.1527, device='cuda:0')
episode: 265 training return: tensor(-457.4025, device='cuda:0')
episode: 266 training return: tensor(-369.2648, device='cuda:0')
episode: 267 training return: tensor(-320.8759, device='cuda:0')
epoch: 67 test_true_pfm: 5448.158333615564 sim_pfm: -248.29660065971743
episode: 268 training return: tensor(-338.0969, device='cuda:0')
episode: 269 training return: tensor(-384.1884, device='cuda:0')
episode: 270 training return: tensor(-290.7559, device='cuda:0')
episode: 271 training return: tensor(-318.6563, device='cuda:0')
epoch: 68 test_true_pfm: 5768.116556715501 sim_pfm: -295.8752574058017
episode: 272 training return: tensor(-338.2629, device='cuda:0')
episode: 273 training return: tensor(-263.0426, device='cuda:0')
episode: 274 training return: tensor(-332.9742, device='cuda:0')
episode: 275 training return: tensor(-331.4005, device='cuda:0')
epoch: 69 test_true_pfm: 5775.917071408226 sim_pfm: -231.72994641754
episode: 276 training return: tensor(-400.2917, device='cuda:0')
episode: 277 training return: tensor(-281.6330, device='cuda:0')
episode: 278 training return: tensor(-306.9169, device='cuda:0')
episode: 279 training return: tensor(-348.4701, device='cuda:0')
epoch: 70 test_true_pfm: 5509.608039541144 sim_pfm: -259.8984118795391
episode: 280 training return: tensor(-245.2516, device='cuda:0')
episode: 281 training return: tensor(-239.0064, device='cuda:0')
episode: 282 training return: tensor(-291.7201, device='cuda:0')
episode: 283 training return: tensor(-296.2846, device='cuda:0')
epoch: 71 test_true_pfm: 5520.334898080277 sim_pfm: -273.1313012226213
episode: 284 training return: tensor(-246.7324, device='cuda:0')
episode: 285 training return: tensor(-309.2824, device='cuda:0')
episode: 286 training return: tensor(-202.0088, device='cuda:0')
episode: 287 training return: tensor(-357.8701, device='cuda:0')
epoch: 72 test_true_pfm: 5442.22827438782 sim_pfm: -377.9852432894598
episode: 288 training return: tensor(-271.1002, device='cuda:0')
episode: 289 training return: tensor(-291.1333, device='cuda:0')
episode: 290 training return: tensor(-295.7141, device='cuda:0')
episode: 291 training return: tensor(-399.1612, device='cuda:0')
epoch: 73 test_true_pfm: 5293.1234127553 sim_pfm: -254.51921953747902
episode: 292 training return: tensor(-442.4345, device='cuda:0')
episode: 293 training return: tensor(-284.4124, device='cuda:0')
episode: 294 training return: tensor(-401.1090, device='cuda:0')
episode: 295 training return: tensor(-333.5437, device='cuda:0')
epoch: 74 test_true_pfm: 5365.343331540378 sim_pfm: -223.56049765156544
episode: 296 training return: tensor(-403.8789, device='cuda:0')
episode: 297 training return: tensor(-142.2938, device='cuda:0')
episode: 298 training return: tensor(-343.2266, device='cuda:0')
episode: 299 training return: tensor(-187.0266, device='cuda:0')
epoch: 75 test_true_pfm: 6022.318645239985 sim_pfm: -183.7010296363636
episode: 300 training return: tensor(-323.5430, device='cuda:0')
episode: 301 training return: tensor(-295.9366, device='cuda:0')
episode: 302 training return: tensor(-318.3209, device='cuda:0')
episode: 303 training return: tensor(-385.6282, device='cuda:0')
epoch: 76 test_true_pfm: 5518.794869166876 sim_pfm: -229.46138613836956
episode: 304 training return: tensor(-316.8269, device='cuda:0')
episode: 305 training return: tensor(-303.9567, device='cuda:0')
episode: 306 training return: tensor(-366.4014, device='cuda:0')
episode: 307 training return: tensor(-233.2173, device='cuda:0')
epoch: 77 test_true_pfm: 5489.583987954745 sim_pfm: -110.29535508030676
episode: 308 training return: tensor(-366.1383, device='cuda:0')
episode: 309 training return: tensor(-279.9497, device='cuda:0')
episode: 310 training return: tensor(-417.3121, device='cuda:0')
episode: 311 training return: tensor(-313.5186, device='cuda:0')
epoch: 78 test_true_pfm: 5486.269650306612 sim_pfm: -254.82291547457376
episode: 312 training return: tensor(-293.0948, device='cuda:0')
episode: 313 training return: tensor(-340.9095, device='cuda:0')
episode: 314 training return: tensor(-179.4218, device='cuda:0')
episode: 315 training return: tensor(-137.3611, device='cuda:0')
epoch: 79 test_true_pfm: 5508.843878832042 sim_pfm: -177.333961517259
episode: 316 training return: tensor(-362.5379, device='cuda:0')
episode: 317 training return: tensor(-292.4510, device='cuda:0')
episode: 318 training return: tensor(-440.2284, device='cuda:0')
episode: 319 training return: tensor(-389.6994, device='cuda:0')
epoch: 80 test_true_pfm: 5697.501464389047 sim_pfm: -177.9330209993253
episode: 320 training return: tensor(-305.7698, device='cuda:0')
episode: 321 training return: tensor(-257.4590, device='cuda:0')
episode: 322 training return: tensor(-371.8951, device='cuda:0')
episode: 323 training return: tensor(-266.8383, device='cuda:0')
epoch: 81 test_true_pfm: 5399.785245352403 sim_pfm: -345.5039975505594
episode: 324 training return: tensor(-294.9921, device='cuda:0')
episode: 325 training return: tensor(-332.3019, device='cuda:0')
episode: 326 training return: tensor(-356.0605, device='cuda:0')
episode: 327 training return: tensor(-242.6584, device='cuda:0')
epoch: 82 test_true_pfm: 5516.812901866845 sim_pfm: -263.8508103133645
episode: 328 training return: tensor(-306.6508, device='cuda:0')
episode: 329 training return: tensor(-351.8971, device='cuda:0')
episode: 330 training return: tensor(-288.4861, device='cuda:0')
episode: 331 training return: tensor(-389.1700, device='cuda:0')
epoch: 83 test_true_pfm: 5437.059413295997 sim_pfm: -270.19384387434303
episode: 332 training return: tensor(-315.1188, device='cuda:0')
episode: 333 training return: tensor(-382.7443, device='cuda:0')
episode: 334 training return: tensor(-268.3994, device='cuda:0')
episode: 335 training return: tensor(-299.3515, device='cuda:0')
epoch: 84 test_true_pfm: 5469.048282984313 sim_pfm: -183.60783927134858
episode: 336 training return: tensor(-301.0455, device='cuda:0')
episode: 337 training return: tensor(-310.7914, device='cuda:0')
episode: 338 training return: tensor(-343.6453, device='cuda:0')
episode: 339 training return: tensor(-287.0317, device='cuda:0')
epoch: 85 test_true_pfm: 5559.637755062681 sim_pfm: -168.36031236490817
episode: 340 training return: tensor(-243.0022, device='cuda:0')
episode: 341 training return: tensor(-386.6840, device='cuda:0')
episode: 342 training return: tensor(-352.7193, device='cuda:0')
episode: 343 training return: tensor(-344.7515, device='cuda:0')
epoch: 86 test_true_pfm: 5482.622858877422 sim_pfm: -170.98470839017924
episode: 344 training return: tensor(-284.7292, device='cuda:0')
episode: 345 training return: tensor(-328.1238, device='cuda:0')
episode: 346 training return: tensor(-359.4185, device='cuda:0')
episode: 347 training return: tensor(-290.5188, device='cuda:0')
epoch: 87 test_true_pfm: 5508.723360092091 sim_pfm: -276.0733086438947
episode: 348 training return: tensor(-351.2593, device='cuda:0')
episode: 349 training return: tensor(-363.5539, device='cuda:0')
episode: 350 training return: tensor(-319.8963, device='cuda:0')
episode: 351 training return: tensor(-241.2878, device='cuda:0')
epoch: 88 test_true_pfm: 5792.3410894907765 sim_pfm: -231.3916564285173
episode: 352 training return: tensor(-300.7397, device='cuda:0')
episode: 353 training return: tensor(-290.9102, device='cuda:0')
episode: 354 training return: tensor(-334.8582, device='cuda:0')
episode: 355 training return: tensor(-352.7871, device='cuda:0')
epoch: 89 test_true_pfm: 5329.679951871574 sim_pfm: -211.58193976260372
episode: 356 training return: tensor(-257.1001, device='cuda:0')
episode: 357 training return: tensor(-280.1820, device='cuda:0')
episode: 358 training return: tensor(-474.3079, device='cuda:0')
episode: 359 training return: tensor(-274.8000, device='cuda:0')
epoch: 90 test_true_pfm: 5355.373484055553 sim_pfm: -199.0595987883086
episode: 360 training return: tensor(-457.8549, device='cuda:0')
episode: 361 training return: tensor(-369.5423, device='cuda:0')
episode: 362 training return: tensor(-251.9916, device='cuda:0')
episode: 363 training return: tensor(-338.5639, device='cuda:0')
epoch: 91 test_true_pfm: 5625.4066368454405 sim_pfm: -243.32991892301166
episode: 364 training return: tensor(-307.9531, device='cuda:0')
episode: 365 training return: tensor(-287.6843, device='cuda:0')
episode: 366 training return: tensor(-350.7351, device='cuda:0')
episode: 367 training return: tensor(-378.1062, device='cuda:0')
epoch: 92 test_true_pfm: 5516.799526856183 sim_pfm: -254.39840853380156
episode: 368 training return: tensor(-265.2280, device='cuda:0')
episode: 369 training return: tensor(-282.6706, device='cuda:0')
episode: 370 training return: tensor(-299.1643, device='cuda:0')
episode: 371 training return: tensor(-315.6084, device='cuda:0')
epoch: 93 test_true_pfm: 5504.247118068449 sim_pfm: -235.03793895424073
episode: 372 training return: tensor(-382.1863, device='cuda:0')
episode: 373 training return: tensor(-317.2512, device='cuda:0')
episode: 374 training return: tensor(-405.2503, device='cuda:0')
episode: 375 training return: tensor(-335.6969, device='cuda:0')
epoch: 94 test_true_pfm: 6129.262604830835 sim_pfm: -266.2021371181472
episode: 376 training return: tensor(-216.8571, device='cuda:0')
episode: 377 training return: tensor(-348.1822, device='cuda:0')
episode: 378 training return: tensor(-354.2096, device='cuda:0')
episode: 379 training return: tensor(-271.4853, device='cuda:0')
epoch: 95 test_true_pfm: 5621.376478968929 sim_pfm: -225.6786108679565
episode: 380 training return: tensor(-362.5671, device='cuda:0')
episode: 381 training return: tensor(-301.2808, device='cuda:0')
episode: 382 training return: tensor(-187.7015, device='cuda:0')
episode: 383 training return: tensor(-420.5602, device='cuda:0')
epoch: 96 test_true_pfm: 5624.169336870694 sim_pfm: -234.05537730955015
episode: 384 training return: tensor(-415.3154, device='cuda:0')
episode: 385 training return: tensor(-277.3587, device='cuda:0')
episode: 386 training return: tensor(-268.6643, device='cuda:0')
episode: 387 training return: tensor(-320.2274, device='cuda:0')
epoch: 97 test_true_pfm: 5559.232378224755 sim_pfm: -214.85391951541533
episode: 388 training return: tensor(-281.9430, device='cuda:0')
episode: 389 training return: tensor(-309.4503, device='cuda:0')
episode: 390 training return: tensor(-338.4597, device='cuda:0')
episode: 391 training return: tensor(-205.8847, device='cuda:0')
epoch: 98 test_true_pfm: 5632.460758760321 sim_pfm: -218.9815879286325
episode: 392 training return: tensor(-265.9423, device='cuda:0')
episode: 393 training return: tensor(-260.5376, device='cuda:0')
episode: 394 training return: tensor(-332.8168, device='cuda:0')
episode: 395 training return: tensor(-276.7387, device='cuda:0')
epoch: 99 test_true_pfm: 5458.78873486998 sim_pfm: -238.1586459291187
episode: 396 training return: tensor(-364.0046, device='cuda:0')
episode: 397 training return: tensor(-286.7285, device='cuda:0')
episode: 398 training return: tensor(-291.2418, device='cuda:0')
episode: 399 training return: tensor(-290.2457, device='cuda:0')
epoch: 100 test_true_pfm: 5765.33435921861 sim_pfm: -198.55742372722793
episode: 400 training return: tensor(-314.2586, device='cuda:0')
episode: 401 training return: tensor(-309.7344, device='cuda:0')
episode: 402 training return: tensor(-294.9606, device='cuda:0')
episode: 403 training return: tensor(-294.8003, device='cuda:0')
epoch: 101 test_true_pfm: 5603.427316430487 sim_pfm: -128.65628073126814
episode: 404 training return: tensor(-249.2291, device='cuda:0')
episode: 405 training return: tensor(-372.0638, device='cuda:0')
episode: 406 training return: tensor(-360.6803, device='cuda:0')
episode: 407 training return: tensor(-311.4647, device='cuda:0')
epoch: 102 test_true_pfm: 5631.786954444841 sim_pfm: -272.7269685390832
episode: 408 training return: tensor(-346.0678, device='cuda:0')
episode: 409 training return: tensor(-224.1484, device='cuda:0')
episode: 410 training return: tensor(-269.9327, device='cuda:0')
episode: 411 training return: tensor(-331.7823, device='cuda:0')
epoch: 103 test_true_pfm: 5863.894884018057 sim_pfm: -193.39597113393634
episode: 412 training return: tensor(-280.4198, device='cuda:0')
episode: 413 training return: tensor(-301.3181, device='cuda:0')
episode: 414 training return: tensor(-318.2111, device='cuda:0')
episode: 415 training return: tensor(-212.0927, device='cuda:0')
epoch: 104 test_true_pfm: 5610.897803367791 sim_pfm: -183.20363722663024
episode: 416 training return: tensor(-271.2964, device='cuda:0')
episode: 417 training return: tensor(-278.2738, device='cuda:0')
episode: 418 training return: tensor(-325.6353, device='cuda:0')
episode: 419 training return: tensor(-272.7780, device='cuda:0')
epoch: 105 test_true_pfm: 5635.55755514733 sim_pfm: -217.25687805758207
episode: 420 training return: tensor(-258.7334, device='cuda:0')
episode: 421 training return: tensor(-265.9359, device='cuda:0')
episode: 422 training return: tensor(-266.6686, device='cuda:0')
episode: 423 training return: tensor(-296.6223, device='cuda:0')
epoch: 106 test_true_pfm: 5584.033763222203 sim_pfm: -165.37982032442233
episode: 424 training return: tensor(-328.6392, device='cuda:0')
episode: 425 training return: tensor(-190.4357, device='cuda:0')
episode: 426 training return: tensor(-257.0428, device='cuda:0')
episode: 427 training return: tensor(-169.6762, device='cuda:0')
epoch: 107 test_true_pfm: 5621.898687743987 sim_pfm: -260.30601128274185
episode: 428 training return: tensor(-230.8069, device='cuda:0')
episode: 429 training return: tensor(-237.2837, device='cuda:0')
episode: 430 training return: tensor(-391.8480, device='cuda:0')
episode: 431 training return: tensor(-298.4353, device='cuda:0')
epoch: 108 test_true_pfm: 5647.454991705344 sim_pfm: -194.86619248819383
episode: 432 training return: tensor(-280.0743, device='cuda:0')
episode: 433 training return: tensor(-264.6325, device='cuda:0')
episode: 434 training return: tensor(-232.6212, device='cuda:0')
episode: 435 training return: tensor(-223.7255, device='cuda:0')
epoch: 109 test_true_pfm: 5042.497657838346 sim_pfm: -36.56121001590509
episode: 436 training return: tensor(-23.6865, device='cuda:0')
episode: 437 training return: tensor(-244.5214, device='cuda:0')
episode: 438 training return: tensor(-353.4975, device='cuda:0')
episode: 439 training return: tensor(-271.3770, device='cuda:0')
epoch: 110 test_true_pfm: 5734.486587491631 sim_pfm: -215.20247306088763
episode: 440 training return: tensor(-342.2846, device='cuda:0')
episode: 441 training return: tensor(-287.6451, device='cuda:0')
episode: 442 training return: tensor(-337.4094, device='cuda:0')
episode: 443 training return: tensor(-131.6024, device='cuda:0')
epoch: 111 test_true_pfm: 6461.718797274675 sim_pfm: -232.33448823680132
episode: 444 training return: tensor(-299.1246, device='cuda:0')
episode: 445 training return: tensor(-240.4513, device='cuda:0')
episode: 446 training return: tensor(-345.5753, device='cuda:0')
episode: 447 training return: tensor(-871.5015, device='cuda:0')
epoch: 112 test_true_pfm: 5538.2089695270415 sim_pfm: -264.34647934110643
episode: 448 training return: tensor(-277.8790, device='cuda:0')
episode: 449 training return: tensor(-356.8022, device='cuda:0')
episode: 450 training return: tensor(-332.1730, device='cuda:0')
episode: 451 training return: tensor(-239.6731, device='cuda:0')
epoch: 113 test_true_pfm: 5556.8686485767785 sim_pfm: -202.08755908214758
episode: 452 training return: tensor(-286.6635, device='cuda:0')
episode: 453 training return: tensor(-416.0999, device='cuda:0')
episode: 454 training return: tensor(-329.1863, device='cuda:0')
episode: 455 training return: tensor(-369.5763, device='cuda:0')
epoch: 114 test_true_pfm: 5633.836373391695 sim_pfm: -242.1085580344467
episode: 456 training return: tensor(-341.8830, device='cuda:0')
episode: 457 training return: tensor(-332.6916, device='cuda:0')
episode: 458 training return: tensor(-240.7081, device='cuda:0')
episode: 459 training return: tensor(-307.7242, device='cuda:0')
epoch: 115 test_true_pfm: 5550.462422969417 sim_pfm: -207.29098655389194
episode: 460 training return: tensor(-305.4669, device='cuda:0')
episode: 461 training return: tensor(-325.9567, device='cuda:0')
episode: 462 training return: tensor(-157.6955, device='cuda:0')
episode: 463 training return: tensor(-402.7770, device='cuda:0')
epoch: 116 test_true_pfm: 5671.041355528411 sim_pfm: -156.63085248110778
episode: 464 training return: tensor(-231.7377, device='cuda:0')
episode: 465 training return: tensor(-201.9848, device='cuda:0')
episode: 466 training return: tensor(-334.3187, device='cuda:0')
episode: 467 training return: tensor(-302.6633, device='cuda:0')
epoch: 117 test_true_pfm: 5574.196475512664 sim_pfm: -156.32145231317068
episode: 468 training return: tensor(-299.8376, device='cuda:0')
episode: 469 training return: tensor(-199.4878, device='cuda:0')
episode: 470 training return: tensor(-295.4490, device='cuda:0')
episode: 471 training return: tensor(-249.9340, device='cuda:0')
epoch: 118 test_true_pfm: 5521.947834770453 sim_pfm: -250.42338887701044
episode: 472 training return: tensor(-442.2868, device='cuda:0')
episode: 473 training return: tensor(-353.7091, device='cuda:0')
episode: 474 training return: tensor(-309.1600, device='cuda:0')
episode: 475 training return: tensor(-289.3401, device='cuda:0')
epoch: 119 test_true_pfm: 5636.486731554552 sim_pfm: -163.61110838926592
episode: 476 training return: tensor(-399.0387, device='cuda:0')
episode: 477 training return: tensor(-219.0725, device='cuda:0')
episode: 478 training return: tensor(-467.5828, device='cuda:0')
episode: 479 training return: tensor(-263.0049, device='cuda:0')
epoch: 120 test_true_pfm: 5628.004750876717 sim_pfm: -263.8834720197483
episode: 480 training return: tensor(-308.4987, device='cuda:0')
episode: 481 training return: tensor(-334.0448, device='cuda:0')
episode: 482 training return: tensor(-283.8209, device='cuda:0')
episode: 483 training return: tensor(-274.2561, device='cuda:0')
epoch: 121 test_true_pfm: 5417.808185973537 sim_pfm: -183.37766269321824
episode: 484 training return: tensor(-272.3373, device='cuda:0')
episode: 485 training return: tensor(-262.1533, device='cuda:0')
episode: 486 training return: tensor(-438.1442, device='cuda:0')
episode: 487 training return: tensor(-314.0752, device='cuda:0')
epoch: 122 test_true_pfm: 5996.128104774211 sim_pfm: -157.7320982662301
episode: 488 training return: tensor(-342.2186, device='cuda:0')
episode: 489 training return: tensor(-269.3153, device='cuda:0')
episode: 490 training return: tensor(-340.7462, device='cuda:0')
episode: 491 training return: tensor(-329.3728, device='cuda:0')
epoch: 123 test_true_pfm: 5562.367882545208 sim_pfm: -230.97725821725908
episode: 492 training return: tensor(-373.5580, device='cuda:0')
episode: 493 training return: tensor(-238.3291, device='cuda:0')
episode: 494 training return: tensor(-284.0640, device='cuda:0')
episode: 495 training return: tensor(-316.2307, device='cuda:0')
epoch: 124 test_true_pfm: 5482.700775410672 sim_pfm: -140.34825678605316
episode: 496 training return: tensor(-214.3029, device='cuda:0')
episode: 497 training return: tensor(-255.1044, device='cuda:0')
episode: 498 training return: tensor(-282.2045, device='cuda:0')
episode: 499 training return: tensor(-362.1372, device='cuda:0')
epoch: 125 test_true_pfm: 5677.136527650721 sim_pfm: -253.85313234262867
episode: 500 training return: tensor(-293.7356, device='cuda:0')
episode: 501 training return: tensor(-318.9875, device='cuda:0')
episode: 502 training return: tensor(-355.7325, device='cuda:0')
episode: 503 training return: tensor(-431.1997, device='cuda:0')
epoch: 126 test_true_pfm: 5557.29375975161 sim_pfm: -155.57593116837475
episode: 504 training return: tensor(-311.1866, device='cuda:0')
episode: 505 training return: tensor(-353.2791, device='cuda:0')
episode: 506 training return: tensor(-325.9653, device='cuda:0')
episode: 507 training return: tensor(-372.8294, device='cuda:0')
epoch: 127 test_true_pfm: 6077.338678465846 sim_pfm: -207.09553770861626
episode: 508 training return: tensor(-354.5303, device='cuda:0')
episode: 509 training return: tensor(-262.5345, device='cuda:0')
episode: 510 training return: tensor(-212.3987, device='cuda:0')
episode: 511 training return: tensor(-293.2704, device='cuda:0')
epoch: 128 test_true_pfm: 5654.127147934129 sim_pfm: -205.48914345928156
episode: 512 training return: tensor(-284.4636, device='cuda:0')
episode: 513 training return: tensor(-323.5741, device='cuda:0')
episode: 514 training return: tensor(-314.0385, device='cuda:0')
episode: 515 training return: tensor(-310.7203, device='cuda:0')
epoch: 129 test_true_pfm: 5817.431650027036 sim_pfm: -171.52880395393004
episode: 516 training return: tensor(-334.2864, device='cuda:0')
episode: 517 training return: tensor(-225.0045, device='cuda:0')
episode: 518 training return: tensor(-284.5836, device='cuda:0')
episode: 519 training return: tensor(-371.3228, device='cuda:0')
epoch: 130 test_true_pfm: 5526.166704145614 sim_pfm: -232.8471008791239
episode: 520 training return: tensor(-190.2611, device='cuda:0')
episode: 521 training return: tensor(-235.0247, device='cuda:0')
episode: 522 training return: tensor(-297.5542, device='cuda:0')
episode: 523 training return: tensor(-253.6537, device='cuda:0')
epoch: 131 test_true_pfm: 6225.947764341146 sim_pfm: -148.2516896102364
episode: 524 training return: tensor(-333.1667, device='cuda:0')
episode: 525 training return: tensor(-334.5694, device='cuda:0')
episode: 526 training return: tensor(-266.5298, device='cuda:0')
episode: 527 training return: tensor(-304.8756, device='cuda:0')
epoch: 132 test_true_pfm: 5666.260073018381 sim_pfm: -162.03247366416812
episode: 528 training return: tensor(-243.3967, device='cuda:0')
episode: 529 training return: tensor(-270.7316, device='cuda:0')
episode: 530 training return: tensor(-486.8047, device='cuda:0')
episode: 531 training return: tensor(-304.9374, device='cuda:0')
epoch: 133 test_true_pfm: 5613.530864906585 sim_pfm: -188.837370932044
episode: 532 training return: tensor(-699.7969, device='cuda:0')
episode: 533 training return: tensor(-320.2834, device='cuda:0')
episode: 534 training return: tensor(-205.6987, device='cuda:0')
episode: 535 training return: tensor(-277.5598, device='cuda:0')
epoch: 134 test_true_pfm: 5541.911459729451 sim_pfm: -193.62962307650983
episode: 536 training return: tensor(-225.2136, device='cuda:0')
episode: 537 training return: tensor(-235.0150, device='cuda:0')
episode: 538 training return: tensor(-249.1451, device='cuda:0')
episode: 539 training return: tensor(-290.9794, device='cuda:0')
epoch: 135 test_true_pfm: 5717.780291717961 sim_pfm: -309.2474553247157
episode: 540 training return: tensor(-394.6937, device='cuda:0')
episode: 541 training return: tensor(-287.1177, device='cuda:0')
episode: 542 training return: tensor(-319.4852, device='cuda:0')
episode: 543 training return: tensor(-224.9226, device='cuda:0')
epoch: 136 test_true_pfm: 5605.19507649227 sim_pfm: -130.34696003859668
episode: 544 training return: tensor(-278.4963, device='cuda:0')
episode: 545 training return: tensor(-327.0997, device='cuda:0')
episode: 546 training return: tensor(-311.2569, device='cuda:0')
episode: 547 training return: tensor(-396.1790, device='cuda:0')
epoch: 137 test_true_pfm: 5598.183384219047 sim_pfm: -147.5526557401754
episode: 548 training return: tensor(-268.9846, device='cuda:0')
episode: 549 training return: tensor(-188.5507, device='cuda:0')
episode: 550 training return: tensor(-219.2735, device='cuda:0')
episode: 551 training return: tensor(-374.1826, device='cuda:0')
epoch: 138 test_true_pfm: 5952.766880102851 sim_pfm: -168.29044616455212
episode: 552 training return: tensor(-272.3911, device='cuda:0')
episode: 553 training return: tensor(-342.9904, device='cuda:0')
episode: 554 training return: tensor(-263.0328, device='cuda:0')
episode: 555 training return: tensor(-343.2977, device='cuda:0')
epoch: 139 test_true_pfm: 5620.178244237889 sim_pfm: -172.80422116870372
episode: 556 training return: tensor(-225.1754, device='cuda:0')
episode: 557 training return: tensor(-276.4012, device='cuda:0')
episode: 558 training return: tensor(-280.5383, device='cuda:0')
episode: 559 training return: tensor(-220.3288, device='cuda:0')
epoch: 140 test_true_pfm: 5557.887524014484 sim_pfm: -218.48600218751622
episode: 560 training return: tensor(-396.5106, device='cuda:0')
episode: 561 training return: tensor(-355.1624, device='cuda:0')
episode: 562 training return: tensor(-279.3943, device='cuda:0')
episode: 563 training return: tensor(-269.9282, device='cuda:0')
epoch: 141 test_true_pfm: 5634.1524736802085 sim_pfm: -181.17695982917212
episode: 564 training return: tensor(-225.4725, device='cuda:0')
episode: 565 training return: tensor(-248.1955, device='cuda:0')
episode: 566 training return: tensor(-296.3235, device='cuda:0')
episode: 567 training return: tensor(-234.2166, device='cuda:0')
epoch: 142 test_true_pfm: 6193.122632731221 sim_pfm: -222.20728183630854
episode: 568 training return: tensor(-289.3607, device='cuda:0')
episode: 569 training return: tensor(-344.9687, device='cuda:0')
episode: 570 training return: tensor(-326.7003, device='cuda:0')
episode: 571 training return: tensor(-449.5357, device='cuda:0')
epoch: 143 test_true_pfm: 5667.058979502185 sim_pfm: -138.18199373202515
episode: 572 training return: tensor(-236.9702, device='cuda:0')
episode: 573 training return: tensor(-226.9292, device='cuda:0')
episode: 574 training return: tensor(-239.9223, device='cuda:0')
episode: 575 training return: tensor(-307.9052, device='cuda:0')
epoch: 144 test_true_pfm: 5667.7501624294855 sim_pfm: -262.8398929001705
episode: 576 training return: tensor(-293.3967, device='cuda:0')
episode: 577 training return: tensor(-261.3764, device='cuda:0')
episode: 578 training return: tensor(-325.0994, device='cuda:0')
episode: 579 training return: tensor(-311.7470, device='cuda:0')
epoch: 145 test_true_pfm: 5568.289880283231 sim_pfm: -179.12310196777494
episode: 580 training return: tensor(-319.7897, device='cuda:0')
episode: 581 training return: tensor(-320.9365, device='cuda:0')
episode: 582 training return: tensor(-317.0218, device='cuda:0')
episode: 583 training return: tensor(-364.4808, device='cuda:0')
epoch: 146 test_true_pfm: 5667.147958723591 sim_pfm: -156.2436800680977
episode: 584 training return: tensor(-310.1895, device='cuda:0')
episode: 585 training return: tensor(-350.8884, device='cuda:0')
episode: 586 training return: tensor(-327.6172, device='cuda:0')
episode: 587 training return: tensor(-310.3372, device='cuda:0')
epoch: 147 test_true_pfm: 6010.504614747019 sim_pfm: -190.62726429488976
episode: 588 training return: tensor(-264.5497, device='cuda:0')
episode: 589 training return: tensor(-470.1265, device='cuda:0')
episode: 590 training return: tensor(-378.4647, device='cuda:0')
episode: 591 training return: tensor(-347.8397, device='cuda:0')
epoch: 148 test_true_pfm: 5550.690197270873 sim_pfm: -136.94451416777642
episode: 592 training return: tensor(-254.6646, device='cuda:0')
episode: 593 training return: tensor(-288.4115, device='cuda:0')
episode: 594 training return: tensor(-242.5235, device='cuda:0')
episode: 595 training return: tensor(-147.6739, device='cuda:0')
epoch: 149 test_true_pfm: 5869.34960119861 sim_pfm: -72.67763733530107
episode: 596 training return: tensor(-286.3633, device='cuda:0')
episode: 597 training return: tensor(-343.0894, device='cuda:0')
episode: 598 training return: tensor(-265.7416, device='cuda:0')
episode: 599 training return: tensor(-265.3361, device='cuda:0')
epoch: 150 test_true_pfm: 6010.690579843435 sim_pfm: -212.489136852324
