['--learn', 'behavior', '--env', 'HalfCheetah-v2', '--traj', 'expert', '--seed', '5']
epoch: 0 training_loss 0.35774418860673907 test_loss: 0.2495728015899658
epoch: 1 training_loss 0.23649517506361006 test_loss: 0.21809027194976807
epoch: 2 training_loss 0.21167637750506402 test_loss: 0.16774728298187255
epoch: 3 training_loss 0.18649505026638508 test_loss: 0.15922267436981202
epoch: 4 training_loss 0.16950811490416526 test_loss: 0.1728236436843872
epoch: 5 training_loss 0.15122061934322117 test_loss: 0.17087432146072387
epoch: 6 training_loss 0.14554586537182332 test_loss: 0.15890835523605346
epoch: 7 training_loss 0.14329434994608164 test_loss: 0.1641430139541626
epoch: 8 training_loss 0.13941191405057907 test_loss: 0.1525670886039734
epoch: 9 training_loss 0.1417762641608715 test_loss: 0.15091238021850586
epoch: 10 training_loss 0.1322092593833804 test_loss: 0.12341787815093994
epoch: 11 training_loss 0.13433817707002163 test_loss: 0.12714511156082153
epoch: 12 training_loss 0.13293135188519956 test_loss: 0.15238882303237916
epoch: 13 training_loss 0.1352251198887825 test_loss: 0.11818530559539794
epoch: 14 training_loss 0.12573005229234696 test_loss: 0.12427707910537719
epoch: 15 training_loss 0.1275984338670969 test_loss: 0.12086060047149658
epoch: 16 training_loss 0.13336405601352452 test_loss: 0.12197651863098144
epoch: 17 training_loss 0.11719072330743074 test_loss: 0.12120670080184937
epoch: 18 training_loss 0.1254177351295948 test_loss: 0.13513610363006592
epoch: 19 training_loss 0.12306290101259947 test_loss: 0.12283778190612793
epoch: 20 training_loss 0.12866425275802612 test_loss: 0.13764071464538574
epoch: 21 training_loss 0.12326874360442161 test_loss: 0.12615548372268676
epoch: 22 training_loss 0.1217132518813014 test_loss: 0.13484833240509034
epoch: 23 training_loss 0.12614844888448715 test_loss: 0.12854723930358886
epoch: 24 training_loss 0.11983790140599013 test_loss: 0.11683263778686523
epoch: 25 training_loss 0.11641623549163342 test_loss: 0.12657879590988158
epoch: 26 training_loss 0.11971001945436001 test_loss: 0.11064531803131103
epoch: 27 training_loss 0.13468511834740637 test_loss: 0.14334179162979127
epoch: 28 training_loss 0.11416774827986956 test_loss: 0.11230840682983398
epoch: 29 training_loss 0.11609011769294739 test_loss: 0.11894766092300416
epoch: 30 training_loss 0.11428573854267597 test_loss: 0.10547176599502564
epoch: 31 training_loss 0.10844197005033493 test_loss: 0.12374212741851806
epoch: 32 training_loss 0.1309681634604931 test_loss: 0.11796866655349732
epoch: 33 training_loss 0.11075821962207556 test_loss: 0.13114147186279296
epoch: 34 training_loss 0.11793787876144052 test_loss: 0.11273990869522095
epoch: 35 training_loss 0.11658714642748237 test_loss: 0.11740506887435913
epoch: 36 training_loss 0.1123491820320487 test_loss: 0.12723124027252197
epoch: 37 training_loss 0.12292958155274392 test_loss: 0.10427895784378052
epoch: 38 training_loss 0.10835261670872569 test_loss: 0.11039015054702758
epoch: 39 training_loss 0.10536086197942496 test_loss: 0.11939783096313476
epoch: 40 training_loss 0.11527176253497601 test_loss: 0.11482717990875244
epoch: 41 training_loss 0.12070681311190129 test_loss: 0.117740797996521
epoch: 42 training_loss 0.11149643518030644 test_loss: 0.12513004541397094
epoch: 43 training_loss 0.11488024059683084 test_loss: 0.11303211450576782
epoch: 44 training_loss 0.11599149737507104 test_loss: 0.12458571195602416
epoch: 45 training_loss 0.11037473233416677 test_loss: 0.12942273616790773
epoch: 46 training_loss 0.11767583820968866 test_loss: 0.11784487962722778
epoch: 47 training_loss 0.10243387080729008 test_loss: 0.12198928594589234
epoch: 48 training_loss 0.10926145363599062 test_loss: 0.11813930273056031
epoch: 49 training_loss 0.10702960088849067 test_loss: 0.11870944499969482
epoch: 50 training_loss 0.11539573892951012 test_loss: 0.12380712032318116
epoch: 51 training_loss 0.11778066024184226 test_loss: 0.11414023637771606
epoch: 52 training_loss 0.1173411339893937 test_loss: 0.11969888210296631
epoch: 53 training_loss 0.10494650302454829 test_loss: 0.09736568331718445
epoch: 54 training_loss 0.11307985339313746 test_loss: 0.12233167886734009
epoch: 55 training_loss 0.11645194053649903 test_loss: 0.1089207410812378
epoch: 56 training_loss 0.11134744826704264 test_loss: 0.10460219383239747
epoch: 57 training_loss 0.11306240040808917 test_loss: 0.13384480476379396
epoch: 58 training_loss 0.11310425568372011 test_loss: 0.0933157742023468
epoch: 59 training_loss 0.1158348998054862 test_loss: 0.12282242774963378
epoch: 60 training_loss 0.11315107369795442 test_loss: 0.12694413661956788
epoch: 61 training_loss 0.11066979769617319 test_loss: 0.11626209020614624
epoch: 62 training_loss 0.11046852611005306 test_loss: 0.11357985734939575
epoch: 63 training_loss 0.1103689395263791 test_loss: 0.10047674179077148
epoch: 64 training_loss 0.10962397951632738 test_loss: 0.1128466248512268
epoch: 65 training_loss 0.10603939775377512 test_loss: 0.11408662796020508
epoch: 66 training_loss 0.11140438422560692 test_loss: 0.10555381774902343
epoch: 67 training_loss 0.11070858288556337 test_loss: 0.11119635105133056
epoch: 68 training_loss 0.1080855138786137 test_loss: 0.10729770660400391
epoch: 69 training_loss 0.11397386841475964 test_loss: 0.13470101356506348
epoch: 70 training_loss 0.10399576032534241 test_loss: 0.10228832960128784
epoch: 71 training_loss 0.11018630089238286 test_loss: 0.08292452692985534
epoch: 72 training_loss 0.10709980513900519 test_loss: 0.12154544591903686
epoch: 73 training_loss 0.1154593551158905 test_loss: 0.10957125425338746
epoch: 74 training_loss 0.10888074155896903 test_loss: 0.10699138641357422
epoch: 75 training_loss 0.10498638052493334 test_loss: 0.1025265097618103
epoch: 76 training_loss 0.11159282501786948 test_loss: 0.11245757341384888
epoch: 77 training_loss 0.10203077662736178 test_loss: 0.11961535215377808
epoch: 78 training_loss 0.1103363773226738 test_loss: 0.11277832984924316
epoch: 79 training_loss 0.10703869542106986 test_loss: 0.11986935138702393
epoch: 80 training_loss 0.11152727134525776 test_loss: 0.10585846900939941
epoch: 81 training_loss 0.10484823334962129 test_loss: 0.10897326469421387
epoch: 82 training_loss 0.11299409348517657 test_loss: 0.10187561511993408
epoch: 83 training_loss 0.11277968302369118 test_loss: 0.10503830909729003
epoch: 84 training_loss 0.10870437333360314 test_loss: 0.11770054101943969
epoch: 85 training_loss 0.10818544205278158 test_loss: 0.10900042057037354
epoch: 86 training_loss 0.10942774221301078 test_loss: 0.12738687992095948
epoch: 87 training_loss 0.11187749041244388 test_loss: 0.11050184965133666
epoch: 88 training_loss 0.11567010700702668 test_loss: 0.11010566949844361
epoch: 89 training_loss 0.10759787444025277 test_loss: 0.10516027212142945
epoch: 90 training_loss 0.10401601385325193 test_loss: 0.09622820019721985
epoch: 91 training_loss 0.11085321702063083 test_loss: 0.11268378496170044
epoch: 92 training_loss 0.10729826517403125 test_loss: 0.1055072546005249
epoch: 93 training_loss 0.11269373077899218 test_loss: 0.11121574640274048
epoch: 94 training_loss 0.10763053551316261 test_loss: 0.11437374353408813
epoch: 95 training_loss 0.10668783359229565 test_loss: 0.10509780645370484
epoch: 96 training_loss 0.10645230624824763 test_loss: 0.11231909990310669
epoch: 97 training_loss 0.11303468376398086 test_loss: 0.14313578605651855
epoch: 98 training_loss 0.1110403037071228 test_loss: 0.12688210010528564
epoch: 99 training_loss 0.10553422547876835 test_loss: 0.09691070318222046
epoch: 100 training_loss 0.10690777651965618 test_loss: 0.12274459600448609
epoch: 101 training_loss 0.1132682766765356 test_loss: 0.10482832193374633
epoch: 102 training_loss 0.11085348106920719 test_loss: 0.10410999059677124
epoch: 103 training_loss 0.10611182518303394 test_loss: 0.12196574211120606
epoch: 104 training_loss 0.10520794637501239 test_loss: 0.10796339511871338
epoch: 105 training_loss 0.11503337204456329 test_loss: 0.11606276035308838
epoch: 106 training_loss 0.10830672893673182 test_loss: 0.10994470119476318
epoch: 107 training_loss 0.10167977942153811 test_loss: 0.1031565546989441
epoch: 108 training_loss 0.10755474843084813 test_loss: 0.09178423285484313
epoch: 109 training_loss 0.1045069520920515 test_loss: 0.09392775297164917
epoch: 110 training_loss 0.10819177519530059 test_loss: 0.1125923752784729
epoch: 111 training_loss 0.11272254634648561 test_loss: 0.11450979709625245
epoch: 112 training_loss 0.1113507403433323 test_loss: 0.13615556955337524
epoch: 113 training_loss 0.10931542983278632 test_loss: 0.10333470106124878
epoch: 114 training_loss 0.10826080868020654 test_loss: 0.11038227081298828
epoch: 115 training_loss 0.1089799078181386 test_loss: 0.10661472082138061
epoch: 116 training_loss 0.10977897331118584 test_loss: 0.1133420705795288
epoch: 117 training_loss 0.108310506939888 test_loss: 0.11363837718963624
epoch: 118 training_loss 0.11623759511858225 test_loss: 0.11082531213760376
epoch: 119 training_loss 0.11217006551101803 test_loss: 0.11020158529281616
epoch: 120 training_loss 0.10414512043818831 test_loss: 0.10797593593597413
epoch: 121 training_loss 0.11075014643371105 test_loss: 0.11989619731903076
epoch: 122 training_loss 0.1012170422822237 test_loss: 0.12847155332565308
epoch: 123 training_loss 0.10835571000352502 test_loss: 0.10959813594818116
epoch: 124 training_loss 0.1161164047382772 test_loss: 0.1056505560874939
epoch: 125 training_loss 0.1003993658721447 test_loss: 0.10133445262908936
epoch: 126 training_loss 0.10904084715992213 test_loss: 0.10318056344985962
epoch: 127 training_loss 0.10425172448158264 test_loss: 0.1165955901145935
epoch: 128 training_loss 0.10133485142141581 test_loss: 0.10667827129364013
epoch: 129 training_loss 0.11203177604824305 test_loss: 0.0923403799533844
epoch: 130 training_loss 0.1057871264219284 test_loss: 0.1124176025390625
epoch: 131 training_loss 0.11239314988255501 test_loss: 0.09943029284477234
epoch: 132 training_loss 0.11177107617259026 test_loss: 0.1042016863822937
epoch: 133 training_loss 0.11152343291789293 test_loss: 0.10846261978149414
epoch: 134 training_loss 0.10333671666681767 test_loss: 0.11326929330825805
epoch: 135 training_loss 0.11375094890594482 test_loss: 0.10940546989440918
epoch: 136 training_loss 0.116590362675488 test_loss: 0.11214237213134766
epoch: 137 training_loss 0.11177640791982413 test_loss: 0.11418206691741943
epoch: 138 training_loss 0.09773771105334163 test_loss: 0.10740773677825928
epoch: 139 training_loss 0.11223369672894477 test_loss: 0.10511128902435303
epoch: 140 training_loss 0.11072766337543725 test_loss: 0.09789363145828248
epoch: 141 training_loss 0.10932895887643099 test_loss: 0.10876640081405639
epoch: 142 training_loss 0.10177294973284007 test_loss: 0.11060642004013062
epoch: 143 training_loss 0.1062159968726337 test_loss: 0.09887781739234924
epoch: 144 training_loss 0.1084003471583128 test_loss: 0.09579834938049317
epoch: 145 training_loss 0.1114598035439849 test_loss: 0.10187398195266724
epoch: 146 training_loss 0.1091651614755392 test_loss: 0.11004657745361328
epoch: 147 training_loss 0.1075022698380053 test_loss: 0.08917101621627807
epoch: 148 training_loss 0.11165413629263639 test_loss: 0.09804940819740296
epoch: 149 training_loss 0.1110978914424777 test_loss: 0.11349403858184814
epoch: 0 training_loss 45.51932569503784 test_loss: 24.15766143798828
epoch: 1 training_loss 20.18445650100708 test_loss: 17.0981201171875
epoch: 2 training_loss 15.929705677032471 test_loss: 14.214202880859375
epoch: 3 training_loss 13.040183734893798 test_loss: 12.482572937011719
epoch: 4 training_loss 11.27379599571228 test_loss: 10.311566925048828
epoch: 5 training_loss 9.942576775550842 test_loss: 9.103633880615234
epoch: 6 training_loss 8.973984961509705 test_loss: 8.542208862304687
epoch: 7 training_loss 8.171621479988097 test_loss: 7.848725891113281
epoch: 8 training_loss 7.559088282585144 test_loss: 7.346952819824219
epoch: 9 training_loss 7.3703034257888795 test_loss: 7.242794036865234
epoch: 10 training_loss 6.9066810178756715 test_loss: 6.5182647705078125
epoch: 11 training_loss 6.486661796569824 test_loss: 6.251792907714844
epoch: 12 training_loss 6.168240661621094 test_loss: 6.2008617401123045
epoch: 13 training_loss 5.942989301681519 test_loss: 6.028839492797852
epoch: 14 training_loss 5.644633555412293 test_loss: 5.71404800415039
epoch: 15 training_loss 5.498308892250061 test_loss: 5.209354019165039
epoch: 16 training_loss 5.299390697479248 test_loss: 5.000213623046875
epoch: 17 training_loss 5.130267753601074 test_loss: 5.098550033569336
epoch: 18 training_loss 4.976731114387512 test_loss: 4.934424591064453
epoch: 19 training_loss 4.813508651256561 test_loss: 4.770914840698242
epoch: 20 training_loss 4.7363978552818295 test_loss: 4.604988098144531
epoch: 21 training_loss 4.430650112628936 test_loss: 4.374074935913086
epoch: 22 training_loss 4.402286171913147 test_loss: 4.318743515014648
epoch: 23 training_loss 4.297621660232544 test_loss: 4.323886489868164
epoch: 24 training_loss 4.311618461608886 test_loss: 4.066889953613281
epoch: 25 training_loss 4.121871073246002 test_loss: 4.320671463012696
epoch: 26 training_loss 4.133426337242127 test_loss: 4.0733489990234375
epoch: 27 training_loss 3.9932059502601622 test_loss: 3.9076923370361327
epoch: 28 training_loss 3.9617600321769713 test_loss: 3.9604518890380858
epoch: 29 training_loss 4.031896324157715 test_loss: 3.818165588378906
epoch: 30 training_loss 3.8067581820487977 test_loss: 3.5425979614257814
epoch: 31 training_loss 3.7851773643493654 test_loss: 3.807353210449219
epoch: 32 training_loss 3.639813027381897 test_loss: 3.6654102325439455
epoch: 33 training_loss 3.704133703708649 test_loss: 3.7599903106689454
epoch: 34 training_loss 3.576627194881439 test_loss: 3.575587844848633
epoch: 35 training_loss 3.6207163238525393 test_loss: 3.508646011352539
epoch: 36 training_loss 3.5411360120773314 test_loss: 3.3985179901123046
epoch: 37 training_loss 3.460346019268036 test_loss: 3.421563720703125
epoch: 38 training_loss 3.3442166209220887 test_loss: 3.349392318725586
epoch: 39 training_loss 3.3306618118286133 test_loss: 3.3602760314941404
epoch: 40 training_loss 3.363216621875763 test_loss: 3.286915588378906
epoch: 41 training_loss 3.2638470101356507 test_loss: 3.3281845092773437
epoch: 42 training_loss 3.254077413082123 test_loss: 3.173690414428711
epoch: 43 training_loss 3.145118787288666 test_loss: 3.187526321411133
epoch: 44 training_loss 3.0781934428215028 test_loss: 3.1470218658447267
epoch: 45 training_loss 3.1190121841430662 test_loss: 3.3114742279052733
epoch: 46 training_loss 3.0132183384895326 test_loss: 3.085136604309082
epoch: 47 training_loss 3.008636939525604 test_loss: 2.9884449005126954
epoch: 48 training_loss 3.0144155621528625 test_loss: 2.8920997619628905
epoch: 49 training_loss 3.0242275953292848 test_loss: 3.056407165527344
epoch: 50 training_loss 2.9511845660209657 test_loss: 2.9302209854125976
epoch: 51 training_loss 2.9317152190208433 test_loss: 3.077133560180664
epoch: 52 training_loss 2.8952511882781984 test_loss: 2.8818492889404297
epoch: 53 training_loss 2.856544351577759 test_loss: 2.9938520431518554
epoch: 54 training_loss 2.8180310773849486 test_loss: 2.826694869995117
epoch: 55 training_loss 2.785528402328491 test_loss: 2.821964073181152
epoch: 56 training_loss 2.757111842632294 test_loss: 2.8503475189208984
epoch: 57 training_loss 2.740500466823578 test_loss: 2.5938899993896483
epoch: 58 training_loss 2.702105751037598 test_loss: 2.685604286193848
epoch: 59 training_loss 2.7207180070877075 test_loss: 2.6799989700317384
epoch: 60 training_loss 2.6844437313079834 test_loss: 2.6367424011230467
epoch: 61 training_loss 2.6254692101478576 test_loss: 2.6028522491455077
epoch: 62 training_loss 2.7078416037559507 test_loss: 2.574702262878418
epoch: 63 training_loss 2.617707312107086 test_loss: 2.5279693603515625
epoch: 64 training_loss 2.5764003133773805 test_loss: 2.643896484375
epoch: 65 training_loss 2.5538403844833373 test_loss: 2.670511817932129
epoch: 66 training_loss 2.5763865160942077 test_loss: 2.503426933288574
epoch: 67 training_loss 2.581217551231384 test_loss: 2.5722396850585936
epoch: 68 training_loss 2.5919049477577207 test_loss: 2.4334442138671877
epoch: 69 training_loss 2.5126389169692995 test_loss: 2.451029968261719
epoch: 70 training_loss 2.5087649619579313 test_loss: 2.4790454864501954
epoch: 71 training_loss 2.514749466180801 test_loss: 2.6176593780517576
epoch: 72 training_loss 2.457850133180618 test_loss: 2.5078048706054688
epoch: 73 training_loss 2.391048539876938 test_loss: 2.7447410583496095
epoch: 74 training_loss 2.4065257120132446 test_loss: 2.194266128540039
epoch: 75 training_loss 2.4431097424030304 test_loss: 2.4906248092651366
epoch: 76 training_loss 2.5512998235225677 test_loss: 2.3092750549316405
epoch: 77 training_loss 2.33262722492218 test_loss: 2.3569158554077148
epoch: 78 training_loss 2.365542825460434 test_loss: 2.3337968826293944
epoch: 79 training_loss 2.31064183473587 test_loss: 2.4879331588745117
epoch: 80 training_loss 2.3343948352336885 test_loss: 2.333736610412598
epoch: 81 training_loss 2.350951402187347 test_loss: 2.3786664962768556
epoch: 82 training_loss 2.264438118934631 test_loss: 2.436899948120117
epoch: 83 training_loss 2.2630734753608706 test_loss: 2.370556640625
epoch: 84 training_loss 2.375366973876953 test_loss: 2.2102001190185545
epoch: 85 training_loss 2.2787766993045806 test_loss: 2.5524234771728516
epoch: 86 training_loss 2.3272876393795014 test_loss: 2.287836265563965
epoch: 87 training_loss 2.261176874637604 test_loss: 2.2557662963867187
epoch: 88 training_loss 2.256129868030548 test_loss: 2.275619697570801
epoch: 89 training_loss 2.257346234321594 test_loss: 2.356118583679199
epoch: 90 training_loss 2.216867280006409 test_loss: 2.1724102020263674
epoch: 91 training_loss 2.171393629312515 test_loss: 2.3771745681762697
epoch: 92 training_loss 2.2550825548171995 test_loss: 2.2175127029418946
epoch: 93 training_loss 2.2190560829639434 test_loss: 2.0243555068969727
epoch: 94 training_loss 2.2107368159294127 test_loss: 2.2576772689819338
epoch: 95 training_loss 2.2038603973388673 test_loss: 2.2139116287231446
epoch: 96 training_loss 2.1762868106365203 test_loss: 2.2778255462646486
epoch: 97 training_loss 2.2029689168930053 test_loss: 2.2529060363769533
epoch: 98 training_loss 2.12542610168457 test_loss: 2.1256147384643556
epoch: 99 training_loss 2.1065895223617552 test_loss: 2.0921606063842773
epoch: 100 training_loss 2.1539469265937807 test_loss: 2.2405218124389648
epoch: 101 training_loss 2.165908229351044 test_loss: 2.080730438232422
epoch: 102 training_loss 2.100886615514755 test_loss: 2.2766210556030275
epoch: 103 training_loss 2.0638157331943514 test_loss: 2.054083251953125
epoch: 104 training_loss 2.095369555950165 test_loss: 2.206377410888672
epoch: 105 training_loss 2.0968460071086885 test_loss: 1.940835952758789
epoch: 106 training_loss 2.0496730363368987 test_loss: 2.080708694458008
epoch: 107 training_loss 2.043650622367859 test_loss: 2.028423309326172
epoch: 108 training_loss 2.0639142072200776 test_loss: 2.1353176116943358
epoch: 109 training_loss 2.091266304254532 test_loss: 2.0634332656860352
epoch: 110 training_loss 2.0127667450904845 test_loss: 2.0158782958984376
epoch: 111 training_loss 2.0043475604057313 test_loss: 1.9735435485839843
epoch: 112 training_loss 2.0240848159790037 test_loss: 1.9766775131225587
epoch: 113 training_loss 2.0477807581424714 test_loss: 2.0171297073364256
epoch: 114 training_loss 2.010822366476059 test_loss: 2.007844161987305
epoch: 115 training_loss 2.0073903357982634 test_loss: 1.9465145111083983
epoch: 116 training_loss 2.0443709897994995 test_loss: 2.018153190612793
epoch: 117 training_loss 2.0504475915431977 test_loss: 1.8665719985961915
epoch: 118 training_loss 1.9829577040672302 test_loss: 1.996966552734375
epoch: 119 training_loss 1.9753052544593812 test_loss: 1.997861099243164
epoch: 120 training_loss 1.9819075572490692 test_loss: 1.9790142059326172
epoch: 121 training_loss 1.9335871410369874 test_loss: 2.0753334045410154
epoch: 122 training_loss 1.998956973552704 test_loss: 2.080348014831543
epoch: 123 training_loss 1.9838703846931458 test_loss: 2.002182388305664
epoch: 124 training_loss 1.9252391409873963 test_loss: 1.9670160293579102
epoch: 125 training_loss 1.915396671295166 test_loss: 1.9649930953979493
epoch: 126 training_loss 1.9198977112770081 test_loss: 2.04178409576416
epoch: 127 training_loss 1.9174008631706239 test_loss: 2.0781150817871095
epoch: 128 training_loss 1.9324887454509736 test_loss: 1.8826932907104492
epoch: 129 training_loss 1.9085359513759612 test_loss: 1.915517807006836
epoch: 130 training_loss 1.896768251657486 test_loss: 1.958761215209961
epoch: 131 training_loss 1.966777777671814 test_loss: 1.9270824432373046
epoch: 132 training_loss 1.8637661278247832 test_loss: 1.9043487548828124
epoch: 133 training_loss 1.8580676925182342 test_loss: 1.943629264831543
epoch: 134 training_loss 1.9117810380458833 test_loss: 1.8598955154418946
epoch: 135 training_loss 1.8702570402622223 test_loss: 2.0299985885620115
epoch: 136 training_loss 1.8832592356204987 test_loss: 1.7864015579223633
epoch: 137 training_loss 1.8911727166175842 test_loss: 1.8315958023071288
epoch: 138 training_loss 1.8881649386882782 test_loss: 1.9592456817626953
epoch: 139 training_loss 1.8447100925445556 test_loss: 1.836905288696289
epoch: 140 training_loss 1.8062477672100068 test_loss: 2.0176244735717774
epoch: 141 training_loss 1.83344451546669 test_loss: 1.8263971328735351
epoch: 142 training_loss 1.859782773256302 test_loss: 1.9906572341918944
epoch: 143 training_loss 1.834317090511322 test_loss: 1.800384521484375
epoch: 144 training_loss 1.8212809324264527 test_loss: 1.9777507781982422
epoch: 145 training_loss 1.8589564621448518 test_loss: 1.837651824951172
epoch: 146 training_loss 1.8363868284225464 test_loss: 1.7768203735351562
epoch: 147 training_loss 1.8097043812274933 test_loss: 1.8743930816650392
epoch: 148 training_loss 1.8268417274951936 test_loss: 1.7718935012817383
epoch: 149 training_loss 1.8049486315250396 test_loss: 1.8219240188598633
8332.684773493975
episode: 0 training return: tensor(-809.8254, device='cuda:0')
episode: 1 training return: tensor(-743.8271, device='cuda:0')
episode: 2 training return: tensor(-911.9673, device='cuda:0')
episode: 3 training return: tensor(-362.0829, device='cuda:0')
epoch: 1 test_true_pfm: 10026.884258344644 sim_pfm: -929.7684553408375
episode: 4 training return: tensor(-476.9449, device='cuda:0')
episode: 5 training return: tensor(-578.4537, device='cuda:0')
episode: 6 training return: tensor(-816.0673, device='cuda:0')
episode: 7 training return: tensor(-996.7462, device='cuda:0')
epoch: 2 test_true_pfm: 7839.343311917688 sim_pfm: -318.4613645237987
episode: 8 training return: tensor(-884.4531, device='cuda:0')
episode: 9 training return: tensor(-388.1623, device='cuda:0')
episode: 10 training return: tensor(-926.1485, device='cuda:0')
episode: 11 training return: tensor(-999.9708, device='cuda:0')
epoch: 3 test_true_pfm: 3352.784700749788 sim_pfm: -482.2138910497791
episode: 12 training return: tensor(-870.4498, device='cuda:0')
episode: 13 training return: tensor(-686.1053, device='cuda:0')
episode: 14 training return: tensor(-496.1813, device='cuda:0')
episode: 15 training return: tensor(-531.3392, device='cuda:0')
epoch: 4 test_true_pfm: 7083.97054220136 sim_pfm: -288.0087834281342
episode: 16 training return: tensor(-816.6107, device='cuda:0')
episode: 17 training return: tensor(-722.8356, device='cuda:0')
episode: 18 training return: tensor(-36.2372, device='cuda:0')
episode: 19 training return: tensor(-894.8630, device='cuda:0')
epoch: 5 test_true_pfm: 7353.573168750783 sim_pfm: -366.0706829857372
episode: 20 training return: tensor(-999.2788, device='cuda:0')
episode: 21 training return: tensor(-902.6441, device='cuda:0')
episode: 22 training return: tensor(-892.4557, device='cuda:0')
episode: 23 training return: tensor(-237.8371, device='cuda:0')
epoch: 6 test_true_pfm: 7449.246286718187 sim_pfm: -324.56556203222135
episode: 24 training return: tensor(-473.9636, device='cuda:0')
episode: 25 training return: tensor(-257.4586, device='cuda:0')
episode: 26 training return: tensor(-647.7095, device='cuda:0')
episode: 27 training return: tensor(-999.9384, device='cuda:0')
epoch: 7 test_true_pfm: 10085.611085570004 sim_pfm: -450.29781573211466
episode: 28 training return: tensor(-559.0071, device='cuda:0')
episode: 29 training return: tensor(-249.4323, device='cuda:0')
episode: 30 training return: tensor(-999.9611, device='cuda:0')
episode: 31 training return: tensor(-382.6026, device='cuda:0')
epoch: 8 test_true_pfm: 6749.24720525794 sim_pfm: -550.9182303481502
episode: 32 training return: tensor(-749.0255, device='cuda:0')
episode: 33 training return: tensor(-946.2377, device='cuda:0')
episode: 34 training return: tensor(-589.6108, device='cuda:0')
episode: 35 training return: tensor(-389.2648, device='cuda:0')
epoch: 9 test_true_pfm: 7402.324717358177 sim_pfm: -169.20514463588674
episode: 36 training return: tensor(-920.9064, device='cuda:0')
episode: 37 training return: tensor(-999.9922, device='cuda:0')
episode: 38 training return: tensor(-441.2124, device='cuda:0')
episode: 39 training return: tensor(-266.8840, device='cuda:0')
epoch: 10 test_true_pfm: 8317.624028884142 sim_pfm: -221.66787141522704
episode: 40 training return: tensor(-316.5428, device='cuda:0')
episode: 41 training return: tensor(-996.8939, device='cuda:0')
episode: 42 training return: tensor(-729.6460, device='cuda:0')
episode: 43 training return: tensor(-142.2811, device='cuda:0')
epoch: 11 test_true_pfm: 1387.3585808339092 sim_pfm: -118.37322331898031
episode: 44 training return: tensor(-402.0790, device='cuda:0')
episode: 45 training return: tensor(-253.0775, device='cuda:0')
episode: 46 training return: tensor(-985.9205, device='cuda:0')
episode: 47 training return: tensor(-875.4327, device='cuda:0')
epoch: 12 test_true_pfm: 8977.925962153799 sim_pfm: -114.64211000451662
episode: 48 training return: tensor(-277.8953, device='cuda:0')
episode: 49 training return: tensor(-793.0504, device='cuda:0')
episode: 50 training return: tensor(-866.9898, device='cuda:0')
episode: 51 training return: tensor(-355.0415, device='cuda:0')
epoch: 13 test_true_pfm: 7508.090030169736 sim_pfm: -480.1907455770124
episode: 52 training return: tensor(-208.5355, device='cuda:0')
episode: 53 training return: tensor(-980.9237, device='cuda:0')
episode: 54 training return: tensor(-240.0021, device='cuda:0')
episode: 55 training return: tensor(-412.4813, device='cuda:0')
epoch: 14 test_true_pfm: 10067.624020163246 sim_pfm: -468.3511504899652
episode: 56 training return: tensor(-997.1061, device='cuda:0')
episode: 57 training return: tensor(23.4480, device='cuda:0')
episode: 58 training return: tensor(-307.4235, device='cuda:0')
episode: 59 training return: tensor(-422.5694, device='cuda:0')
epoch: 15 test_true_pfm: 3230.6483437743905 sim_pfm: -904.3748176911225
episode: 60 training return: tensor(-999.8607, device='cuda:0')
episode: 61 training return: tensor(-918.7515, device='cuda:0')
episode: 62 training return: tensor(-642.6362, device='cuda:0')
episode: 63 training return: tensor(-999.9828, device='cuda:0')
epoch: 16 test_true_pfm: 8325.562798066616 sim_pfm: -44.767635470372625
episode: 64 training return: tensor(-485.0144, device='cuda:0')
episode: 65 training return: tensor(-997.3931, device='cuda:0')
episode: 66 training return: tensor(-999.6686, device='cuda:0')
episode: 67 training return: tensor(-266.8318, device='cuda:0')
epoch: 17 test_true_pfm: 6926.117022313893 sim_pfm: -446.9898356733999
episode: 68 training return: tensor(-446.0866, device='cuda:0')
episode: 69 training return: tensor(-284.2750, device='cuda:0')
episode: 70 training return: tensor(-542.1147, device='cuda:0')
episode: 71 training return: tensor(-726.4199, device='cuda:0')
epoch: 18 test_true_pfm: 6802.437043951274 sim_pfm: -133.4071199852042
episode: 72 training return: tensor(-928.0817, device='cuda:0')
episode: 73 training return: tensor(-402.7455, device='cuda:0')
episode: 74 training return: tensor(-222.3056, device='cuda:0')
episode: 75 training return: tensor(-580.3504, device='cuda:0')
epoch: 19 test_true_pfm: 6228.849331423721 sim_pfm: -150.30416874269335
episode: 76 training return: tensor(-168.8833, device='cuda:0')
episode: 77 training return: tensor(-517.3710, device='cuda:0')
episode: 78 training return: tensor(-640.3079, device='cuda:0')
episode: 79 training return: tensor(-289.8301, device='cuda:0')
epoch: 20 test_true_pfm: 10456.421273317777 sim_pfm: -373.08507773959235
episode: 80 training return: tensor(-999.9925, device='cuda:0')
episode: 81 training return: tensor(-313.4583, device='cuda:0')
episode: 82 training return: tensor(-800.4600, device='cuda:0')
episode: 83 training return: tensor(-735.7238, device='cuda:0')
epoch: 21 test_true_pfm: 10290.710177885783 sim_pfm: -373.09376695600804
episode: 84 training return: tensor(-584.1812, device='cuda:0')
episode: 85 training return: tensor(-205.1068, device='cuda:0')
episode: 86 training return: tensor(-560.2543, device='cuda:0')
episode: 87 training return: tensor(-999.9966, device='cuda:0')
epoch: 22 test_true_pfm: 8488.708551240587 sim_pfm: -148.5264566009088
episode: 88 training return: tensor(-311.3778, device='cuda:0')
episode: 89 training return: tensor(-758.6945, device='cuda:0')
episode: 90 training return: tensor(-395.7869, device='cuda:0')
episode: 91 training return: tensor(-995.2815, device='cuda:0')
epoch: 23 test_true_pfm: 5094.620270934153 sim_pfm: -52.58826728600737
episode: 92 training return: tensor(-501.1839, device='cuda:0')
episode: 93 training return: tensor(-799.0602, device='cuda:0')
episode: 94 training return: tensor(-242.2692, device='cuda:0')
episode: 95 training return: tensor(-707.8720, device='cuda:0')
epoch: 24 test_true_pfm: 3468.1766879567454 sim_pfm: -21.862778862317402
episode: 96 training return: tensor(-999.1132, device='cuda:0')
episode: 97 training return: tensor(-385.8930, device='cuda:0')
episode: 98 training return: tensor(-196.5601, device='cuda:0')
episode: 99 training return: tensor(-358.1874, device='cuda:0')
epoch: 25 test_true_pfm: 7254.564115724854 sim_pfm: -376.16804256947944
episode: 100 training return: tensor(-673.4564, device='cuda:0')
episode: 101 training return: tensor(-238.8478, device='cuda:0')
episode: 102 training return: tensor(-656.8638, device='cuda:0')
episode: 103 training return: tensor(-360.5651, device='cuda:0')
epoch: 26 test_true_pfm: 7880.161762549908 sim_pfm: -146.69605736063872
episode: 104 training return: tensor(-213.1459, device='cuda:0')
episode: 105 training return: tensor(-330.1987, device='cuda:0')
episode: 106 training return: tensor(-799.3682, device='cuda:0')
episode: 107 training return: tensor(-574.7622, device='cuda:0')
epoch: 27 test_true_pfm: 8611.688231271852 sim_pfm: -95.58199256886535
episode: 108 training return: tensor(-192.9651, device='cuda:0')
episode: 109 training return: tensor(-286.9191, device='cuda:0')
episode: 110 training return: tensor(-307.4821, device='cuda:0')
episode: 111 training return: tensor(-617.3120, device='cuda:0')
epoch: 28 test_true_pfm: 10147.326755172166 sim_pfm: -88.26605613723707
episode: 112 training return: tensor(-440.4107, device='cuda:0')
episode: 113 training return: tensor(-457.5052, device='cuda:0')
episode: 114 training return: tensor(-239.3443, device='cuda:0')
episode: 115 training return: tensor(-138.9770, device='cuda:0')
epoch: 29 test_true_pfm: 10133.693635570493 sim_pfm: 4.594433642186535
episode: 116 training return: tensor(-994.0604, device='cuda:0')
episode: 117 training return: tensor(-218.8630, device='cuda:0')
episode: 118 training return: tensor(-982.1157, device='cuda:0')
episode: 119 training return: tensor(-906.6512, device='cuda:0')
epoch: 30 test_true_pfm: 6738.936431251156 sim_pfm: -421.3204806133872
episode: 120 training return: tensor(-709.3596, device='cuda:0')
episode: 121 training return: tensor(-119.4555, device='cuda:0')
episode: 122 training return: tensor(-658.3340, device='cuda:0')
episode: 123 training return: tensor(-288.2160, device='cuda:0')
epoch: 31 test_true_pfm: 7018.26846301563 sim_pfm: -51.7694633754727
episode: 124 training return: tensor(-996.0438, device='cuda:0')
episode: 125 training return: tensor(-612.1320, device='cuda:0')
episode: 126 training return: tensor(-510.7242, device='cuda:0')
episode: 127 training return: tensor(-999.9888, device='cuda:0')
epoch: 32 test_true_pfm: 3349.721450466143 sim_pfm: -36.494429194931094
episode: 128 training return: tensor(-336.0101, device='cuda:0')
episode: 129 training return: tensor(-360.2711, device='cuda:0')
episode: 130 training return: tensor(-65.2584, device='cuda:0')
episode: 131 training return: tensor(-999.7556, device='cuda:0')
epoch: 33 test_true_pfm: 6564.141618898765 sim_pfm: -283.6475031611432
episode: 132 training return: tensor(-210.5489, device='cuda:0')
episode: 133 training return: tensor(-171.1973, device='cuda:0')
episode: 134 training return: tensor(-893.1029, device='cuda:0')
episode: 135 training return: tensor(-38.0532, device='cuda:0')
epoch: 34 test_true_pfm: 7190.522160563007 sim_pfm: -444.7484552716681
episode: 136 training return: tensor(-263.1840, device='cuda:0')
episode: 137 training return: tensor(-806.2438, device='cuda:0')
episode: 138 training return: tensor(-287.8259, device='cuda:0')
episode: 139 training return: tensor(-325.9843, device='cuda:0')
epoch: 35 test_true_pfm: 7713.639541784607 sim_pfm: -482.161498784951
episode: 140 training return: tensor(-514.6288, device='cuda:0')
episode: 141 training return: tensor(-293.2486, device='cuda:0')
episode: 142 training return: tensor(-999.9382, device='cuda:0')
episode: 143 training return: tensor(-127.0798, device='cuda:0')
epoch: 36 test_true_pfm: 9738.376675312726 sim_pfm: -30.237362193564575
episode: 144 training return: tensor(-339.8281, device='cuda:0')
episode: 145 training return: tensor(-997.6830, device='cuda:0')
episode: 146 training return: tensor(-961.9521, device='cuda:0')
episode: 147 training return: tensor(-962.0854, device='cuda:0')
epoch: 37 test_true_pfm: 6696.907528041625 sim_pfm: -740.0942256536024
episode: 148 training return: tensor(-66.5695, device='cuda:0')
episode: 149 training return: tensor(-207.5094, device='cuda:0')
episode: 150 training return: tensor(-164.4534, device='cuda:0')
episode: 151 training return: tensor(-66.8071, device='cuda:0')
epoch: 38 test_true_pfm: 9313.563921760639 sim_pfm: -440.109686767566
episode: 152 training return: tensor(-226.1878, device='cuda:0')
episode: 153 training return: tensor(-266.1965, device='cuda:0')
episode: 154 training return: tensor(-132.4706, device='cuda:0')
episode: 155 training return: tensor(-218.1429, device='cuda:0')
epoch: 39 test_true_pfm: 10037.830205594657 sim_pfm: -67.18880062971341
episode: 156 training return: tensor(-875.8639, device='cuda:0')
episode: 157 training return: tensor(-273.6892, device='cuda:0')
episode: 158 training return: tensor(-244.2307, device='cuda:0')
episode: 159 training return: tensor(-175.0221, device='cuda:0')
epoch: 40 test_true_pfm: 8682.002300533492 sim_pfm: 68.16279553158286
episode: 160 training return: tensor(-822.8135, device='cuda:0')
episode: 161 training return: tensor(-106.5852, device='cuda:0')
episode: 162 training return: tensor(-129.5739, device='cuda:0')
episode: 163 training return: tensor(-90.8353, device='cuda:0')
epoch: 41 test_true_pfm: 8573.145451535835 sim_pfm: -156.880964414236
episode: 164 training return: tensor(-999.9537, device='cuda:0')
episode: 165 training return: tensor(-293.5854, device='cuda:0')
episode: 166 training return: tensor(-285.4236, device='cuda:0')
episode: 167 training return: tensor(-223.2710, device='cuda:0')
epoch: 42 test_true_pfm: 10116.379866760171 sim_pfm: -72.28518720918025
episode: 168 training return: tensor(-2.9756, device='cuda:0')
episode: 169 training return: tensor(-999.8102, device='cuda:0')
episode: 170 training return: tensor(-148.6165, device='cuda:0')
episode: 171 training return: tensor(-516.9731, device='cuda:0')
epoch: 43 test_true_pfm: 10345.620258293782 sim_pfm: -17.168964996293653
episode: 172 training return: tensor(-272.1721, device='cuda:0')
episode: 173 training return: tensor(-998.5599, device='cuda:0')
episode: 174 training return: tensor(-315.6970, device='cuda:0')
episode: 175 training return: tensor(-271.0590, device='cuda:0')
epoch: 44 test_true_pfm: 6754.9633247996435 sim_pfm: -93.57733925638604
episode: 176 training return: tensor(-232.9656, device='cuda:0')
episode: 177 training return: tensor(-392.9631, device='cuda:0')
episode: 178 training return: tensor(-999.9988, device='cuda:0')
episode: 179 training return: tensor(-116.6389, device='cuda:0')
epoch: 45 test_true_pfm: 10457.43430467436 sim_pfm: 57.844979903136846
episode: 180 training return: tensor(-102.4952, device='cuda:0')
episode: 181 training return: tensor(-209.7362, device='cuda:0')
episode: 182 training return: tensor(-376.3732, device='cuda:0')
episode: 183 training return: tensor(-726.9775, device='cuda:0')
epoch: 46 test_true_pfm: 10367.73669382675 sim_pfm: -295.049365570206
episode: 184 training return: tensor(-319.6436, device='cuda:0')
episode: 185 training return: tensor(-208.9572, device='cuda:0')
episode: 186 training return: tensor(-346.9539, device='cuda:0')
episode: 187 training return: tensor(-292.0620, device='cuda:0')
epoch: 47 test_true_pfm: 9854.810952746719 sim_pfm: -357.55239646681974
episode: 188 training return: tensor(-39.6612, device='cuda:0')
episode: 189 training return: tensor(-3.9540, device='cuda:0')
episode: 190 training return: tensor(-999.9562, device='cuda:0')
episode: 191 training return: tensor(-228.7809, device='cuda:0')
epoch: 48 test_true_pfm: 10458.215804169473 sim_pfm: 98.77833929058397
episode: 192 training return: tensor(-540.0292, device='cuda:0')
episode: 193 training return: tensor(-853.6085, device='cuda:0')
episode: 194 training return: tensor(-641.9266, device='cuda:0')
episode: 195 training return: tensor(-270.1417, device='cuda:0')
epoch: 49 test_true_pfm: 10488.73888970015 sim_pfm: 52.019757124022966
episode: 196 training return: tensor(-267.2609, device='cuda:0')
episode: 197 training return: tensor(-999.9621, device='cuda:0')
episode: 198 training return: tensor(-513.7363, device='cuda:0')
episode: 199 training return: tensor(-394.7882, device='cuda:0')
epoch: 50 test_true_pfm: 6709.0813596405205 sim_pfm: -33.893998079137724
episode: 200 training return: tensor(-46.4685, device='cuda:0')
episode: 201 training return: tensor(-779.0108, device='cuda:0')
episode: 202 training return: tensor(-319.0135, device='cuda:0')
episode: 203 training return: tensor(-90.8870, device='cuda:0')
epoch: 51 test_true_pfm: 10352.224013787214 sim_pfm: 74.4027555192976
episode: 204 training return: tensor(16.8542, device='cuda:0')
episode: 205 training return: tensor(-997.8596, device='cuda:0')
episode: 206 training return: tensor(-729.3298, device='cuda:0')
episode: 207 training return: tensor(-179.1079, device='cuda:0')
epoch: 52 test_true_pfm: 10495.427571898692 sim_pfm: -147.27533475938253
episode: 208 training return: tensor(-272.4444, device='cuda:0')
episode: 209 training return: tensor(-999.9999, device='cuda:0')
episode: 210 training return: tensor(-69.5830, device='cuda:0')
episode: 211 training return: tensor(-973.2801, device='cuda:0')
epoch: 53 test_true_pfm: 3948.616575152837 sim_pfm: -247.699879384425
episode: 212 training return: tensor(-704.6467, device='cuda:0')
episode: 213 training return: tensor(-999.9648, device='cuda:0')
episode: 214 training return: tensor(-998.1903, device='cuda:0')
episode: 215 training return: tensor(-999.2870, device='cuda:0')
epoch: 54 test_true_pfm: 7447.792747817053 sim_pfm: 23.093608194079327
episode: 216 training return: tensor(-200.5697, device='cuda:0')
episode: 217 training return: tensor(-344.6647, device='cuda:0')
episode: 218 training return: tensor(-34.1999, device='cuda:0')
episode: 219 training return: tensor(-173.2528, device='cuda:0')
epoch: 55 test_true_pfm: 10493.494267515016 sim_pfm: -101.64898146435735
episode: 220 training return: tensor(-999.6766, device='cuda:0')
episode: 221 training return: tensor(-205.2695, device='cuda:0')
episode: 222 training return: tensor(-230.3489, device='cuda:0')
episode: 223 training return: tensor(33.2477, device='cuda:0')
epoch: 56 test_true_pfm: 5020.22937889557 sim_pfm: -343.98788986177533
episode: 224 training return: tensor(-155.0732, device='cuda:0')
episode: 225 training return: tensor(-999.9972, device='cuda:0')
episode: 226 training return: tensor(-139.2992, device='cuda:0')
episode: 227 training return: tensor(-221.6640, device='cuda:0')
epoch: 57 test_true_pfm: 10058.591759099267 sim_pfm: -276.2876136590494
episode: 228 training return: tensor(-146.5438, device='cuda:0')
episode: 229 training return: tensor(-158.8880, device='cuda:0')
episode: 230 training return: tensor(-259.3231, device='cuda:0')
episode: 231 training return: tensor(-214.1740, device='cuda:0')
epoch: 58 test_true_pfm: 10607.433919688548 sim_pfm: 115.88757146060622
episode: 232 training return: tensor(-376.5732, device='cuda:0')
episode: 233 training return: tensor(-41.9803, device='cuda:0')
episode: 234 training return: tensor(-104.0877, device='cuda:0')
episode: 235 training return: tensor(-184.9806, device='cuda:0')
epoch: 59 test_true_pfm: 3368.7703664681612 sim_pfm: -97.12241206118294
episode: 236 training return: tensor(-147.8783, device='cuda:0')
episode: 237 training return: tensor(-111.8565, device='cuda:0')
episode: 238 training return: tensor(-145.2307, device='cuda:0')
episode: 239 training return: tensor(45.5222, device='cuda:0')
epoch: 60 test_true_pfm: 10019.751512014258 sim_pfm: 37.69499508625207
episode: 240 training return: tensor(-853.4374, device='cuda:0')
episode: 241 training return: tensor(-202.6636, device='cuda:0')
episode: 242 training return: tensor(-165.7506, device='cuda:0')
episode: 243 training return: tensor(-315.7535, device='cuda:0')
epoch: 61 test_true_pfm: 10513.097108902022 sim_pfm: 5.253390678107583
episode: 244 training return: tensor(-55.3755, device='cuda:0')
episode: 245 training return: tensor(-93.6139, device='cuda:0')
episode: 246 training return: tensor(-410.8498, device='cuda:0')
episode: 247 training return: tensor(-243.9614, device='cuda:0')
epoch: 62 test_true_pfm: 10393.93870083385 sim_pfm: 103.75741989898961
episode: 248 training return: tensor(-164.8264, device='cuda:0')
episode: 249 training return: tensor(-445.3471, device='cuda:0')
episode: 250 training return: tensor(-180.2805, device='cuda:0')
episode: 251 training return: tensor(-133.0624, device='cuda:0')
epoch: 63 test_true_pfm: 10268.039103905545 sim_pfm: -308.2229466129793
episode: 252 training return: tensor(9.8571, device='cuda:0')
episode: 253 training return: tensor(-285.1164, device='cuda:0')
episode: 254 training return: tensor(-19.6322, device='cuda:0')
episode: 255 training return: tensor(-129.5066, device='cuda:0')
epoch: 64 test_true_pfm: 8778.51509725886 sim_pfm: -287.4732907702564
episode: 256 training return: tensor(13.1379, device='cuda:0')
episode: 257 training return: tensor(-222.2504, device='cuda:0')
episode: 258 training return: tensor(-710.6594, device='cuda:0')
episode: 259 training return: tensor(-118.6680, device='cuda:0')
epoch: 65 test_true_pfm: 10551.067190018184 sim_pfm: -272.37550350354286
episode: 260 training return: tensor(-43.5015, device='cuda:0')
episode: 261 training return: tensor(-999.9717, device='cuda:0')
episode: 262 training return: tensor(15.8986, device='cuda:0')
episode: 263 training return: tensor(-104.3653, device='cuda:0')
epoch: 66 test_true_pfm: 6904.081697003959 sim_pfm: 25.90721825741154
episode: 264 training return: tensor(-133.1340, device='cuda:0')
episode: 265 training return: tensor(-168.7502, device='cuda:0')
episode: 266 training return: tensor(-132.9662, device='cuda:0')
episode: 267 training return: tensor(-155.2618, device='cuda:0')
epoch: 67 test_true_pfm: 9339.782009754072 sim_pfm: 2.9739263977098744
episode: 268 training return: tensor(-126.7565, device='cuda:0')
episode: 269 training return: tensor(-29.6389, device='cuda:0')
episode: 270 training return: tensor(-584.6842, device='cuda:0')
episode: 271 training return: tensor(-41.2018, device='cuda:0')
epoch: 68 test_true_pfm: 6954.116927761796 sim_pfm: -168.94931915172492
episode: 272 training return: tensor(-703.5497, device='cuda:0')
episode: 273 training return: tensor(-72.5818, device='cuda:0')
episode: 274 training return: tensor(-287.8606, device='cuda:0')
episode: 275 training return: tensor(-107.4039, device='cuda:0')
epoch: 69 test_true_pfm: 10312.533330339646 sim_pfm: 117.35151005341322
episode: 276 training return: tensor(-888.8058, device='cuda:0')
episode: 277 training return: tensor(-132.3447, device='cuda:0')
episode: 278 training return: tensor(-937.5046, device='cuda:0')
episode: 279 training return: tensor(-888.6835, device='cuda:0')
epoch: 70 test_true_pfm: 9063.457445052196 sim_pfm: 139.3357771546192
episode: 280 training return: tensor(-165.2543, device='cuda:0')
episode: 281 training return: tensor(-213.1133, device='cuda:0')
episode: 282 training return: tensor(-999.9101, device='cuda:0')
episode: 283 training return: tensor(-51.8907, device='cuda:0')
epoch: 71 test_true_pfm: 10518.76607996996 sim_pfm: -110.62967253806225
episode: 284 training return: tensor(-254.8474, device='cuda:0')
episode: 285 training return: tensor(68.2507, device='cuda:0')
episode: 286 training return: tensor(-999.7980, device='cuda:0')
episode: 287 training return: tensor(-999.9973, device='cuda:0')
epoch: 72 test_true_pfm: 9596.023727745887 sim_pfm: 37.749295273407675
episode: 288 training return: tensor(105.6379, device='cuda:0')
episode: 289 training return: tensor(9.7291, device='cuda:0')
episode: 290 training return: tensor(-869.7832, device='cuda:0')
episode: 291 training return: tensor(-578.7220, device='cuda:0')
epoch: 73 test_true_pfm: 6935.769171538052 sim_pfm: -51.69546790242506
episode: 292 training return: tensor(-140.7465, device='cuda:0')
episode: 293 training return: tensor(-692.4067, device='cuda:0')
episode: 294 training return: tensor(-119.5411, device='cuda:0')
episode: 295 training return: tensor(36.9574, device='cuda:0')
epoch: 74 test_true_pfm: 6851.556651920568 sim_pfm: 23.437782898166915
episode: 296 training return: tensor(-132.1407, device='cuda:0')
episode: 297 training return: tensor(-274.2267, device='cuda:0')
episode: 298 training return: tensor(25.5285, device='cuda:0')
episode: 299 training return: tensor(-999.9860, device='cuda:0')
epoch: 75 test_true_pfm: 10326.114812264752 sim_pfm: 41.66457866850154
episode: 300 training return: tensor(-134.8680, device='cuda:0')
episode: 301 training return: tensor(-62.9965, device='cuda:0')
episode: 302 training return: tensor(-185.1834, device='cuda:0')
episode: 303 training return: tensor(19.7650, device='cuda:0')
epoch: 76 test_true_pfm: 6849.071626242553 sim_pfm: -231.5872489500713
episode: 304 training return: tensor(-106.6720, device='cuda:0')
episode: 305 training return: tensor(-103.4645, device='cuda:0')
episode: 306 training return: tensor(-999.9996, device='cuda:0')
episode: 307 training return: tensor(-172.2597, device='cuda:0')
epoch: 77 test_true_pfm: 10274.648166832803 sim_pfm: 47.67860916567346
episode: 308 training return: tensor(-88.4203, device='cuda:0')
episode: 309 training return: tensor(-28.3687, device='cuda:0')
episode: 310 training return: tensor(-886.8726, device='cuda:0')
episode: 311 training return: tensor(-110.8925, device='cuda:0')
epoch: 78 test_true_pfm: 10396.313480876503 sim_pfm: -54.506715482333675
episode: 312 training return: tensor(-874.6475, device='cuda:0')
episode: 313 training return: tensor(-246.0377, device='cuda:0')
episode: 314 training return: tensor(-110.3284, device='cuda:0')
episode: 315 training return: tensor(-242.6752, device='cuda:0')
epoch: 79 test_true_pfm: 10576.477417506014 sim_pfm: 191.2636003640267
episode: 316 training return: tensor(-207.9685, device='cuda:0')
episode: 317 training return: tensor(-12.4652, device='cuda:0')
episode: 318 training return: tensor(-57.0085, device='cuda:0')
episode: 319 training return: tensor(-115.0545, device='cuda:0')
epoch: 80 test_true_pfm: 10456.247026457102 sim_pfm: 83.59387028509325
episode: 320 training return: tensor(-210.7006, device='cuda:0')
episode: 321 training return: tensor(-18.0823, device='cuda:0')
episode: 322 training return: tensor(-366.7868, device='cuda:0')
episode: 323 training return: tensor(-131.9478, device='cuda:0')
epoch: 81 test_true_pfm: 10231.983124645909 sim_pfm: -54.44056994074102
episode: 324 training return: tensor(-502.3866, device='cuda:0')
episode: 325 training return: tensor(-107.0270, device='cuda:0')
episode: 326 training return: tensor(-55.0430, device='cuda:0')
episode: 327 training return: tensor(-57.6325, device='cuda:0')
epoch: 82 test_true_pfm: 10395.762737256924 sim_pfm: 54.51533362451786
episode: 328 training return: tensor(-29.4355, device='cuda:0')
episode: 329 training return: tensor(-326.6208, device='cuda:0')
episode: 330 training return: tensor(-190.5002, device='cuda:0')
episode: 331 training return: tensor(-144.4951, device='cuda:0')
epoch: 83 test_true_pfm: 10524.184787808112 sim_pfm: -44.657694772060495
episode: 332 training return: tensor(-687.6697, device='cuda:0')
episode: 333 training return: tensor(-180.6857, device='cuda:0')
episode: 334 training return: tensor(-991.4874, device='cuda:0')
episode: 335 training return: tensor(-494.7075, device='cuda:0')
epoch: 84 test_true_pfm: 10345.418872525423 sim_pfm: 5.697323356260313
episode: 336 training return: tensor(2.6294, device='cuda:0')
episode: 337 training return: tensor(-177.5356, device='cuda:0')
episode: 338 training return: tensor(-165.3682, device='cuda:0')
episode: 339 training return: tensor(-242.0822, device='cuda:0')
epoch: 85 test_true_pfm: 10328.844989220699 sim_pfm: 40.03589907192509
episode: 340 training return: tensor(-999.3420, device='cuda:0')
episode: 341 training return: tensor(-825.8758, device='cuda:0')
episode: 342 training return: tensor(-65.8579, device='cuda:0')
episode: 343 training return: tensor(-999.8964, device='cuda:0')
epoch: 86 test_true_pfm: 10614.152305949263 sim_pfm: 151.56986468294053
episode: 344 training return: tensor(-18.1226, device='cuda:0')
episode: 345 training return: tensor(-761.1229, device='cuda:0')
episode: 346 training return: tensor(-645.1171, device='cuda:0')
episode: 347 training return: tensor(27.5346, device='cuda:0')
epoch: 87 test_true_pfm: 10345.216786028763 sim_pfm: -72.9688431537749
episode: 348 training return: tensor(1.1866, device='cuda:0')
episode: 349 training return: tensor(10.6031, device='cuda:0')
episode: 350 training return: tensor(-146.4527, device='cuda:0')
episode: 351 training return: tensor(-999.9467, device='cuda:0')
epoch: 88 test_true_pfm: 8358.052928507082 sim_pfm: -2.238400984555483
episode: 352 training return: tensor(-354.8554, device='cuda:0')
episode: 353 training return: tensor(-95.5489, device='cuda:0')
episode: 354 training return: tensor(-238.3725, device='cuda:0')
episode: 355 training return: tensor(-174.5651, device='cuda:0')
epoch: 89 test_true_pfm: 10258.149147157397 sim_pfm: 56.85373987955003
episode: 356 training return: tensor(-480.1259, device='cuda:0')
episode: 357 training return: tensor(-157.9492, device='cuda:0')
episode: 358 training return: tensor(-999.9791, device='cuda:0')
episode: 359 training return: tensor(-188.5410, device='cuda:0')
epoch: 90 test_true_pfm: 10272.437128872762 sim_pfm: -600.5003523548172
episode: 360 training return: tensor(-38.2674, device='cuda:0')
episode: 361 training return: tensor(-57.2430, device='cuda:0')
episode: 362 training return: tensor(24.1372, device='cuda:0')
episode: 363 training return: tensor(-147.2911, device='cuda:0')
epoch: 91 test_true_pfm: 10420.58572747063 sim_pfm: 55.01183329282018
episode: 364 training return: tensor(-95.1984, device='cuda:0')
episode: 365 training return: tensor(-999.9957, device='cuda:0')
episode: 366 training return: tensor(-842.8174, device='cuda:0')
episode: 367 training return: tensor(-14.6980, device='cuda:0')
epoch: 92 test_true_pfm: 10362.631719431874 sim_pfm: 15.41039111558348
episode: 368 training return: tensor(110.4926, device='cuda:0')
episode: 369 training return: tensor(-190.5406, device='cuda:0')
episode: 370 training return: tensor(-998.6778, device='cuda:0')
episode: 371 training return: tensor(-85.3560, device='cuda:0')
epoch: 93 test_true_pfm: 10340.109167177197 sim_pfm: 42.996989487369625
episode: 372 training return: tensor(-45.7347, device='cuda:0')
episode: 373 training return: tensor(-15.5737, device='cuda:0')
episode: 374 training return: tensor(-250.2039, device='cuda:0')
episode: 375 training return: tensor(-91.2671, device='cuda:0')
epoch: 94 test_true_pfm: 5931.050933268296 sim_pfm: -122.92375759844435
episode: 376 training return: tensor(-40.7984, device='cuda:0')
episode: 377 training return: tensor(-774.3209, device='cuda:0')
episode: 378 training return: tensor(-194.2903, device='cuda:0')
episode: 379 training return: tensor(-71.5973, device='cuda:0')
epoch: 95 test_true_pfm: 10467.08412375115 sim_pfm: -304.7997326372424
episode: 380 training return: tensor(-46.3485, device='cuda:0')
episode: 381 training return: tensor(-999.9965, device='cuda:0')
episode: 382 training return: tensor(-999.9635, device='cuda:0')
episode: 383 training return: tensor(-143.5744, device='cuda:0')
epoch: 96 test_true_pfm: 10362.383428786974 sim_pfm: 2.4666081387064573
episode: 384 training return: tensor(-278.5428, device='cuda:0')
episode: 385 training return: tensor(-71.0572, device='cuda:0')
episode: 386 training return: tensor(-696.8618, device='cuda:0')
episode: 387 training return: tensor(-999.8530, device='cuda:0')
epoch: 97 test_true_pfm: 10232.118606570775 sim_pfm: -315.76197914798587
episode: 388 training return: tensor(-248.1068, device='cuda:0')
episode: 389 training return: tensor(-17.1282, device='cuda:0')
episode: 390 training return: tensor(-423.4237, device='cuda:0')
episode: 391 training return: tensor(-26.4742, device='cuda:0')
epoch: 98 test_true_pfm: 10476.851130428657 sim_pfm: -136.95030771246334
episode: 392 training return: tensor(-999.9908, device='cuda:0')
episode: 393 training return: tensor(-254.4656, device='cuda:0')
episode: 394 training return: tensor(-74.6281, device='cuda:0')
episode: 395 training return: tensor(20.7324, device='cuda:0')
epoch: 99 test_true_pfm: 8119.433585971601 sim_pfm: -123.6452652659209
episode: 396 training return: tensor(-230.1219, device='cuda:0')
episode: 397 training return: tensor(-998.2667, device='cuda:0')
episode: 398 training return: tensor(25.2604, device='cuda:0')
episode: 399 training return: tensor(-193.0368, device='cuda:0')
epoch: 100 test_true_pfm: 9853.286785723676 sim_pfm: -184.13840217109342
episode: 400 training return: tensor(-180.0423, device='cuda:0')
episode: 401 training return: tensor(-999.9895, device='cuda:0')
episode: 402 training return: tensor(-134.3979, device='cuda:0')
episode: 403 training return: tensor(-669.6450, device='cuda:0')
epoch: 101 test_true_pfm: 6882.574604082644 sim_pfm: 40.26806792081334
episode: 404 training return: tensor(-2.0576, device='cuda:0')
episode: 405 training return: tensor(-82.7211, device='cuda:0')
episode: 406 training return: tensor(-66.9612, device='cuda:0')
episode: 407 training return: tensor(-174.0210, device='cuda:0')
epoch: 102 test_true_pfm: 10584.063030209234 sim_pfm: -18.19127472668576
episode: 408 training return: tensor(35.2759, device='cuda:0')
episode: 409 training return: tensor(-73.6551, device='cuda:0')
episode: 410 training return: tensor(-172.5706, device='cuda:0')
episode: 411 training return: tensor(-205.4349, device='cuda:0')
epoch: 103 test_true_pfm: 10489.427175021592 sim_pfm: 96.11134651317843
episode: 412 training return: tensor(-51.0975, device='cuda:0')
episode: 413 training return: tensor(-999.9974, device='cuda:0')
episode: 414 training return: tensor(-766.0754, device='cuda:0')
episode: 415 training return: tensor(-389.2825, device='cuda:0')
epoch: 104 test_true_pfm: 10364.157374239765 sim_pfm: -76.11651528708171
episode: 416 training return: tensor(-162.9568, device='cuda:0')
episode: 417 training return: tensor(-723.4148, device='cuda:0')
episode: 418 training return: tensor(-210.1080, device='cuda:0')
episode: 419 training return: tensor(-43.9495, device='cuda:0')
epoch: 105 test_true_pfm: 8038.32810128535 sim_pfm: -84.96755463408772
episode: 420 training return: tensor(-49.6067, device='cuda:0')
episode: 421 training return: tensor(-180.5163, device='cuda:0')
episode: 422 training return: tensor(-9.0983, device='cuda:0')
episode: 423 training return: tensor(-47.5018, device='cuda:0')
epoch: 106 test_true_pfm: 6692.757201313376 sim_pfm: -39.34104368668826
episode: 424 training return: tensor(64.9651, device='cuda:0')
episode: 425 training return: tensor(-148.4591, device='cuda:0')
episode: 426 training return: tensor(-25.3465, device='cuda:0')
episode: 427 training return: tensor(-92.3305, device='cuda:0')
epoch: 107 test_true_pfm: 10301.916974256577 sim_pfm: -316.99374635882367
episode: 428 training return: tensor(-1.7881, device='cuda:0')
episode: 429 training return: tensor(-72.0310, device='cuda:0')
episode: 430 training return: tensor(-33.4702, device='cuda:0')
episode: 431 training return: tensor(-157.7996, device='cuda:0')
epoch: 108 test_true_pfm: 10390.356958294136 sim_pfm: -291.4752655174816
episode: 432 training return: tensor(-108.0860, device='cuda:0')
episode: 433 training return: tensor(34.7962, device='cuda:0')
episode: 434 training return: tensor(-122.6242, device='cuda:0')
episode: 435 training return: tensor(23.9251, device='cuda:0')
epoch: 109 test_true_pfm: 10480.692735582008 sim_pfm: 127.81708086618649
episode: 436 training return: tensor(-133.5470, device='cuda:0')
episode: 437 training return: tensor(-8.6336, device='cuda:0')
episode: 438 training return: tensor(-878.4434, device='cuda:0')
episode: 439 training return: tensor(-131.6019, device='cuda:0')
epoch: 110 test_true_pfm: 10415.100333860804 sim_pfm: 95.96792202702879
episode: 440 training return: tensor(-101.3646, device='cuda:0')
episode: 441 training return: tensor(-114.9862, device='cuda:0')
episode: 442 training return: tensor(-643.1257, device='cuda:0')
episode: 443 training return: tensor(-999.9827, device='cuda:0')
epoch: 111 test_true_pfm: 10384.652423044923 sim_pfm: -140.16177210997557
episode: 444 training return: tensor(20.3793, device='cuda:0')
episode: 445 training return: tensor(-81.4468, device='cuda:0')
episode: 446 training return: tensor(-39.7898, device='cuda:0')
episode: 447 training return: tensor(-131.9674, device='cuda:0')
epoch: 112 test_true_pfm: 10404.636537253886 sim_pfm: -77.3399372206283
episode: 448 training return: tensor(-227.4180, device='cuda:0')
episode: 449 training return: tensor(-172.7507, device='cuda:0')
episode: 450 training return: tensor(-999.9827, device='cuda:0')
episode: 451 training return: tensor(-214.7941, device='cuda:0')
epoch: 113 test_true_pfm: 6856.90510955302 sim_pfm: 23.719591964812327
episode: 452 training return: tensor(-572.0483, device='cuda:0')
episode: 453 training return: tensor(-987.9140, device='cuda:0')
episode: 454 training return: tensor(-999.9858, device='cuda:0')
episode: 455 training return: tensor(-133.4172, device='cuda:0')
epoch: 114 test_true_pfm: 10548.597516344089 sim_pfm: 78.30329346149422
episode: 456 training return: tensor(-999.6812, device='cuda:0')
episode: 457 training return: tensor(65.9849, device='cuda:0')
episode: 458 training return: tensor(-28.1726, device='cuda:0')
episode: 459 training return: tensor(-32.6252, device='cuda:0')
epoch: 115 test_true_pfm: 10371.650076770937 sim_pfm: -240.29396643591463
episode: 460 training return: tensor(-93.6213, device='cuda:0')
episode: 461 training return: tensor(-97.4608, device='cuda:0')
episode: 462 training return: tensor(-41.2666, device='cuda:0')
episode: 463 training return: tensor(-37.9702, device='cuda:0')
epoch: 116 test_true_pfm: 10542.238969572658 sim_pfm: 14.926521884005828
episode: 464 training return: tensor(-633.4174, device='cuda:0')
episode: 465 training return: tensor(-17.6805, device='cuda:0')
episode: 466 training return: tensor(-121.3601, device='cuda:0')
episode: 467 training return: tensor(38.8043, device='cuda:0')
epoch: 117 test_true_pfm: 10489.960266104541 sim_pfm: -2.9785899604903534
episode: 468 training return: tensor(6.2467, device='cuda:0')
episode: 469 training return: tensor(-568.0173, device='cuda:0')
episode: 470 training return: tensor(-13.2165, device='cuda:0')
episode: 471 training return: tensor(-31.6521, device='cuda:0')
epoch: 118 test_true_pfm: 9676.156289718208 sim_pfm: -43.8564337566883
episode: 472 training return: tensor(12.7627, device='cuda:0')
episode: 473 training return: tensor(-10.9007, device='cuda:0')
episode: 474 training return: tensor(70.2592, device='cuda:0')
episode: 475 training return: tensor(-768.2231, device='cuda:0')
epoch: 119 test_true_pfm: 6845.959400124361 sim_pfm: -125.60897564514501
episode: 476 training return: tensor(-644.0870, device='cuda:0')
episode: 477 training return: tensor(-137.7162, device='cuda:0')
episode: 478 training return: tensor(-999.9269, device='cuda:0')
episode: 479 training return: tensor(-998.6525, device='cuda:0')
epoch: 120 test_true_pfm: 7369.772322405569 sim_pfm: -40.61159564347084
episode: 480 training return: tensor(4.7422, device='cuda:0')
episode: 481 training return: tensor(-129.3367, device='cuda:0')
episode: 482 training return: tensor(68.0009, device='cuda:0')
episode: 483 training return: tensor(-412.3969, device='cuda:0')
epoch: 121 test_true_pfm: 10451.664233105623 sim_pfm: 165.56347888422897
episode: 484 training return: tensor(-787.0763, device='cuda:0')
episode: 485 training return: tensor(-982.2127, device='cuda:0')
episode: 486 training return: tensor(-176.6848, device='cuda:0')
episode: 487 training return: tensor(-33.9156, device='cuda:0')
epoch: 122 test_true_pfm: 10600.934418233226 sim_pfm: -279.89847436970257
episode: 488 training return: tensor(-26.0061, device='cuda:0')
episode: 489 training return: tensor(34.7335, device='cuda:0')
episode: 490 training return: tensor(-30.7945, device='cuda:0')
episode: 491 training return: tensor(24.8876, device='cuda:0')
epoch: 123 test_true_pfm: 7409.529167820001 sim_pfm: 186.29251209138116
episode: 492 training return: tensor(-999.9919, device='cuda:0')
episode: 493 training return: tensor(-999.9116, device='cuda:0')
episode: 494 training return: tensor(-44.2233, device='cuda:0')
episode: 495 training return: tensor(16.7578, device='cuda:0')
epoch: 124 test_true_pfm: 9669.298825694406 sim_pfm: 129.55077752944393
episode: 496 training return: tensor(-83.4592, device='cuda:0')
episode: 497 training return: tensor(-3.5090, device='cuda:0')
episode: 498 training return: tensor(-481.3112, device='cuda:0')
episode: 499 training return: tensor(-49.4776, device='cuda:0')
epoch: 125 test_true_pfm: 10410.615098655995 sim_pfm: 67.69883912530106
episode: 500 training return: tensor(15.0159, device='cuda:0')
episode: 501 training return: tensor(-920.7443, device='cuda:0')
episode: 502 training return: tensor(-129.6448, device='cuda:0')
episode: 503 training return: tensor(-177.3919, device='cuda:0')
epoch: 126 test_true_pfm: 10512.382363199917 sim_pfm: 36.821888165917095
episode: 504 training return: tensor(-999.9315, device='cuda:0')
episode: 505 training return: tensor(-999.9935, device='cuda:0')
episode: 506 training return: tensor(-868.8717, device='cuda:0')
episode: 507 training return: tensor(-126.9734, device='cuda:0')
epoch: 127 test_true_pfm: 10486.722423329906 sim_pfm: 128.9450420262874
episode: 508 training return: tensor(-235.8344, device='cuda:0')
episode: 509 training return: tensor(41.4793, device='cuda:0')
episode: 510 training return: tensor(-999.9894, device='cuda:0')
episode: 511 training return: tensor(-132.4075, device='cuda:0')
epoch: 128 test_true_pfm: 10260.178377999222 sim_pfm: -20.635888796823565
episode: 512 training return: tensor(-979.1246, device='cuda:0')
episode: 513 training return: tensor(-57.6534, device='cuda:0')
episode: 514 training return: tensor(32.0824, device='cuda:0')
episode: 515 training return: tensor(-187.3388, device='cuda:0')
epoch: 129 test_true_pfm: 10314.448794560876 sim_pfm: -64.30412885767873
episode: 516 training return: tensor(11.3612, device='cuda:0')
episode: 517 training return: tensor(45.2870, device='cuda:0')
episode: 518 training return: tensor(-125.9634, device='cuda:0')
episode: 519 training return: tensor(-78.7305, device='cuda:0')
epoch: 130 test_true_pfm: 10384.344601192228 sim_pfm: -46.02355368901044
episode: 520 training return: tensor(70.1040, device='cuda:0')
episode: 521 training return: tensor(-999.9236, device='cuda:0')
episode: 522 training return: tensor(-48.4983, device='cuda:0')
episode: 523 training return: tensor(69.8312, device='cuda:0')
epoch: 131 test_true_pfm: 9938.994537427772 sim_pfm: 82.14462716473888
episode: 524 training return: tensor(-999.8395, device='cuda:0')
episode: 525 training return: tensor(51.4894, device='cuda:0')
episode: 526 training return: tensor(-847.8049, device='cuda:0')
episode: 527 training return: tensor(-106.0448, device='cuda:0')
epoch: 132 test_true_pfm: 6623.473284200089 sim_pfm: 9.138236574275652
episode: 528 training return: tensor(-997.8542, device='cuda:0')
episode: 529 training return: tensor(-64.1225, device='cuda:0')
episode: 530 training return: tensor(-999.9755, device='cuda:0')
episode: 531 training return: tensor(-62.8078, device='cuda:0')
epoch: 133 test_true_pfm: 10396.37226875891 sim_pfm: -45.05327148854849
episode: 532 training return: tensor(-501.2761, device='cuda:0')
episode: 533 training return: tensor(-64.6310, device='cuda:0')
episode: 534 training return: tensor(42.0203, device='cuda:0')
episode: 535 training return: tensor(-239.0888, device='cuda:0')
epoch: 134 test_true_pfm: 8483.38739296105 sim_pfm: 80.56284880572154
episode: 536 training return: tensor(12.3624, device='cuda:0')
episode: 537 training return: tensor(-181.3915, device='cuda:0')
episode: 538 training return: tensor(-93.8908, device='cuda:0')
episode: 539 training return: tensor(-135.5485, device='cuda:0')
epoch: 135 test_true_pfm: 10525.893879995654 sim_pfm: -293.03817300184164
episode: 540 training return: tensor(-35.6945, device='cuda:0')
episode: 541 training return: tensor(-135.5576, device='cuda:0')
episode: 542 training return: tensor(-8.9616, device='cuda:0')
episode: 543 training return: tensor(-939.7725, device='cuda:0')
epoch: 136 test_true_pfm: 6027.774963009174 sim_pfm: 61.01252852179459
episode: 544 training return: tensor(-103.0483, device='cuda:0')
episode: 545 training return: tensor(-13.8359, device='cuda:0')
episode: 546 training return: tensor(-292.5146, device='cuda:0')
episode: 547 training return: tensor(29.7373, device='cuda:0')
epoch: 137 test_true_pfm: 10493.677248545528 sim_pfm: -209.73408271700222
episode: 548 training return: tensor(-87.7634, device='cuda:0')
episode: 549 training return: tensor(-138.7143, device='cuda:0')
episode: 550 training return: tensor(-179.9112, device='cuda:0')
episode: 551 training return: tensor(-279.4901, device='cuda:0')
epoch: 138 test_true_pfm: 8708.034850233786 sim_pfm: 186.79927191598108
episode: 552 training return: tensor(-87.2436, device='cuda:0')
episode: 553 training return: tensor(-38.2382, device='cuda:0')
episode: 554 training return: tensor(-185.0428, device='cuda:0')
episode: 555 training return: tensor(-423.9702, device='cuda:0')
epoch: 139 test_true_pfm: 7896.465836484589 sim_pfm: 29.253110252689414
episode: 556 training return: tensor(61.7762, device='cuda:0')
episode: 557 training return: tensor(-383.2242, device='cuda:0')
episode: 558 training return: tensor(-55.7331, device='cuda:0')
episode: 559 training return: tensor(-999.9402, device='cuda:0')
epoch: 140 test_true_pfm: 7568.852707929163 sim_pfm: -8.499717327795224
episode: 560 training return: tensor(29.8575, device='cuda:0')
episode: 561 training return: tensor(27.2115, device='cuda:0')
episode: 562 training return: tensor(-81.0946, device='cuda:0')
episode: 563 training return: tensor(-999.8978, device='cuda:0')
epoch: 141 test_true_pfm: 10443.428384508756 sim_pfm: 93.56872845103499
episode: 564 training return: tensor(32.7317, device='cuda:0')
episode: 565 training return: tensor(-111.0843, device='cuda:0')
episode: 566 training return: tensor(-29.3356, device='cuda:0')
episode: 567 training return: tensor(-999.9850, device='cuda:0')
epoch: 142 test_true_pfm: 10607.131225208343 sim_pfm: 113.13329658996857
episode: 568 training return: tensor(81.6778, device='cuda:0')
episode: 569 training return: tensor(-992.5649, device='cuda:0')
episode: 570 training return: tensor(-291.0863, device='cuda:0')
episode: 571 training return: tensor(-139.5869, device='cuda:0')
epoch: 143 test_true_pfm: 10429.11886094777 sim_pfm: 65.3014226203668
episode: 572 training return: tensor(-820.0223, device='cuda:0')
episode: 573 training return: tensor(-109.9256, device='cuda:0')
episode: 574 training return: tensor(-123.8328, device='cuda:0')
episode: 575 training return: tensor(-128.4693, device='cuda:0')
epoch: 144 test_true_pfm: 8728.474043521072 sim_pfm: 69.44487206214883
episode: 576 training return: tensor(-451.0764, device='cuda:0')
episode: 577 training return: tensor(-127.2222, device='cuda:0')
episode: 578 training return: tensor(-446.1857, device='cuda:0')
episode: 579 training return: tensor(-273.4347, device='cuda:0')
epoch: 145 test_true_pfm: 10458.099354838616 sim_pfm: -74.58795705157293
episode: 580 training return: tensor(-106.0283, device='cuda:0')
episode: 581 training return: tensor(-110.7283, device='cuda:0')
episode: 582 training return: tensor(-265.1637, device='cuda:0')
episode: 583 training return: tensor(-15.1390, device='cuda:0')
epoch: 146 test_true_pfm: 6877.841643873031 sim_pfm: 159.0660934302335
episode: 584 training return: tensor(-56.9686, device='cuda:0')
episode: 585 training return: tensor(-151.5316, device='cuda:0')
episode: 586 training return: tensor(-57.2410, device='cuda:0')
episode: 587 training return: tensor(-44.0033, device='cuda:0')
epoch: 147 test_true_pfm: 10347.416613758005 sim_pfm: 102.92121184721084
episode: 588 training return: tensor(-34.6002, device='cuda:0')
episode: 589 training return: tensor(24.5509, device='cuda:0')
episode: 590 training return: tensor(-40.8886, device='cuda:0')
episode: 591 training return: tensor(47.2663, device='cuda:0')
epoch: 148 test_true_pfm: 10315.969645693483 sim_pfm: 124.9570233601941
episode: 592 training return: tensor(73.1339, device='cuda:0')
episode: 593 training return: tensor(29.4819, device='cuda:0')
episode: 594 training return: tensor(24.8552, device='cuda:0')
episode: 595 training return: tensor(-999.9993, device='cuda:0')
epoch: 149 test_true_pfm: 8904.045166042937 sim_pfm: 40.80785448736666
episode: 596 training return: tensor(104.4979, device='cuda:0')
episode: 597 training return: tensor(-24.0689, device='cuda:0')
episode: 598 training return: tensor(46.6254, device='cuda:0')
episode: 599 training return: tensor(-413.0959, device='cuda:0')
epoch: 150 test_true_pfm: 10534.60870057228 sim_pfm: -242.01862829915868
