['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '4']
epoch: 0 training_loss 0.2988944184035063 test_loss: 0.19968656301498414
epoch: 1 training_loss 0.1713466853648424 test_loss: 0.18802872896194459
epoch: 2 training_loss 0.14227664437144993 test_loss: 0.13244255781173705
epoch: 3 training_loss 0.13239191889762877 test_loss: 0.13493790626525878
epoch: 4 training_loss 0.13350108467042446 test_loss: 0.12088613510131836
epoch: 5 training_loss 0.1304020471125841 test_loss: 0.11398166418075562
epoch: 6 training_loss 0.1258213621005416 test_loss: 0.11844350099563598
epoch: 7 training_loss 0.13448189586400985 test_loss: 0.11441224813461304
epoch: 8 training_loss 0.11379730124026537 test_loss: 0.1263113498687744
epoch: 9 training_loss 0.11492189917713404 test_loss: 0.1071563720703125
epoch: 10 training_loss 0.1162107684276998 test_loss: 0.13507362604141235
epoch: 11 training_loss 0.11531881786882878 test_loss: 0.11292488574981689
epoch: 12 training_loss 0.11577024197205901 test_loss: 0.12561618089675902
epoch: 13 training_loss 0.10959550810977817 test_loss: 0.12418869733810425
epoch: 14 training_loss 0.11765598814934491 test_loss: 0.10884236097335816
epoch: 15 training_loss 0.11624107159674167 test_loss: 0.1035188913345337
epoch: 16 training_loss 0.1127777504734695 test_loss: 0.09950796365737916
epoch: 17 training_loss 0.11223269201815128 test_loss: 0.11330975294113159
epoch: 18 training_loss 0.10512558203190565 test_loss: 0.11994165182113647
epoch: 19 training_loss 0.10876409087330102 test_loss: 0.11102827787399291
epoch: 20 training_loss 0.11566539384424686 test_loss: 0.09662574529647827
epoch: 21 training_loss 0.10517324704676867 test_loss: 0.10387084484100342
epoch: 22 training_loss 0.11050517188385128 test_loss: 0.11777623891830444
epoch: 23 training_loss 0.11308875564485789 test_loss: 0.1079246997833252
epoch: 24 training_loss 0.11106281027197838 test_loss: 0.10331908464431763
epoch: 25 training_loss 0.11784579504281283 test_loss: 0.11283127069473267
epoch: 26 training_loss 0.10828413762152195 test_loss: 0.11285443305969238
epoch: 27 training_loss 0.10879153162240982 test_loss: 0.12905473709106446
epoch: 28 training_loss 0.11509221563115717 test_loss: 0.11659448146820069
epoch: 29 training_loss 0.10884343937039376 test_loss: 0.11575150489807129
epoch: 30 training_loss 0.11522695552557707 test_loss: 0.11451506614685059
epoch: 31 training_loss 0.1100738437473774 test_loss: 0.10067322254180908
epoch: 32 training_loss 0.11100759033113718 test_loss: 0.10440644025802612
epoch: 33 training_loss 0.11653317742049694 test_loss: 0.10200057029724122
epoch: 34 training_loss 0.119687374047935 test_loss: 0.09557099342346191
epoch: 35 training_loss 0.10922296449542046 test_loss: 0.10677098035812378
epoch: 36 training_loss 0.11229691442102194 test_loss: 0.09573235511779785
epoch: 37 training_loss 0.11070122448727489 test_loss: 0.10213656425476074
epoch: 38 training_loss 0.10149155646562576 test_loss: 0.12165986299514771
epoch: 39 training_loss 0.10556888993829489 test_loss: 0.09353552460670471
epoch: 40 training_loss 0.10343869779258967 test_loss: 0.11471766233444214
epoch: 41 training_loss 0.10373382363468409 test_loss: 0.0987980306148529
epoch: 42 training_loss 0.11279576372355223 test_loss: 0.08682754039764404
epoch: 43 training_loss 0.10417871918529271 test_loss: 0.11485384702682495
epoch: 44 training_loss 0.10365485459566116 test_loss: 0.103214430809021
epoch: 45 training_loss 0.11078313782811165 test_loss: 0.11315659284591675
epoch: 46 training_loss 0.11135053716599941 test_loss: 0.11598520278930664
epoch: 47 training_loss 0.11296976294368505 test_loss: 0.1179085612297058
epoch: 48 training_loss 0.11199225503951311 test_loss: 0.11069346666336059
epoch: 49 training_loss 0.11104352463036776 test_loss: 0.1065470814704895
epoch: 50 training_loss 0.10811002302914857 test_loss: 0.10751148462295532
epoch: 51 training_loss 0.10476413141936064 test_loss: 0.09900487661361694
epoch: 52 training_loss 0.10916083507239818 test_loss: 0.10911405086517334
epoch: 53 training_loss 0.10902772586792707 test_loss: 0.10613569021224975
epoch: 54 training_loss 0.10971124049276114 test_loss: 0.1072995901107788
epoch: 55 training_loss 0.1018589037284255 test_loss: 0.11082017421722412
epoch: 56 training_loss 0.11156822230666875 test_loss: 0.10562715530395508
epoch: 57 training_loss 0.10167684465646744 test_loss: 0.096313738822937
epoch: 58 training_loss 0.10166137168183922 test_loss: 0.10219358205795288
epoch: 59 training_loss 0.10203542299568653 test_loss: 0.12488374710083008
epoch: 60 training_loss 0.10065638342872262 test_loss: 0.08835632801055908
epoch: 61 training_loss 0.10493359815329313 test_loss: 0.11300979852676392
epoch: 62 training_loss 0.11105998121201992 test_loss: 0.10859411954879761
epoch: 63 training_loss 0.10704596109688282 test_loss: 0.11172919273376465
epoch: 64 training_loss 0.10499903189018368 test_loss: 0.10252718925476074
epoch: 65 training_loss 0.10860980655997991 test_loss: 0.1042952299118042
epoch: 66 training_loss 0.10441260814666747 test_loss: 0.1025586485862732
epoch: 67 training_loss 0.10596968749538065 test_loss: 0.10875332355499268
epoch: 68 training_loss 0.11160112023353577 test_loss: 0.10455460548400879
epoch: 69 training_loss 0.10182309655472636 test_loss: 0.09435190558433533
epoch: 70 training_loss 0.10592734642326831 test_loss: 0.11097922325134277
epoch: 71 training_loss 0.10465431177988649 test_loss: 0.11568524837493896
epoch: 72 training_loss 0.10485277380794286 test_loss: 0.100773024559021
epoch: 73 training_loss 0.10616935553029179 test_loss: 0.11587927341461182
epoch: 74 training_loss 0.10220220010727644 test_loss: 0.09533226490020752
epoch: 75 training_loss 0.10637847110629081 test_loss: 0.10896148681640624
epoch: 76 training_loss 0.0979723865352571 test_loss: 0.0982150375843048
epoch: 77 training_loss 0.10546792678534984 test_loss: 0.08245312571525573
epoch: 78 training_loss 0.10492733187973499 test_loss: 0.11842039823532105
epoch: 79 training_loss 0.10431383188813925 test_loss: 0.09014144539833069
epoch: 80 training_loss 0.10576012782752514 test_loss: 0.10334115028381348
epoch: 81 training_loss 0.11173083145171404 test_loss: 0.10134952068328858
epoch: 82 training_loss 0.11246999748051166 test_loss: 0.09920065999031066
epoch: 83 training_loss 0.1027888542227447 test_loss: 0.09146549105644226
epoch: 84 training_loss 0.10611222464591265 test_loss: 0.0993074655532837
epoch: 85 training_loss 0.10617483958601952 test_loss: 0.10946221351623535
epoch: 86 training_loss 0.1081939833983779 test_loss: 0.10766545534133912
epoch: 87 training_loss 0.10464974528178572 test_loss: 0.10096542835235596
epoch: 88 training_loss 0.10799742436036468 test_loss: 0.11932660341262817
epoch: 89 training_loss 0.1181091434136033 test_loss: 0.10995019674301147
epoch: 90 training_loss 0.10115474972873927 test_loss: 0.09266810417175293
epoch: 91 training_loss 0.10289393154904246 test_loss: 0.12030160427093506
epoch: 92 training_loss 0.1042061710357666 test_loss: 0.10419658422470093
epoch: 93 training_loss 0.10977961611002683 test_loss: 0.09042572975158691
epoch: 94 training_loss 0.10707565367221833 test_loss: 0.11209251880645751
epoch: 95 training_loss 0.10623266819864512 test_loss: 0.10330837965011597
epoch: 96 training_loss 0.1064985778555274 test_loss: 0.10976803302764893
epoch: 97 training_loss 0.10544604126363993 test_loss: 0.09682844877243042
epoch: 98 training_loss 0.10910568583756686 test_loss: 0.0968904435634613
epoch: 99 training_loss 0.0968148484826088 test_loss: 0.11358715295791626
epoch: 100 training_loss 0.1031567258387804 test_loss: 0.10206079483032227
epoch: 101 training_loss 0.10277497885748745 test_loss: 0.12287474870681762
epoch: 102 training_loss 0.10042269621044397 test_loss: 0.10305901765823364
epoch: 103 training_loss 0.10470864910632371 test_loss: 0.10953810214996337
epoch: 104 training_loss 0.10887820394709706 test_loss: 0.11339329481124878
epoch: 105 training_loss 0.10317434890195727 test_loss: 0.09786556959152222
epoch: 106 training_loss 0.10257902752608061 test_loss: 0.10421614646911621
epoch: 107 training_loss 0.10023795060813427 test_loss: 0.11649736166000366
epoch: 108 training_loss 0.10258172258734703 test_loss: 0.10198779106140136
epoch: 109 training_loss 0.10543463792651891 test_loss: 0.09156873226165771
epoch: 110 training_loss 0.1044478426128626 test_loss: 0.11025004386901856
epoch: 111 training_loss 0.10441555408760905 test_loss: 0.1005757212638855
epoch: 112 training_loss 0.1030243630334735 test_loss: 0.11824319362640381
epoch: 113 training_loss 0.10317250857129694 test_loss: 0.12998611927032472
epoch: 114 training_loss 0.11226651046425104 test_loss: 0.11245561838150024
epoch: 115 training_loss 0.10485755983740092 test_loss: 0.09179146289825439
epoch: 116 training_loss 0.10610593996942043 test_loss: 0.11737996339797974
epoch: 117 training_loss 0.10239791283383966 test_loss: 0.1090928077697754
epoch: 118 training_loss 0.10947047505527735 test_loss: 0.11581177711486816
epoch: 119 training_loss 0.10100430049002171 test_loss: 0.11060160398483276
epoch: 120 training_loss 0.10831984549760819 test_loss: 0.09462128281593322
epoch: 121 training_loss 0.1078016379661858 test_loss: 0.09362375140190124
epoch: 122 training_loss 0.10283407857641577 test_loss: 0.10881161689758301
epoch: 123 training_loss 0.09984273660928011 test_loss: 0.11381491422653198
epoch: 124 training_loss 0.11315192196518183 test_loss: 0.11785784959793091
epoch: 125 training_loss 0.10585143782198429 test_loss: 0.11175005435943604
epoch: 126 training_loss 0.09816630243323743 test_loss: 0.09115668535232543
epoch: 127 training_loss 0.10971286635845899 test_loss: 0.1079509973526001
epoch: 128 training_loss 0.10515934765338898 test_loss: 0.10535672903060914
epoch: 129 training_loss 0.10555131018161773 test_loss: 0.0974940836429596
epoch: 130 training_loss 0.10658370833843947 test_loss: 0.10362851619720459
epoch: 131 training_loss 0.10622537441551685 test_loss: 0.10149194002151489
epoch: 132 training_loss 0.10269115306437016 test_loss: 0.10577770471572875
epoch: 133 training_loss 0.11195973679423332 test_loss: 0.10720030069351197
epoch: 134 training_loss 0.09802365217357874 test_loss: 0.10196050405502319
epoch: 135 training_loss 0.10720778720453382 test_loss: 0.11682016849517822
epoch: 136 training_loss 0.09251781068742275 test_loss: 0.10963771343231202
epoch: 137 training_loss 0.10992912091314792 test_loss: 0.10039628744125366
epoch: 138 training_loss 0.10175086099654436 test_loss: 0.09169321060180664
epoch: 139 training_loss 0.10482038661837578 test_loss: 0.105617094039917
epoch: 140 training_loss 0.10665125701576471 test_loss: 0.08870346546173095
epoch: 141 training_loss 0.10875319622457028 test_loss: 0.10009747743606567
epoch: 142 training_loss 0.10040584616363049 test_loss: 0.111769700050354
epoch: 143 training_loss 0.10738260574638843 test_loss: 0.11320701837539673
epoch: 144 training_loss 0.10450087130069732 test_loss: 0.10124732255935669
epoch: 145 training_loss 0.10452965941280126 test_loss: 0.09566643238067626
epoch: 146 training_loss 0.10597925629466771 test_loss: 0.10312856435775757
epoch: 147 training_loss 0.11085606299340725 test_loss: 0.10015633106231689
epoch: 148 training_loss 0.09964614417403936 test_loss: 0.11434125900268555
epoch: 149 training_loss 0.10547580666840077 test_loss: 0.10269706249237061
epoch: 0 training_loss 48.6687544631958 test_loss: 23.654598999023438
epoch: 1 training_loss 18.134256315231323 test_loss: 14.858058166503906
epoch: 2 training_loss 13.45438199043274 test_loss: 11.872150421142578
epoch: 3 training_loss 10.672081546783447 test_loss: 9.655561828613282
epoch: 4 training_loss 8.913513207435608 test_loss: 8.330359649658202
epoch: 5 training_loss 7.6629901885986325 test_loss: 7.121092987060547
epoch: 6 training_loss 6.68373544216156 test_loss: 6.013425827026367
epoch: 7 training_loss 5.959389081001282 test_loss: 5.511309814453125
epoch: 8 training_loss 5.282782545089722 test_loss: 5.060509872436524
epoch: 9 training_loss 4.935828816890717 test_loss: 4.846875762939453
epoch: 10 training_loss 4.651437921524048 test_loss: 4.440437316894531
epoch: 11 training_loss 4.298973324298859 test_loss: 4.164333724975586
epoch: 12 training_loss 4.016805007457733 test_loss: 3.7934490203857423
epoch: 13 training_loss 3.876436665058136 test_loss: 3.6817577362060545
epoch: 14 training_loss 3.697451014518738 test_loss: 3.605699920654297
epoch: 15 training_loss 3.502963228225708 test_loss: 3.3811676025390627
epoch: 16 training_loss 3.2912309074401858 test_loss: 3.3303184509277344
epoch: 17 training_loss 3.2699018001556395 test_loss: 3.2645355224609376
epoch: 18 training_loss 3.195839080810547 test_loss: 3.1725561141967775
epoch: 19 training_loss 3.016490843296051 test_loss: 2.99300479888916
epoch: 20 training_loss 2.907178456783295 test_loss: 2.842025947570801
epoch: 21 training_loss 2.8486120295524597 test_loss: 2.7172866821289063
epoch: 22 training_loss 2.8078028917312623 test_loss: 2.774609375
epoch: 23 training_loss 2.807185323238373 test_loss: 2.5469537734985352
epoch: 24 training_loss 2.6847064805030825 test_loss: 2.7200204849243166
epoch: 25 training_loss 2.636527326107025 test_loss: 2.5825149536132814
epoch: 26 training_loss 2.5993435621261596 test_loss: 2.57412166595459
epoch: 27 training_loss 2.5194732880592348 test_loss: 2.4356706619262694
epoch: 28 training_loss 2.467738914489746 test_loss: 2.462663269042969
epoch: 29 training_loss 2.4917989683151247 test_loss: 2.3819541931152344
epoch: 30 training_loss 2.4023181128501894 test_loss: 2.362758255004883
epoch: 31 training_loss 2.4038562893867494 test_loss: 2.449031639099121
epoch: 32 training_loss 2.345192151069641 test_loss: 2.292712593078613
epoch: 33 training_loss 2.2615295946598053 test_loss: 2.331387519836426
epoch: 34 training_loss 2.2199543499946595 test_loss: 2.385898399353027
epoch: 35 training_loss 2.2856825160980225 test_loss: 2.2091732025146484
epoch: 36 training_loss 2.1811695778369904 test_loss: 2.126246452331543
epoch: 37 training_loss 2.1880362284183503 test_loss: 2.205771636962891
epoch: 38 training_loss 2.15542062163353 test_loss: 2.2414419174194338
epoch: 39 training_loss 2.1583926951885224 test_loss: 2.1674070358276367
epoch: 40 training_loss 2.1192718315124512 test_loss: 2.085434341430664
epoch: 41 training_loss 2.1285625648498536 test_loss: 2.136318588256836
epoch: 42 training_loss 2.087439836263657 test_loss: 2.181778144836426
epoch: 43 training_loss 2.051411693096161 test_loss: 2.05804328918457
epoch: 44 training_loss 2.0387141382694245 test_loss: 2.040264129638672
epoch: 45 training_loss 2.04915762424469 test_loss: 2.0250856399536135
epoch: 46 training_loss 2.0136669528484346 test_loss: 2.1069076538085936
epoch: 47 training_loss 2.0059772181510924 test_loss: 2.018252944946289
epoch: 48 training_loss 1.9794503808021546 test_loss: 1.9311874389648438
epoch: 49 training_loss 1.9678668284416199 test_loss: 1.9694725036621095
epoch: 50 training_loss 1.927396867275238 test_loss: 2.007254219055176
epoch: 51 training_loss 1.907571007013321 test_loss: 1.9697992324829101
epoch: 52 training_loss 1.945804282426834 test_loss: 1.89998779296875
epoch: 53 training_loss 1.8973696303367615 test_loss: 1.9024457931518555
epoch: 54 training_loss 1.8802058064937592 test_loss: 1.9380462646484375
epoch: 55 training_loss 1.8866695499420165 test_loss: 1.893874740600586
epoch: 56 training_loss 1.8455764472484588 test_loss: 1.9029268264770507
epoch: 57 training_loss 1.8492697763442993 test_loss: 1.836273765563965
epoch: 58 training_loss 1.840901951789856 test_loss: 1.7749910354614258
epoch: 59 training_loss 1.85529568195343 test_loss: 1.870440673828125
epoch: 60 training_loss 1.8077497124671935 test_loss: 1.9458026885986328
epoch: 61 training_loss 1.7972225391864776 test_loss: 1.8534463882446288
epoch: 62 training_loss 1.8200279557704926 test_loss: 1.782120132446289
epoch: 63 training_loss 1.7916875243186952 test_loss: 1.722935676574707
epoch: 64 training_loss 1.763977850675583 test_loss: 1.8116037368774414
epoch: 65 training_loss 1.767056988477707 test_loss: 1.8461097717285155
epoch: 66 training_loss 1.7978634750843048 test_loss: 1.796246337890625
epoch: 67 training_loss 1.760707335472107 test_loss: 1.72626953125
epoch: 68 training_loss 1.774335629940033 test_loss: 1.7580533981323243
epoch: 69 training_loss 1.7533989715576173 test_loss: 1.7941295623779296
epoch: 70 training_loss 1.7339412331581117 test_loss: 1.726568603515625
epoch: 71 training_loss 1.7365432918071746 test_loss: 1.7161407470703125
epoch: 72 training_loss 1.7392025005817413 test_loss: 1.7410903930664063
epoch: 73 training_loss 1.744058482646942 test_loss: 1.7467460632324219
epoch: 74 training_loss 1.7189116561412812 test_loss: 1.8348237991333007
epoch: 75 training_loss 1.7228698587417604 test_loss: 1.6895925521850585
epoch: 76 training_loss 1.7492053937911987 test_loss: 1.6871608734130858
epoch: 77 training_loss 1.7034356427192687 test_loss: 1.7521114349365234
epoch: 78 training_loss 1.6915867352485656 test_loss: 1.6420526504516602
epoch: 79 training_loss 1.671228278875351 test_loss: 1.6783214569091798
epoch: 80 training_loss 1.7131742668151855 test_loss: 1.7505077362060546
epoch: 81 training_loss 1.7020829236507415 test_loss: 1.6809680938720704
epoch: 82 training_loss 1.670107423067093 test_loss: 1.7198801040649414
epoch: 83 training_loss 1.6566135251522065 test_loss: 1.7121341705322266
epoch: 84 training_loss 1.726501338481903 test_loss: 1.6896366119384765
epoch: 85 training_loss 1.6539609003067017 test_loss: 1.6238136291503906
epoch: 86 training_loss 1.615350753068924 test_loss: 1.6518369674682618
epoch: 87 training_loss 1.6673166465759277 test_loss: 1.708843994140625
epoch: 88 training_loss 1.6392962098121644 test_loss: 1.6830158233642578
epoch: 89 training_loss 1.6458900356292725 test_loss: 1.649683952331543
epoch: 90 training_loss 1.6295098817348481 test_loss: 1.6473377227783204
epoch: 91 training_loss 1.6502836740016937 test_loss: 1.6283742904663085
epoch: 92 training_loss 1.6329228329658507 test_loss: 1.603984260559082
epoch: 93 training_loss 1.587843998670578 test_loss: 1.6154762268066407
epoch: 94 training_loss 1.624726619720459 test_loss: 1.627279281616211
epoch: 95 training_loss 1.589262182712555 test_loss: 1.6395851135253907
epoch: 96 training_loss 1.5807714998722076 test_loss: 1.5907369613647462
epoch: 97 training_loss 1.587741219997406 test_loss: 1.5738073348999024
epoch: 98 training_loss 1.6008715236186981 test_loss: 1.5915019989013672
epoch: 99 training_loss 1.592182482481003 test_loss: 1.529829502105713
epoch: 100 training_loss 1.5877570700645447 test_loss: 1.5998937606811523
epoch: 101 training_loss 1.5704263257980347 test_loss: 1.575549602508545
epoch: 102 training_loss 1.567033077478409 test_loss: 1.598705768585205
epoch: 103 training_loss 1.5905003786087035 test_loss: 1.6194692611694337
epoch: 104 training_loss 1.567072013616562 test_loss: 1.5509437561035155
epoch: 105 training_loss 1.5918776392936707 test_loss: 1.5457091331481934
epoch: 106 training_loss 1.571960232257843 test_loss: 1.5870108604431152
epoch: 107 training_loss 1.5578299355506897 test_loss: 1.535335159301758
epoch: 108 training_loss 1.5610554242134094 test_loss: 1.5652053833007813
epoch: 109 training_loss 1.5503386294841766 test_loss: 1.5199314117431642
epoch: 110 training_loss 1.5403928339481354 test_loss: 1.5475889205932618
epoch: 111 training_loss 1.5607068061828613 test_loss: 1.5486215591430663
epoch: 112 training_loss 1.543528255224228 test_loss: 1.5475934982299804
epoch: 113 training_loss 1.5435273241996765 test_loss: 1.6214527130126952
epoch: 114 training_loss 1.5438273966312408 test_loss: 1.5593144416809082
epoch: 115 training_loss 1.529483858346939 test_loss: 1.529219150543213
epoch: 116 training_loss 1.5182872498035431 test_loss: 1.5267892837524415
epoch: 117 training_loss 1.5423672020435333 test_loss: 1.4977445602416992
epoch: 118 training_loss 1.535662591457367 test_loss: 1.5638741493225097
epoch: 119 training_loss 1.532851643562317 test_loss: 1.5160579681396484
epoch: 120 training_loss 1.5213048148155213 test_loss: 1.5551031112670899
epoch: 121 training_loss 1.5200250899791719 test_loss: 1.5555136680603028
epoch: 122 training_loss 1.5005125403404236 test_loss: 1.4970845222473144
epoch: 123 training_loss 1.5079383516311646 test_loss: 1.5803714752197267
epoch: 124 training_loss 1.5269589161872863 test_loss: 1.5595364570617676
epoch: 125 training_loss 1.5013271272182465 test_loss: 1.4586037635803222
epoch: 126 training_loss 1.4918260204792022 test_loss: 1.50320405960083
epoch: 127 training_loss 1.4852360367774964 test_loss: 1.5140435218811035
epoch: 128 training_loss 1.4908715355396271 test_loss: 1.5054595947265625
epoch: 129 training_loss 1.4875248301029205 test_loss: 1.4740556716918944
epoch: 130 training_loss 1.4899619114398956 test_loss: 1.5451478004455566
epoch: 131 training_loss 1.4808630561828613 test_loss: 1.4497798919677733
epoch: 132 training_loss 1.4970777571201324 test_loss: 1.4963769912719727
epoch: 133 training_loss 1.4965764605998992 test_loss: 1.4765288352966308
epoch: 134 training_loss 1.4969383418560027 test_loss: 1.4957117080688476
epoch: 135 training_loss 1.4712410819530488 test_loss: 1.4600424766540527
epoch: 136 training_loss 1.4761307561397552 test_loss: 1.4702668190002441
epoch: 137 training_loss 1.482180312871933 test_loss: 1.4530359268188477
epoch: 138 training_loss 1.4852395570278167 test_loss: 1.4830853462219238
epoch: 139 training_loss 1.4758478105068207 test_loss: 1.4662240982055663
epoch: 140 training_loss 1.4674798727035523 test_loss: 1.4708446502685546
epoch: 141 training_loss 1.4807775354385375 test_loss: 1.4852100372314454
epoch: 142 training_loss 1.45651740193367 test_loss: 1.4312414169311523
epoch: 143 training_loss 1.4638272249698638 test_loss: 1.4242067337036133
epoch: 144 training_loss 1.4687516009807586 test_loss: 1.4541176795959472
epoch: 145 training_loss 1.4850050115585327 test_loss: 1.4250645637512207
epoch: 146 training_loss 1.44961052775383 test_loss: 1.4476045608520507
epoch: 147 training_loss 1.4460083472728729 test_loss: 1.4512051582336425
epoch: 148 training_loss 1.4617633080482484 test_loss: 1.416262149810791
epoch: 149 training_loss 1.4536916160583495 test_loss: 1.4280667304992676
5050.672539104284
episode: 0 training return: tensor(-32.0415, device='cuda:0')
episode: 1 training return: tensor(-49.4674, device='cuda:0')
episode: 2 training return: tensor(-74.4031, device='cuda:0')
episode: 3 training return: tensor(1.0332, device='cuda:0')
epoch: 1 test_true_pfm: 4933.095138173506 sim_pfm: -110.1023951016444
episode: 4 training return: tensor(-68.6019, device='cuda:0')
episode: 5 training return: tensor(-40.6120, device='cuda:0')
episode: 6 training return: tensor(-99.7504, device='cuda:0')
episode: 7 training return: tensor(-241.0585, device='cuda:0')
epoch: 2 test_true_pfm: 5153.467304582944 sim_pfm: -49.33918518648716
episode: 8 training return: tensor(-112.9993, device='cuda:0')
episode: 9 training return: tensor(-95.9160, device='cuda:0')
episode: 10 training return: tensor(-28.2913, device='cuda:0')
episode: 11 training return: tensor(-2.6635, device='cuda:0')
epoch: 3 test_true_pfm: 5170.7949153554055 sim_pfm: 41.75616019256025
episode: 12 training return: tensor(-47.4464, device='cuda:0')
episode: 13 training return: tensor(19.8678, device='cuda:0')
episode: 14 training return: tensor(7.3395, device='cuda:0')
episode: 15 training return: tensor(14.5670, device='cuda:0')
epoch: 4 test_true_pfm: 5106.578171323486 sim_pfm: -28.93529145580639
episode: 16 training return: tensor(-70.2277, device='cuda:0')
episode: 17 training return: tensor(-242.9499, device='cuda:0')
episode: 18 training return: tensor(17.9227, device='cuda:0')
episode: 19 training return: tensor(-63.1891, device='cuda:0')
epoch: 5 test_true_pfm: 5159.339415471621 sim_pfm: 51.157904794614296
episode: 20 training return: tensor(-52.5140, device='cuda:0')
episode: 21 training return: tensor(-54.1719, device='cuda:0')
episode: 22 training return: tensor(37.7657, device='cuda:0')
episode: 23 training return: tensor(-124.7875, device='cuda:0')
epoch: 6 test_true_pfm: 5275.0638404614065 sim_pfm: -5.449657248313694
episode: 24 training return: tensor(-32.0180, device='cuda:0')
episode: 25 training return: tensor(-64.5773, device='cuda:0')
episode: 26 training return: tensor(-249.8929, device='cuda:0')
episode: 27 training return: tensor(-84.9322, device='cuda:0')
epoch: 7 test_true_pfm: 5104.372776782886 sim_pfm: -4.021141193351165
episode: 28 training return: tensor(-20.3581, device='cuda:0')
episode: 29 training return: tensor(-54.4131, device='cuda:0')
episode: 30 training return: tensor(35.9933, device='cuda:0')
episode: 31 training return: tensor(7.3267, device='cuda:0')
epoch: 8 test_true_pfm: 5184.116409539728 sim_pfm: -7.258135703702767
episode: 32 training return: tensor(-29.2639, device='cuda:0')
episode: 33 training return: tensor(-284.7146, device='cuda:0')
episode: 34 training return: tensor(-4.8844, device='cuda:0')
episode: 35 training return: tensor(-44.5234, device='cuda:0')
epoch: 9 test_true_pfm: 5185.948696746335 sim_pfm: 14.628195254510501
episode: 36 training return: tensor(62.6133, device='cuda:0')
episode: 37 training return: tensor(-182.2030, device='cuda:0')
episode: 38 training return: tensor(25.0980, device='cuda:0')
episode: 39 training return: tensor(-82.0784, device='cuda:0')
epoch: 10 test_true_pfm: 5246.539647032551 sim_pfm: 76.8294010637134
episode: 40 training return: tensor(-44.9473, device='cuda:0')
episode: 41 training return: tensor(-144.5847, device='cuda:0')
episode: 42 training return: tensor(-11.5818, device='cuda:0')
episode: 43 training return: tensor(30.4526, device='cuda:0')
epoch: 11 test_true_pfm: 5164.679554088314 sim_pfm: 85.52640649673413
episode: 44 training return: tensor(-12.0002, device='cuda:0')
episode: 45 training return: tensor(60.6613, device='cuda:0')
episode: 46 training return: tensor(-56.3966, device='cuda:0')
episode: 47 training return: tensor(-105.3000, device='cuda:0')
epoch: 12 test_true_pfm: 5215.16641639495 sim_pfm: -3.49462558189407
episode: 48 training return: tensor(-125.1433, device='cuda:0')
episode: 49 training return: tensor(17.5659, device='cuda:0')
episode: 50 training return: tensor(5.1990, device='cuda:0')
episode: 51 training return: tensor(-145.3030, device='cuda:0')
epoch: 13 test_true_pfm: 5265.314470344713 sim_pfm: 89.21517499145314
episode: 52 training return: tensor(-67.4036, device='cuda:0')
episode: 53 training return: tensor(-2.5988, device='cuda:0')
episode: 54 training return: tensor(-34.3414, device='cuda:0')
episode: 55 training return: tensor(-7.8474, device='cuda:0')
epoch: 14 test_true_pfm: 5208.294363880961 sim_pfm: 92.66500363683251
episode: 56 training return: tensor(164.2432, device='cuda:0')
episode: 57 training return: tensor(-143.1862, device='cuda:0')
episode: 58 training return: tensor(-72.3820, device='cuda:0')
episode: 59 training return: tensor(38.7448, device='cuda:0')
epoch: 15 test_true_pfm: 5212.473311823715 sim_pfm: 7.159838116533744
episode: 60 training return: tensor(-29.1664, device='cuda:0')
episode: 61 training return: tensor(61.6733, device='cuda:0')
episode: 62 training return: tensor(-12.0051, device='cuda:0')
episode: 63 training return: tensor(-180.2641, device='cuda:0')
epoch: 16 test_true_pfm: 5357.1553531558675 sim_pfm: 87.3799771956401
episode: 64 training return: tensor(-29.0646, device='cuda:0')
episode: 65 training return: tensor(-100.5607, device='cuda:0')
episode: 66 training return: tensor(10.0761, device='cuda:0')
episode: 67 training return: tensor(-20.9866, device='cuda:0')
epoch: 17 test_true_pfm: 5297.629885828631 sim_pfm: 83.32065585546661
episode: 68 training return: tensor(-73.3419, device='cuda:0')
episode: 69 training return: tensor(-10.7382, device='cuda:0')
episode: 70 training return: tensor(-19.1334, device='cuda:0')
episode: 71 training return: tensor(-122.6975, device='cuda:0')
epoch: 18 test_true_pfm: 5228.86711972897 sim_pfm: 77.86763681886562
episode: 72 training return: tensor(-26.3212, device='cuda:0')
episode: 73 training return: tensor(-25.9677, device='cuda:0')
episode: 74 training return: tensor(-70.0754, device='cuda:0')
episode: 75 training return: tensor(41.2029, device='cuda:0')
epoch: 19 test_true_pfm: 5289.2540298166 sim_pfm: 70.92394252061301
episode: 76 training return: tensor(-24.0574, device='cuda:0')
episode: 77 training return: tensor(100.5908, device='cuda:0')
episode: 78 training return: tensor(67.0419, device='cuda:0')
episode: 79 training return: tensor(23.7408, device='cuda:0')
epoch: 20 test_true_pfm: 5354.697358042956 sim_pfm: 85.98493271710079
episode: 80 training return: tensor(64.7634, device='cuda:0')
episode: 81 training return: tensor(-43.5092, device='cuda:0')
episode: 82 training return: tensor(8.7109, device='cuda:0')
episode: 83 training return: tensor(47.5472, device='cuda:0')
epoch: 21 test_true_pfm: 5333.854650767119 sim_pfm: 125.41983276214644
episode: 84 training return: tensor(75.6017, device='cuda:0')
episode: 85 training return: tensor(-76.1879, device='cuda:0')
episode: 86 training return: tensor(-6.2092, device='cuda:0')
episode: 87 training return: tensor(111.2479, device='cuda:0')
epoch: 22 test_true_pfm: 5288.579336105045 sim_pfm: 140.6564690199642
episode: 88 training return: tensor(-8.6006, device='cuda:0')
episode: 89 training return: tensor(71.4498, device='cuda:0')
episode: 90 training return: tensor(76.1910, device='cuda:0')
episode: 91 training return: tensor(48.3452, device='cuda:0')
epoch: 23 test_true_pfm: 5279.225065798719 sim_pfm: 115.84889581752941
episode: 92 training return: tensor(87.4127, device='cuda:0')
episode: 93 training return: tensor(16.7728, device='cuda:0')
episode: 94 training return: tensor(2.8916, device='cuda:0')
episode: 95 training return: tensor(-23.7651, device='cuda:0')
epoch: 24 test_true_pfm: 5352.105072470146 sim_pfm: 146.0007939726735
episode: 96 training return: tensor(-103.4037, device='cuda:0')
episode: 97 training return: tensor(128.7272, device='cuda:0')
episode: 98 training return: tensor(-21.8122, device='cuda:0')
episode: 99 training return: tensor(28.0810, device='cuda:0')
epoch: 25 test_true_pfm: 5383.968056194226 sim_pfm: 44.30777903456086
episode: 100 training return: tensor(49.5716, device='cuda:0')
episode: 101 training return: tensor(81.6858, device='cuda:0')
episode: 102 training return: tensor(25.9068, device='cuda:0')
episode: 103 training return: tensor(-1.4049, device='cuda:0')
epoch: 26 test_true_pfm: 5247.906179556271 sim_pfm: 38.439033193300325
episode: 104 training return: tensor(29.8610, device='cuda:0')
episode: 105 training return: tensor(103.6682, device='cuda:0')
episode: 106 training return: tensor(84.0966, device='cuda:0')
episode: 107 training return: tensor(-115.1978, device='cuda:0')
epoch: 27 test_true_pfm: 5203.126949423971 sim_pfm: 154.55747806009217
episode: 108 training return: tensor(57.1655, device='cuda:0')
episode: 109 training return: tensor(103.7432, device='cuda:0')
episode: 110 training return: tensor(117.6279, device='cuda:0')
episode: 111 training return: tensor(11.5276, device='cuda:0')
epoch: 28 test_true_pfm: 5389.044585502434 sim_pfm: 187.56725007181134
episode: 112 training return: tensor(44.0574, device='cuda:0')
episode: 113 training return: tensor(108.2455, device='cuda:0')
episode: 114 training return: tensor(77.3014, device='cuda:0')
episode: 115 training return: tensor(34.9994, device='cuda:0')
epoch: 29 test_true_pfm: 5331.97408421775 sim_pfm: 148.9367603516827
episode: 116 training return: tensor(-7.7658, device='cuda:0')
episode: 117 training return: tensor(131.7910, device='cuda:0')
episode: 118 training return: tensor(19.7833, device='cuda:0')
episode: 119 training return: tensor(-82.3038, device='cuda:0')
epoch: 30 test_true_pfm: 5362.133475571431 sim_pfm: 204.1571666628782
episode: 120 training return: tensor(-29.5163, device='cuda:0')
episode: 121 training return: tensor(141.0042, device='cuda:0')
episode: 122 training return: tensor(87.5117, device='cuda:0')
episode: 123 training return: tensor(192.2339, device='cuda:0')
epoch: 31 test_true_pfm: 5295.1011850592 sim_pfm: 150.9120449660113
episode: 124 training return: tensor(-36.5853, device='cuda:0')
episode: 125 training return: tensor(79.4951, device='cuda:0')
episode: 126 training return: tensor(93.2252, device='cuda:0')
episode: 127 training return: tensor(-98.3484, device='cuda:0')
epoch: 32 test_true_pfm: 5337.215008199543 sim_pfm: 142.5445121753049
episode: 128 training return: tensor(177.9804, device='cuda:0')
episode: 129 training return: tensor(-47.6370, device='cuda:0')
episode: 130 training return: tensor(141.1807, device='cuda:0')
episode: 131 training return: tensor(102.0815, device='cuda:0')
epoch: 33 test_true_pfm: 5454.390300798593 sim_pfm: 96.61870267077272
episode: 132 training return: tensor(2.4411, device='cuda:0')
episode: 133 training return: tensor(-36.8062, device='cuda:0')
episode: 134 training return: tensor(57.8223, device='cuda:0')
episode: 135 training return: tensor(73.6556, device='cuda:0')
epoch: 34 test_true_pfm: 5339.101411745202 sim_pfm: 184.25816755768997
episode: 136 training return: tensor(49.1579, device='cuda:0')
episode: 137 training return: tensor(83.6076, device='cuda:0')
episode: 138 training return: tensor(99.2179, device='cuda:0')
episode: 139 training return: tensor(118.3380, device='cuda:0')
epoch: 35 test_true_pfm: 5349.308711224897 sim_pfm: 214.43968715657442
episode: 140 training return: tensor(131.8241, device='cuda:0')
episode: 141 training return: tensor(-11.8311, device='cuda:0')
episode: 142 training return: tensor(2.2506, device='cuda:0')
episode: 143 training return: tensor(149.2445, device='cuda:0')
epoch: 36 test_true_pfm: 5446.82210543819 sim_pfm: 175.31032654216202
episode: 144 training return: tensor(98.2992, device='cuda:0')
episode: 145 training return: tensor(176.9303, device='cuda:0')
episode: 146 training return: tensor(186.8194, device='cuda:0')
episode: 147 training return: tensor(209.1310, device='cuda:0')
epoch: 37 test_true_pfm: 5421.332389812493 sim_pfm: 176.29517039027027
episode: 148 training return: tensor(-6.9095, device='cuda:0')
episode: 149 training return: tensor(49.4130, device='cuda:0')
episode: 150 training return: tensor(178.6183, device='cuda:0')
episode: 151 training return: tensor(135.4411, device='cuda:0')
epoch: 38 test_true_pfm: 5408.325171981036 sim_pfm: 140.34019892845149
episode: 152 training return: tensor(209.5324, device='cuda:0')
episode: 153 training return: tensor(203.2798, device='cuda:0')
episode: 154 training return: tensor(131.1245, device='cuda:0')
episode: 155 training return: tensor(53.3405, device='cuda:0')
epoch: 39 test_true_pfm: 5373.41400567998 sim_pfm: 203.97079000352338
episode: 156 training return: tensor(17.9947, device='cuda:0')
episode: 157 training return: tensor(-39.0012, device='cuda:0')
episode: 158 training return: tensor(168.7010, device='cuda:0')
episode: 159 training return: tensor(-15.3158, device='cuda:0')
epoch: 40 test_true_pfm: 5446.35238444462 sim_pfm: 170.49387842797054
episode: 160 training return: tensor(-15.2226, device='cuda:0')
episode: 161 training return: tensor(127.9598, device='cuda:0')
episode: 162 training return: tensor(176.9339, device='cuda:0')
episode: 163 training return: tensor(111.5254, device='cuda:0')
epoch: 41 test_true_pfm: 5333.002349011153 sim_pfm: 153.92671092176656
episode: 164 training return: tensor(162.5596, device='cuda:0')
episode: 165 training return: tensor(32.7101, device='cuda:0')
episode: 166 training return: tensor(12.8242, device='cuda:0')
episode: 167 training return: tensor(154.6149, device='cuda:0')
epoch: 42 test_true_pfm: 5430.916686949258 sim_pfm: 221.5127074253202
episode: 168 training return: tensor(168.2890, device='cuda:0')
episode: 169 training return: tensor(92.4705, device='cuda:0')
episode: 170 training return: tensor(238.8361, device='cuda:0')
episode: 171 training return: tensor(80.2949, device='cuda:0')
epoch: 43 test_true_pfm: 5454.858489587007 sim_pfm: 219.27634441006617
episode: 172 training return: tensor(233.1194, device='cuda:0')
episode: 173 training return: tensor(237.2348, device='cuda:0')
episode: 174 training return: tensor(15.0440, device='cuda:0')
episode: 175 training return: tensor(137.0986, device='cuda:0')
epoch: 44 test_true_pfm: 5355.8618445399 sim_pfm: 215.6226044911697
episode: 176 training return: tensor(220.2310, device='cuda:0')
episode: 177 training return: tensor(61.8563, device='cuda:0')
episode: 178 training return: tensor(105.3302, device='cuda:0')
episode: 179 training return: tensor(40.0909, device='cuda:0')
epoch: 45 test_true_pfm: 5452.488276547973 sim_pfm: 193.40720069625726
episode: 180 training return: tensor(178.5099, device='cuda:0')
episode: 181 training return: tensor(27.1731, device='cuda:0')
episode: 182 training return: tensor(186.9702, device='cuda:0')
episode: 183 training return: tensor(-1.9939, device='cuda:0')
epoch: 46 test_true_pfm: 5473.537558557423 sim_pfm: 196.1294093382894
episode: 184 training return: tensor(178.4305, device='cuda:0')
episode: 185 training return: tensor(141.6895, device='cuda:0')
episode: 186 training return: tensor(142.4794, device='cuda:0')
episode: 187 training return: tensor(125.3699, device='cuda:0')
epoch: 47 test_true_pfm: 5417.120660397171 sim_pfm: 232.30080751500404
episode: 188 training return: tensor(181.1190, device='cuda:0')
episode: 189 training return: tensor(19.2815, device='cuda:0')
episode: 190 training return: tensor(158.3344, device='cuda:0')
episode: 191 training return: tensor(35.7969, device='cuda:0')
epoch: 48 test_true_pfm: 5378.907480405628 sim_pfm: 174.13617285288638
episode: 192 training return: tensor(54.2551, device='cuda:0')
episode: 193 training return: tensor(35.7985, device='cuda:0')
episode: 194 training return: tensor(187.5502, device='cuda:0')
episode: 195 training return: tensor(143.7050, device='cuda:0')
epoch: 49 test_true_pfm: 5424.371804743361 sim_pfm: 254.01086347720897
episode: 196 training return: tensor(86.5748, device='cuda:0')
episode: 197 training return: tensor(200.4083, device='cuda:0')
episode: 198 training return: tensor(115.3942, device='cuda:0')
episode: 199 training return: tensor(170.1757, device='cuda:0')
epoch: 50 test_true_pfm: 5406.507848359938 sim_pfm: 154.31032241053376
episode: 200 training return: tensor(134.3346, device='cuda:0')
episode: 201 training return: tensor(71.6136, device='cuda:0')
episode: 202 training return: tensor(182.3240, device='cuda:0')
episode: 203 training return: tensor(108.6303, device='cuda:0')
epoch: 51 test_true_pfm: 5481.3471887017995 sim_pfm: 245.82434930772675
episode: 204 training return: tensor(90.9795, device='cuda:0')
episode: 205 training return: tensor(249.7528, device='cuda:0')
episode: 206 training return: tensor(-12.3922, device='cuda:0')
episode: 207 training return: tensor(138.5975, device='cuda:0')
epoch: 52 test_true_pfm: 5509.318123826165 sim_pfm: 226.39845730971624
episode: 208 training return: tensor(142.3719, device='cuda:0')
episode: 209 training return: tensor(231.3349, device='cuda:0')
episode: 210 training return: tensor(123.4177, device='cuda:0')
episode: 211 training return: tensor(27.4568, device='cuda:0')
epoch: 53 test_true_pfm: 5522.629057422816 sim_pfm: 214.74918370816158
episode: 212 training return: tensor(39.7136, device='cuda:0')
episode: 213 training return: tensor(172.3425, device='cuda:0')
episode: 214 training return: tensor(154.2986, device='cuda:0')
episode: 215 training return: tensor(97.9788, device='cuda:0')
epoch: 54 test_true_pfm: 5494.567722893448 sim_pfm: 248.09716302201073
episode: 216 training return: tensor(71.5774, device='cuda:0')
episode: 217 training return: tensor(114.6804, device='cuda:0')
episode: 218 training return: tensor(31.4556, device='cuda:0')
episode: 219 training return: tensor(94.0673, device='cuda:0')
epoch: 55 test_true_pfm: 5316.872697830076 sim_pfm: 293.2628298208583
episode: 220 training return: tensor(177.8118, device='cuda:0')
episode: 221 training return: tensor(235.3030, device='cuda:0')
episode: 222 training return: tensor(179.8826, device='cuda:0')
episode: 223 training return: tensor(119.7927, device='cuda:0')
epoch: 56 test_true_pfm: 5421.141241744995 sim_pfm: 285.9718611331191
episode: 224 training return: tensor(200.5522, device='cuda:0')
episode: 225 training return: tensor(181.0715, device='cuda:0')
episode: 226 training return: tensor(34.0384, device='cuda:0')
episode: 227 training return: tensor(3.7771, device='cuda:0')
epoch: 57 test_true_pfm: 5530.358850472335 sim_pfm: 224.82273457199335
episode: 228 training return: tensor(164.6858, device='cuda:0')
episode: 229 training return: tensor(192.2764, device='cuda:0')
episode: 230 training return: tensor(188.6700, device='cuda:0')
episode: 231 training return: tensor(-14.1943, device='cuda:0')
epoch: 58 test_true_pfm: 5576.941204478717 sim_pfm: 284.31095526815625
episode: 232 training return: tensor(181.9798, device='cuda:0')
episode: 233 training return: tensor(138.7849, device='cuda:0')
episode: 234 training return: tensor(219.2829, device='cuda:0')
episode: 235 training return: tensor(22.0081, device='cuda:0')
epoch: 59 test_true_pfm: 5426.347752791745 sim_pfm: 276.29279461257585
episode: 236 training return: tensor(229.8292, device='cuda:0')
episode: 237 training return: tensor(74.0307, device='cuda:0')
episode: 238 training return: tensor(159.4269, device='cuda:0')
episode: 239 training return: tensor(257.2361, device='cuda:0')
epoch: 60 test_true_pfm: 5434.922933472783 sim_pfm: 261.75076110177906
episode: 240 training return: tensor(156.0082, device='cuda:0')
episode: 241 training return: tensor(92.5901, device='cuda:0')
episode: 242 training return: tensor(89.3596, device='cuda:0')
episode: 243 training return: tensor(261.7606, device='cuda:0')
epoch: 61 test_true_pfm: 5532.153206132106 sim_pfm: 305.832013289677
episode: 244 training return: tensor(219.0176, device='cuda:0')
episode: 245 training return: tensor(185.1121, device='cuda:0')
episode: 246 training return: tensor(208.5802, device='cuda:0')
episode: 247 training return: tensor(176.9144, device='cuda:0')
epoch: 62 test_true_pfm: 5486.053803463675 sim_pfm: 209.26379093019446
episode: 248 training return: tensor(196.2837, device='cuda:0')
episode: 249 training return: tensor(238.3408, device='cuda:0')
episode: 250 training return: tensor(129.5427, device='cuda:0')
episode: 251 training return: tensor(248.6341, device='cuda:0')
epoch: 63 test_true_pfm: 5554.594840495664 sim_pfm: 180.89246774674393
episode: 252 training return: tensor(162.7554, device='cuda:0')
episode: 253 training return: tensor(15.7577, device='cuda:0')
episode: 254 training return: tensor(131.8683, device='cuda:0')
episode: 255 training return: tensor(73.9244, device='cuda:0')
epoch: 64 test_true_pfm: 5511.666618706705 sim_pfm: 240.4917427698189
episode: 256 training return: tensor(141.0290, device='cuda:0')
episode: 257 training return: tensor(147.9012, device='cuda:0')
episode: 258 training return: tensor(123.2681, device='cuda:0')
episode: 259 training return: tensor(172.1818, device='cuda:0')
epoch: 65 test_true_pfm: 5524.758631347152 sim_pfm: 294.1459561681938
episode: 260 training return: tensor(238.6779, device='cuda:0')
episode: 261 training return: tensor(291.6779, device='cuda:0')
episode: 262 training return: tensor(236.5358, device='cuda:0')
episode: 263 training return: tensor(250.0096, device='cuda:0')
epoch: 66 test_true_pfm: 5496.642191339407 sim_pfm: 256.3432065916325
episode: 264 training return: tensor(192.2832, device='cuda:0')
episode: 265 training return: tensor(189.3316, device='cuda:0')
episode: 266 training return: tensor(75.9614, device='cuda:0')
episode: 267 training return: tensor(110.2151, device='cuda:0')
epoch: 67 test_true_pfm: 5470.698432874034 sim_pfm: 270.004565299605
episode: 268 training return: tensor(104.3704, device='cuda:0')
episode: 269 training return: tensor(206.4777, device='cuda:0')
episode: 270 training return: tensor(231.3447, device='cuda:0')
episode: 271 training return: tensor(273.4792, device='cuda:0')
epoch: 68 test_true_pfm: 5543.456480191362 sim_pfm: 277.9121233081096
episode: 272 training return: tensor(214.0780, device='cuda:0')
episode: 273 training return: tensor(176.3379, device='cuda:0')
episode: 274 training return: tensor(241.2854, device='cuda:0')
episode: 275 training return: tensor(119.5646, device='cuda:0')
epoch: 69 test_true_pfm: 5522.746577922928 sim_pfm: 289.91265012761386
episode: 276 training return: tensor(128.7310, device='cuda:0')
episode: 277 training return: tensor(197.0192, device='cuda:0')
episode: 278 training return: tensor(154.3837, device='cuda:0')
episode: 279 training return: tensor(206.7408, device='cuda:0')
epoch: 70 test_true_pfm: 5546.1878661939745 sim_pfm: 317.2830678135312
episode: 280 training return: tensor(238.4220, device='cuda:0')
episode: 281 training return: tensor(226.3374, device='cuda:0')
episode: 282 training return: tensor(126.2708, device='cuda:0')
episode: 283 training return: tensor(184.7794, device='cuda:0')
epoch: 71 test_true_pfm: 5540.978927389452 sim_pfm: 204.3724089577057
episode: 284 training return: tensor(259.9582, device='cuda:0')
episode: 285 training return: tensor(156.5369, device='cuda:0')
episode: 286 training return: tensor(248.4680, device='cuda:0')
episode: 287 training return: tensor(166.5717, device='cuda:0')
epoch: 72 test_true_pfm: 5402.335839590041 sim_pfm: 244.7832726139071
episode: 288 training return: tensor(60.3140, device='cuda:0')
episode: 289 training return: tensor(274.5653, device='cuda:0')
episode: 290 training return: tensor(222.1276, device='cuda:0')
episode: 291 training return: tensor(161.6108, device='cuda:0')
epoch: 73 test_true_pfm: 5541.163952383068 sim_pfm: 346.5916714409638
episode: 292 training return: tensor(131.9557, device='cuda:0')
episode: 293 training return: tensor(144.7668, device='cuda:0')
episode: 294 training return: tensor(137.3383, device='cuda:0')
episode: 295 training return: tensor(89.4737, device='cuda:0')
epoch: 74 test_true_pfm: 5548.97249406225 sim_pfm: 263.25220323852653
episode: 296 training return: tensor(254.8836, device='cuda:0')
episode: 297 training return: tensor(110.1696, device='cuda:0')
episode: 298 training return: tensor(93.9090, device='cuda:0')
episode: 299 training return: tensor(181.5724, device='cuda:0')
epoch: 75 test_true_pfm: 5604.037447321881 sim_pfm: 320.69844085348694
episode: 300 training return: tensor(119.3901, device='cuda:0')
episode: 301 training return: tensor(178.6231, device='cuda:0')
episode: 302 training return: tensor(129.3025, device='cuda:0')
episode: 303 training return: tensor(178.6735, device='cuda:0')
epoch: 76 test_true_pfm: 5555.448747949536 sim_pfm: 219.05815864738543
episode: 304 training return: tensor(176.0183, device='cuda:0')
episode: 305 training return: tensor(157.7534, device='cuda:0')
episode: 306 training return: tensor(159.7821, device='cuda:0')
episode: 307 training return: tensor(189.8107, device='cuda:0')
epoch: 77 test_true_pfm: 5581.7354167810545 sim_pfm: 255.17652695748257
episode: 308 training return: tensor(186.3686, device='cuda:0')
episode: 309 training return: tensor(302.2267, device='cuda:0')
episode: 310 training return: tensor(179.3048, device='cuda:0')
episode: 311 training return: tensor(26.0410, device='cuda:0')
epoch: 78 test_true_pfm: 5637.46989308428 sim_pfm: 265.3753526866203
episode: 312 training return: tensor(189.3427, device='cuda:0')
episode: 313 training return: tensor(189.3581, device='cuda:0')
episode: 314 training return: tensor(170.2269, device='cuda:0')
episode: 315 training return: tensor(124.7924, device='cuda:0')
epoch: 79 test_true_pfm: 5546.179530923316 sim_pfm: 255.60728692038296
episode: 316 training return: tensor(216.2921, device='cuda:0')
episode: 317 training return: tensor(208.7357, device='cuda:0')
episode: 318 training return: tensor(134.5331, device='cuda:0')
episode: 319 training return: tensor(158.9947, device='cuda:0')
epoch: 80 test_true_pfm: 5607.169576388892 sim_pfm: 292.0222144849249
episode: 320 training return: tensor(244.9433, device='cuda:0')
episode: 321 training return: tensor(228.4740, device='cuda:0')
episode: 322 training return: tensor(239.7752, device='cuda:0')
episode: 323 training return: tensor(301.3866, device='cuda:0')
epoch: 81 test_true_pfm: 5592.4213981010735 sim_pfm: 297.3749462238047
episode: 324 training return: tensor(118.0497, device='cuda:0')
episode: 325 training return: tensor(208.0894, device='cuda:0')
episode: 326 training return: tensor(282.6539, device='cuda:0')
episode: 327 training return: tensor(290.0632, device='cuda:0')
epoch: 82 test_true_pfm: 5633.0721548481915 sim_pfm: 344.85710460763465
episode: 328 training return: tensor(256.8932, device='cuda:0')
episode: 329 training return: tensor(99.9568, device='cuda:0')
episode: 330 training return: tensor(140.6137, device='cuda:0')
episode: 331 training return: tensor(206.6797, device='cuda:0')
epoch: 83 test_true_pfm: 5527.171390575077 sim_pfm: 270.1366096122462
episode: 332 training return: tensor(152.7895, device='cuda:0')
episode: 333 training return: tensor(111.2075, device='cuda:0')
episode: 334 training return: tensor(75.1380, device='cuda:0')
episode: 335 training return: tensor(139.3521, device='cuda:0')
epoch: 84 test_true_pfm: 5521.4149944185165 sim_pfm: 233.2018800919468
episode: 336 training return: tensor(188.9305, device='cuda:0')
episode: 337 training return: tensor(167.8037, device='cuda:0')
episode: 338 training return: tensor(246.9763, device='cuda:0')
episode: 339 training return: tensor(169.9340, device='cuda:0')
epoch: 85 test_true_pfm: 5545.197668866061 sim_pfm: 304.32329055045074
episode: 340 training return: tensor(166.1342, device='cuda:0')
episode: 341 training return: tensor(290.6211, device='cuda:0')
episode: 342 training return: tensor(168.7319, device='cuda:0')
episode: 343 training return: tensor(127.6541, device='cuda:0')
epoch: 86 test_true_pfm: 5499.303892826944 sim_pfm: 317.35407679815154
episode: 344 training return: tensor(103.5229, device='cuda:0')
episode: 345 training return: tensor(250.7021, device='cuda:0')
episode: 346 training return: tensor(157.3590, device='cuda:0')
episode: 347 training return: tensor(181.4474, device='cuda:0')
epoch: 87 test_true_pfm: 5640.963573075015 sim_pfm: 302.58045069268945
episode: 348 training return: tensor(133.0705, device='cuda:0')
episode: 349 training return: tensor(180.9649, device='cuda:0')
episode: 350 training return: tensor(154.0586, device='cuda:0')
episode: 351 training return: tensor(243.0665, device='cuda:0')
epoch: 88 test_true_pfm: 5536.709764857267 sim_pfm: 239.4922396931021
episode: 352 training return: tensor(245.2633, device='cuda:0')
episode: 353 training return: tensor(253.9443, device='cuda:0')
episode: 354 training return: tensor(119.7913, device='cuda:0')
episode: 355 training return: tensor(200.8005, device='cuda:0')
epoch: 89 test_true_pfm: 5589.14780755212 sim_pfm: 325.50683847386
episode: 356 training return: tensor(101.2754, device='cuda:0')
episode: 357 training return: tensor(149.5379, device='cuda:0')
episode: 358 training return: tensor(196.3678, device='cuda:0')
episode: 359 training return: tensor(20.8763, device='cuda:0')
epoch: 90 test_true_pfm: 5597.988830651552 sim_pfm: 336.73045558063313
episode: 360 training return: tensor(214.5528, device='cuda:0')
episode: 361 training return: tensor(169.7434, device='cuda:0')
episode: 362 training return: tensor(78.5229, device='cuda:0')
episode: 363 training return: tensor(185.1395, device='cuda:0')
epoch: 91 test_true_pfm: 5601.08972957173 sim_pfm: 281.0705932598018
episode: 364 training return: tensor(288.5471, device='cuda:0')
episode: 365 training return: tensor(251.2797, device='cuda:0')
episode: 366 training return: tensor(232.6410, device='cuda:0')
episode: 367 training return: tensor(305.1879, device='cuda:0')
epoch: 92 test_true_pfm: 5641.469397539841 sim_pfm: 317.01350673808093
episode: 368 training return: tensor(309.3947, device='cuda:0')
episode: 369 training return: tensor(96.6309, device='cuda:0')
episode: 370 training return: tensor(78.1046, device='cuda:0')
episode: 371 training return: tensor(214.5968, device='cuda:0')
epoch: 93 test_true_pfm: 5564.773793240519 sim_pfm: 378.39422208306496
episode: 372 training return: tensor(156.4900, device='cuda:0')
episode: 373 training return: tensor(214.5751, device='cuda:0')
episode: 374 training return: tensor(136.2783, device='cuda:0')
episode: 375 training return: tensor(166.0839, device='cuda:0')
epoch: 94 test_true_pfm: 5631.738010486969 sim_pfm: 298.8621376128867
episode: 376 training return: tensor(169.4935, device='cuda:0')
episode: 377 training return: tensor(144.3572, device='cuda:0')
episode: 378 training return: tensor(186.4857, device='cuda:0')
episode: 379 training return: tensor(205.7322, device='cuda:0')
epoch: 95 test_true_pfm: 5553.856431839264 sim_pfm: 269.8477909263456
episode: 380 training return: tensor(252.2061, device='cuda:0')
episode: 381 training return: tensor(202.1457, device='cuda:0')
episode: 382 training return: tensor(246.8214, device='cuda:0')
episode: 383 training return: tensor(176.4572, device='cuda:0')
epoch: 96 test_true_pfm: 5578.770586840227 sim_pfm: 277.6732926607365
episode: 384 training return: tensor(226.8974, device='cuda:0')
episode: 385 training return: tensor(278.1417, device='cuda:0')
episode: 386 training return: tensor(171.8956, device='cuda:0')
episode: 387 training return: tensor(121.6056, device='cuda:0')
epoch: 97 test_true_pfm: 5569.619765473697 sim_pfm: 410.3808666538777
episode: 388 training return: tensor(45.3369, device='cuda:0')
episode: 389 training return: tensor(236.9039, device='cuda:0')
episode: 390 training return: tensor(190.7153, device='cuda:0')
episode: 391 training return: tensor(108.7505, device='cuda:0')
epoch: 98 test_true_pfm: 5637.683982189716 sim_pfm: 279.8252551545932
episode: 392 training return: tensor(216.1698, device='cuda:0')
episode: 393 training return: tensor(231.0761, device='cuda:0')
episode: 394 training return: tensor(112.2409, device='cuda:0')
episode: 395 training return: tensor(145.0284, device='cuda:0')
epoch: 99 test_true_pfm: 5544.395398464102 sim_pfm: 315.8138224608653
episode: 396 training return: tensor(231.4970, device='cuda:0')
episode: 397 training return: tensor(309.6127, device='cuda:0')
episode: 398 training return: tensor(146.6854, device='cuda:0')
episode: 399 training return: tensor(239.1686, device='cuda:0')
epoch: 100 test_true_pfm: 5561.583619675211 sim_pfm: 347.6234742515565
episode: 400 training return: tensor(261.8757, device='cuda:0')
episode: 401 training return: tensor(288.1935, device='cuda:0')
episode: 402 training return: tensor(33.2972, device='cuda:0')
episode: 403 training return: tensor(116.3669, device='cuda:0')
epoch: 101 test_true_pfm: 5513.556765454455 sim_pfm: 276.150210100847
episode: 404 training return: tensor(297.1934, device='cuda:0')
episode: 405 training return: tensor(182.4394, device='cuda:0')
episode: 406 training return: tensor(242.7451, device='cuda:0')
episode: 407 training return: tensor(215.6252, device='cuda:0')
epoch: 102 test_true_pfm: 5607.057370033438 sim_pfm: 373.2752959480761
episode: 408 training return: tensor(180.8108, device='cuda:0')
episode: 409 training return: tensor(74.7494, device='cuda:0')
episode: 410 training return: tensor(165.5962, device='cuda:0')
episode: 411 training return: tensor(213.3916, device='cuda:0')
epoch: 103 test_true_pfm: 5627.685081612261 sim_pfm: 329.6246536393107
episode: 412 training return: tensor(227.6563, device='cuda:0')
episode: 413 training return: tensor(271.2935, device='cuda:0')
episode: 414 training return: tensor(120.4686, device='cuda:0')
episode: 415 training return: tensor(181.7701, device='cuda:0')
epoch: 104 test_true_pfm: 5646.93225839322 sim_pfm: 363.4454070092955
episode: 416 training return: tensor(75.4329, device='cuda:0')
episode: 417 training return: tensor(223.3197, device='cuda:0')
episode: 418 training return: tensor(153.3490, device='cuda:0')
episode: 419 training return: tensor(203.8198, device='cuda:0')
epoch: 105 test_true_pfm: 5598.416045600438 sim_pfm: 344.549123182854
episode: 420 training return: tensor(61.5230, device='cuda:0')
episode: 421 training return: tensor(301.7272, device='cuda:0')
episode: 422 training return: tensor(171.8951, device='cuda:0')
episode: 423 training return: tensor(296.5439, device='cuda:0')
epoch: 106 test_true_pfm: 5545.792421846454 sim_pfm: 335.56634838308673
episode: 424 training return: tensor(340.8235, device='cuda:0')
episode: 425 training return: tensor(192.8269, device='cuda:0')
episode: 426 training return: tensor(205.1448, device='cuda:0')
episode: 427 training return: tensor(241.0538, device='cuda:0')
epoch: 107 test_true_pfm: 5694.76315184425 sim_pfm: 401.9911000611998
episode: 428 training return: tensor(264.2030, device='cuda:0')
episode: 429 training return: tensor(284.7359, device='cuda:0')
episode: 430 training return: tensor(227.2070, device='cuda:0')
episode: 431 training return: tensor(173.3598, device='cuda:0')
epoch: 108 test_true_pfm: 5712.972001701609 sim_pfm: 292.21048659905017
episode: 432 training return: tensor(239.5482, device='cuda:0')
episode: 433 training return: tensor(271.1208, device='cuda:0')
episode: 434 training return: tensor(189.1755, device='cuda:0')
episode: 435 training return: tensor(234.6550, device='cuda:0')
epoch: 109 test_true_pfm: 5559.9692945416855 sim_pfm: 343.8075623084248
episode: 436 training return: tensor(291.9941, device='cuda:0')
episode: 437 training return: tensor(232.1989, device='cuda:0')
episode: 438 training return: tensor(58.6208, device='cuda:0')
episode: 439 training return: tensor(180.5121, device='cuda:0')
epoch: 110 test_true_pfm: 5585.240612495159 sim_pfm: 291.88043861462694
episode: 440 training return: tensor(210.4633, device='cuda:0')
episode: 441 training return: tensor(308.6487, device='cuda:0')
episode: 442 training return: tensor(123.8620, device='cuda:0')
episode: 443 training return: tensor(312.4973, device='cuda:0')
epoch: 111 test_true_pfm: 5713.0385554768 sim_pfm: 271.61957521694904
episode: 444 training return: tensor(183.7821, device='cuda:0')
episode: 445 training return: tensor(262.1820, device='cuda:0')
episode: 446 training return: tensor(297.2500, device='cuda:0')
episode: 447 training return: tensor(263.3548, device='cuda:0')
epoch: 112 test_true_pfm: 5652.670597741118 sim_pfm: 376.8471046762522
episode: 448 training return: tensor(286.0856, device='cuda:0')
episode: 449 training return: tensor(319.1936, device='cuda:0')
episode: 450 training return: tensor(202.3704, device='cuda:0')
episode: 451 training return: tensor(321.4914, device='cuda:0')
epoch: 113 test_true_pfm: 5496.596441971076 sim_pfm: 327.82873685331043
episode: 452 training return: tensor(251.0179, device='cuda:0')
episode: 453 training return: tensor(129.5172, device='cuda:0')
episode: 454 training return: tensor(267.6296, device='cuda:0')
episode: 455 training return: tensor(190.3579, device='cuda:0')
epoch: 114 test_true_pfm: 5625.726164440005 sim_pfm: 347.90593147970503
episode: 456 training return: tensor(276.4471, device='cuda:0')
episode: 457 training return: tensor(145.4449, device='cuda:0')
episode: 458 training return: tensor(311.3263, device='cuda:0')
episode: 459 training return: tensor(245.5483, device='cuda:0')
epoch: 115 test_true_pfm: 5619.738056724901 sim_pfm: 336.00031267889426
episode: 460 training return: tensor(266.2439, device='cuda:0')
episode: 461 training return: tensor(250.7229, device='cuda:0')
episode: 462 training return: tensor(232.9212, device='cuda:0')
episode: 463 training return: tensor(69.8157, device='cuda:0')
epoch: 116 test_true_pfm: 5610.453294133018 sim_pfm: 313.7580270852777
episode: 464 training return: tensor(195.9225, device='cuda:0')
episode: 465 training return: tensor(200.8692, device='cuda:0')
episode: 466 training return: tensor(121.7779, device='cuda:0')
episode: 467 training return: tensor(309.6108, device='cuda:0')
epoch: 117 test_true_pfm: 5471.7072014051 sim_pfm: 334.33563161750016
episode: 468 training return: tensor(214.5497, device='cuda:0')
episode: 469 training return: tensor(220.7944, device='cuda:0')
episode: 470 training return: tensor(265.6302, device='cuda:0')
episode: 471 training return: tensor(126.3006, device='cuda:0')
epoch: 118 test_true_pfm: 5644.577773631842 sim_pfm: 312.63557264901465
episode: 472 training return: tensor(231.5606, device='cuda:0')
episode: 473 training return: tensor(324.6607, device='cuda:0')
episode: 474 training return: tensor(275.6356, device='cuda:0')
episode: 475 training return: tensor(328.1596, device='cuda:0')
epoch: 119 test_true_pfm: 5664.85908902756 sim_pfm: 390.60732654699433
episode: 476 training return: tensor(252.6437, device='cuda:0')
episode: 477 training return: tensor(224.4772, device='cuda:0')
episode: 478 training return: tensor(211.7015, device='cuda:0')
episode: 479 training return: tensor(227.2743, device='cuda:0')
epoch: 120 test_true_pfm: 5705.320116508729 sim_pfm: 343.53856644878397
episode: 480 training return: tensor(299.7485, device='cuda:0')
episode: 481 training return: tensor(6.1121, device='cuda:0')
episode: 482 training return: tensor(281.1994, device='cuda:0')
episode: 483 training return: tensor(289.5325, device='cuda:0')
epoch: 121 test_true_pfm: 5597.59096022446 sim_pfm: 383.7734950459562
episode: 484 training return: tensor(206.1657, device='cuda:0')
episode: 485 training return: tensor(299.7630, device='cuda:0')
episode: 486 training return: tensor(247.3539, device='cuda:0')
episode: 487 training return: tensor(273.1436, device='cuda:0')
epoch: 122 test_true_pfm: 5671.953102603365 sim_pfm: 303.66595015045215
episode: 488 training return: tensor(223.7197, device='cuda:0')
episode: 489 training return: tensor(210.8122, device='cuda:0')
episode: 490 training return: tensor(144.1759, device='cuda:0')
episode: 491 training return: tensor(82.7642, device='cuda:0')
epoch: 123 test_true_pfm: 5665.051283772288 sim_pfm: 295.7781500277536
episode: 492 training return: tensor(233.2333, device='cuda:0')
episode: 493 training return: tensor(281.4785, device='cuda:0')
episode: 494 training return: tensor(266.1223, device='cuda:0')
episode: 495 training return: tensor(264.7470, device='cuda:0')
epoch: 124 test_true_pfm: 5570.8035574022615 sim_pfm: 311.3726984312525
episode: 496 training return: tensor(294.6884, device='cuda:0')
episode: 497 training return: tensor(285.1288, device='cuda:0')
episode: 498 training return: tensor(209.4441, device='cuda:0')
episode: 499 training return: tensor(176.2106, device='cuda:0')
epoch: 125 test_true_pfm: 5591.5843358426655 sim_pfm: 337.2696153372526
episode: 500 training return: tensor(164.4229, device='cuda:0')
episode: 501 training return: tensor(312.0520, device='cuda:0')
episode: 502 training return: tensor(315.6699, device='cuda:0')
episode: 503 training return: tensor(299.7366, device='cuda:0')
epoch: 126 test_true_pfm: 5584.312233953674 sim_pfm: 391.5541704806383
episode: 504 training return: tensor(207.0505, device='cuda:0')
episode: 505 training return: tensor(218.7912, device='cuda:0')
episode: 506 training return: tensor(163.6988, device='cuda:0')
episode: 507 training return: tensor(61.5602, device='cuda:0')
epoch: 127 test_true_pfm: 5677.3576620601625 sim_pfm: 353.390170510121
episode: 508 training return: tensor(249.2318, device='cuda:0')
episode: 509 training return: tensor(222.6663, device='cuda:0')
episode: 510 training return: tensor(68.5725, device='cuda:0')
episode: 511 training return: tensor(255.9778, device='cuda:0')
epoch: 128 test_true_pfm: 5569.604323362347 sim_pfm: 284.6904221008493
episode: 512 training return: tensor(175.4110, device='cuda:0')
episode: 513 training return: tensor(279.5400, device='cuda:0')
episode: 514 training return: tensor(228.6231, device='cuda:0')
episode: 515 training return: tensor(287.3678, device='cuda:0')
epoch: 129 test_true_pfm: 5542.656275892382 sim_pfm: 377.74503222651157
episode: 516 training return: tensor(232.9303, device='cuda:0')
episode: 517 training return: tensor(-11.3780, device='cuda:0')
episode: 518 training return: tensor(241.0952, device='cuda:0')
episode: 519 training return: tensor(286.1700, device='cuda:0')
epoch: 130 test_true_pfm: 5750.517743752013 sim_pfm: 361.9229081272885
episode: 520 training return: tensor(219.9083, device='cuda:0')
episode: 521 training return: tensor(292.1172, device='cuda:0')
episode: 522 training return: tensor(202.9263, device='cuda:0')
episode: 523 training return: tensor(253.4731, device='cuda:0')
epoch: 131 test_true_pfm: 5615.8957494314445 sim_pfm: 375.1936052127664
episode: 524 training return: tensor(192.3250, device='cuda:0')
episode: 525 training return: tensor(197.7269, device='cuda:0')
episode: 526 training return: tensor(203.1854, device='cuda:0')
episode: 527 training return: tensor(118.7674, device='cuda:0')
epoch: 132 test_true_pfm: 5609.851538760801 sim_pfm: 358.8504348027054
episode: 528 training return: tensor(93.1498, device='cuda:0')
episode: 529 training return: tensor(260.6594, device='cuda:0')
episode: 530 training return: tensor(286.0217, device='cuda:0')
episode: 531 training return: tensor(313.1563, device='cuda:0')
epoch: 133 test_true_pfm: 5641.37946162983 sim_pfm: 371.5418232498341
episode: 532 training return: tensor(228.1087, device='cuda:0')
episode: 533 training return: tensor(364.6098, device='cuda:0')
episode: 534 training return: tensor(198.0017, device='cuda:0')
episode: 535 training return: tensor(120.9983, device='cuda:0')
epoch: 134 test_true_pfm: 5638.845634857263 sim_pfm: 380.2971570415539
episode: 536 training return: tensor(250.6253, device='cuda:0')
episode: 537 training return: tensor(174.5502, device='cuda:0')
episode: 538 training return: tensor(227.3423, device='cuda:0')
episode: 539 training return: tensor(241.7388, device='cuda:0')
epoch: 135 test_true_pfm: 5618.749363985228 sim_pfm: 347.2498961246262
episode: 540 training return: tensor(277.3424, device='cuda:0')
episode: 541 training return: tensor(229.7080, device='cuda:0')
episode: 542 training return: tensor(201.1445, device='cuda:0')
episode: 543 training return: tensor(193.5025, device='cuda:0')
epoch: 136 test_true_pfm: 5652.965129574021 sim_pfm: 360.0600898320069
episode: 544 training return: tensor(194.5196, device='cuda:0')
episode: 545 training return: tensor(296.5976, device='cuda:0')
episode: 546 training return: tensor(284.7587, device='cuda:0')
episode: 547 training return: tensor(260.9975, device='cuda:0')
epoch: 137 test_true_pfm: 5615.359867519767 sim_pfm: 297.1743114407873
episode: 548 training return: tensor(272.5699, device='cuda:0')
episode: 549 training return: tensor(324.7202, device='cuda:0')
episode: 550 training return: tensor(243.4041, device='cuda:0')
episode: 551 training return: tensor(229.0716, device='cuda:0')
epoch: 138 test_true_pfm: 5689.530024837949 sim_pfm: 362.84854067256674
episode: 552 training return: tensor(304.8920, device='cuda:0')
episode: 553 training return: tensor(229.5165, device='cuda:0')
episode: 554 training return: tensor(238.0880, device='cuda:0')
episode: 555 training return: tensor(211.0672, device='cuda:0')
epoch: 139 test_true_pfm: 5558.98020296458 sim_pfm: 369.046741091103
episode: 556 training return: tensor(234.9442, device='cuda:0')
episode: 557 training return: tensor(285.3542, device='cuda:0')
episode: 558 training return: tensor(288.4457, device='cuda:0')
episode: 559 training return: tensor(295.1216, device='cuda:0')
epoch: 140 test_true_pfm: 5661.248832614391 sim_pfm: 367.56553898796363
episode: 560 training return: tensor(298.2678, device='cuda:0')
episode: 561 training return: tensor(201.0473, device='cuda:0')
episode: 562 training return: tensor(240.4694, device='cuda:0')
episode: 563 training return: tensor(75.6069, device='cuda:0')
epoch: 141 test_true_pfm: 5623.262909064976 sim_pfm: 374.7603439989034
episode: 564 training return: tensor(342.3643, device='cuda:0')
episode: 565 training return: tensor(251.8898, device='cuda:0')
episode: 566 training return: tensor(200.1187, device='cuda:0')
episode: 567 training return: tensor(268.0423, device='cuda:0')
epoch: 142 test_true_pfm: 5649.209339171022 sim_pfm: 375.7241219993836
episode: 568 training return: tensor(247.7520, device='cuda:0')
episode: 569 training return: tensor(267.9557, device='cuda:0')
episode: 570 training return: tensor(184.1851, device='cuda:0')
episode: 571 training return: tensor(210.6018, device='cuda:0')
epoch: 143 test_true_pfm: 5609.557429466847 sim_pfm: 373.2202285434857
episode: 572 training return: tensor(54.6898, device='cuda:0')
episode: 573 training return: tensor(202.2148, device='cuda:0')
episode: 574 training return: tensor(281.6605, device='cuda:0')
episode: 575 training return: tensor(218.0052, device='cuda:0')
epoch: 144 test_true_pfm: 5672.91026207577 sim_pfm: 359.89458969362505
episode: 576 training return: tensor(190.6036, device='cuda:0')
episode: 577 training return: tensor(213.9653, device='cuda:0')
episode: 578 training return: tensor(312.5471, device='cuda:0')
episode: 579 training return: tensor(183.7950, device='cuda:0')
epoch: 145 test_true_pfm: 5725.4273930330355 sim_pfm: 361.5384353250265
episode: 580 training return: tensor(166.8860, device='cuda:0')
episode: 581 training return: tensor(301.8790, device='cuda:0')
episode: 582 training return: tensor(309.5216, device='cuda:0')
episode: 583 training return: tensor(320.9629, device='cuda:0')
epoch: 146 test_true_pfm: 5654.2979615042495 sim_pfm: 308.2853699777431
episode: 584 training return: tensor(214.8957, device='cuda:0')
episode: 585 training return: tensor(194.5358, device='cuda:0')
episode: 586 training return: tensor(251.3992, device='cuda:0')
episode: 587 training return: tensor(266.1422, device='cuda:0')
epoch: 147 test_true_pfm: 5650.709406425215 sim_pfm: 380.66769207636634
episode: 588 training return: tensor(219.1220, device='cuda:0')
episode: 589 training return: tensor(272.7170, device='cuda:0')
episode: 590 training return: tensor(222.1328, device='cuda:0')
episode: 591 training return: tensor(224.1788, device='cuda:0')
epoch: 148 test_true_pfm: 5678.876137912247 sim_pfm: 392.69874998762197
episode: 592 training return: tensor(328.6821, device='cuda:0')
episode: 593 training return: tensor(322.6750, device='cuda:0')
episode: 594 training return: tensor(255.9973, device='cuda:0')
episode: 595 training return: tensor(151.2575, device='cuda:0')
epoch: 149 test_true_pfm: 5685.733354663014 sim_pfm: 367.2895858144232
episode: 596 training return: tensor(339.6342, device='cuda:0')
episode: 597 training return: tensor(299.1967, device='cuda:0')
episode: 598 training return: tensor(299.8009, device='cuda:0')
episode: 599 training return: tensor(247.4651, device='cuda:0')
epoch: 150 test_true_pfm: 5621.9154456349215 sim_pfm: 321.3897680656228
