['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'mixed', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.24047145344316959 test_loss: 0.1648313045501709
epoch: 1 training_loss 0.14955820716917514 test_loss: 0.1480257987976074
epoch: 2 training_loss 0.13109420459717513 test_loss: 0.12916842699050904
epoch: 3 training_loss 0.12904631659388544 test_loss: 0.12242408990859985
epoch: 4 training_loss 0.1252505924925208 test_loss: 0.12154233455657959
epoch: 5 training_loss 0.11729763757437467 test_loss: 0.11968621015548705
epoch: 6 training_loss 0.1128384768217802 test_loss: 0.12323741912841797
epoch: 7 training_loss 0.11359416753053665 test_loss: 0.1083483338356018
epoch: 8 training_loss 0.10637724142521619 test_loss: 0.11437058448791504
epoch: 9 training_loss 0.11423338953405619 test_loss: 0.11919608116149902
epoch: 10 training_loss 0.10369857743382455 test_loss: 0.10416414737701415
epoch: 11 training_loss 0.10436242297291756 test_loss: 0.1040223240852356
epoch: 12 training_loss 0.10509420070797205 test_loss: 0.10491465330123902
epoch: 13 training_loss 0.10796204000711442 test_loss: 0.10661880970001221
epoch: 14 training_loss 0.10348178036510944 test_loss: 0.10682928562164307
epoch: 15 training_loss 0.1011539050936699 test_loss: 0.10280441045761109
epoch: 16 training_loss 0.10198258876800537 test_loss: 0.11139862537384033
epoch: 17 training_loss 0.10041842386126518 test_loss: 0.09134708642959595
epoch: 18 training_loss 0.0914375365898013 test_loss: 0.10400283336639404
epoch: 19 training_loss 0.09763149976730347 test_loss: 0.10446610450744628
epoch: 20 training_loss 0.10174183271825314 test_loss: 0.08406218290328979
epoch: 21 training_loss 0.09249128730967641 test_loss: 0.09195667505264282
epoch: 22 training_loss 0.08964097259566188 test_loss: 0.1072199821472168
epoch: 23 training_loss 0.09541229248046874 test_loss: 0.09249799847602844
epoch: 24 training_loss 0.09036730075255037 test_loss: 0.09399793148040772
epoch: 25 training_loss 0.09472169935703277 test_loss: 0.09798476696014405
epoch: 26 training_loss 0.09911678038537503 test_loss: 0.09924965500831603
epoch: 27 training_loss 0.08485472112894059 test_loss: 0.09474697113037109
epoch: 28 training_loss 0.08543793322518468 test_loss: 0.0907767355442047
epoch: 29 training_loss 0.0928028180077672 test_loss: 0.11897780895233154
epoch: 30 training_loss 0.09405016772449017 test_loss: 0.08176459074020385
epoch: 31 training_loss 0.08382989555597305 test_loss: 0.08629949688911438
epoch: 32 training_loss 0.08346707357093691 test_loss: 0.08654178977012635
epoch: 33 training_loss 0.0879357329942286 test_loss: 0.08437771201133729
epoch: 34 training_loss 0.08429368428885936 test_loss: 0.08128176927566529
epoch: 35 training_loss 0.08665894698351621 test_loss: 0.09384260177612305
epoch: 36 training_loss 0.08853982057422399 test_loss: 0.08289160132408142
epoch: 37 training_loss 0.07783995263278484 test_loss: 0.0854430377483368
epoch: 38 training_loss 0.08766612574458123 test_loss: 0.09402477145195007
epoch: 39 training_loss 0.07882922099903227 test_loss: 0.07858055830001831
epoch: 40 training_loss 0.07965029813349248 test_loss: 0.07361202836036682
epoch: 41 training_loss 0.0775285204872489 test_loss: 0.06677777171134949
epoch: 42 training_loss 0.08087117042392493 test_loss: 0.08451722264289856
epoch: 43 training_loss 0.07428451666608453 test_loss: 0.0836036205291748
epoch: 44 training_loss 0.07890649838373065 test_loss: 0.07282909154891967
epoch: 45 training_loss 0.07627707621082663 test_loss: 0.07281621694564819
epoch: 46 training_loss 0.07896334951743483 test_loss: 0.07923778891563416
epoch: 47 training_loss 0.07392434427514673 test_loss: 0.0722082018852234
epoch: 48 training_loss 0.07594274854287505 test_loss: 0.07826998829841614
epoch: 49 training_loss 0.06799860062077641 test_loss: 0.07048116326332092
epoch: 50 training_loss 0.07232926271855832 test_loss: 0.08422163724899293
epoch: 51 training_loss 0.07318646771833301 test_loss: 0.08728483319282532
epoch: 52 training_loss 0.07341417901217938 test_loss: 0.07993126511573792
epoch: 53 training_loss 0.06845173025503755 test_loss: 0.07615594863891602
epoch: 54 training_loss 0.07857529912143946 test_loss: 0.0842154324054718
epoch: 55 training_loss 0.07156409339979292 test_loss: 0.0718978226184845
epoch: 56 training_loss 0.07464949820190668 test_loss: 0.07086197137832642
epoch: 57 training_loss 0.07143402494490146 test_loss: 0.07383930087089538
epoch: 58 training_loss 0.06979524021968246 test_loss: 0.069416081905365
epoch: 59 training_loss 0.07075485745444894 test_loss: 0.08046367168426513
epoch: 60 training_loss 0.07153695479035377 test_loss: 0.06793901324272156
epoch: 61 training_loss 0.07621675364673137 test_loss: 0.06320803761482238
epoch: 62 training_loss 0.06850984355434775 test_loss: 0.07180332541465759
epoch: 63 training_loss 0.07180153893306851 test_loss: 0.07041502594947815
epoch: 64 training_loss 0.07032642368227243 test_loss: 0.07924779057502747
epoch: 65 training_loss 0.06991565370932222 test_loss: 0.07247674465179443
epoch: 66 training_loss 0.07129459565505386 test_loss: 0.07663614153862
epoch: 67 training_loss 0.0695709028840065 test_loss: 0.07078680992126465
epoch: 68 training_loss 0.07071733908727765 test_loss: 0.08169276118278504
epoch: 69 training_loss 0.07120523296296596 test_loss: 0.08157074451446533
epoch: 70 training_loss 0.06815527526661753 test_loss: 0.06733115315437317
epoch: 71 training_loss 0.07006178015843034 test_loss: 0.065914785861969
epoch: 72 training_loss 0.07464312240481377 test_loss: 0.0673772931098938
epoch: 73 training_loss 0.06872678192332388 test_loss: 0.06619923710823059
epoch: 74 training_loss 0.0677209809049964 test_loss: 0.0675273597240448
epoch: 75 training_loss 0.07064826781861484 test_loss: 0.0736035943031311
epoch: 76 training_loss 0.06853915752843022 test_loss: 0.06055567860603332
epoch: 77 training_loss 0.07027897950261831 test_loss: 0.07635228037834167
epoch: 78 training_loss 0.0638929208368063 test_loss: 0.07404407858848572
epoch: 79 training_loss 0.06853112613782286 test_loss: 0.06952637434005737
epoch: 80 training_loss 0.06722774539142846 test_loss: 0.06620646715164184
epoch: 81 training_loss 0.07105359078384936 test_loss: 0.07409217357635497
epoch: 82 training_loss 0.06516876190900803 test_loss: 0.0782516360282898
epoch: 83 training_loss 0.06711751599796116 test_loss: 0.06715196371078491
epoch: 84 training_loss 0.06870530443266035 test_loss: 0.0673502504825592
epoch: 85 training_loss 0.067050622086972 test_loss: 0.06651555895805358
epoch: 86 training_loss 0.06600497074425221 test_loss: 0.06264271140098572
epoch: 87 training_loss 0.06905501134693623 test_loss: 0.07915658354759217
epoch: 88 training_loss 0.06707105442881584 test_loss: 0.06099200844764709
epoch: 89 training_loss 0.06408157418482005 test_loss: 0.054017525911331174
epoch: 90 training_loss 0.06932500125840306 test_loss: 0.05899796485900879
epoch: 91 training_loss 0.0691667778044939 test_loss: 0.07366458177566529
epoch: 92 training_loss 0.06405251190066337 test_loss: 0.07056553363800049
epoch: 93 training_loss 0.06713155862875282 test_loss: 0.06745862364768981
epoch: 94 training_loss 0.06779081272892654 test_loss: 0.06208351254463196
epoch: 95 training_loss 0.06378065261989832 test_loss: 0.07535513043403626
epoch: 96 training_loss 0.07246899088844656 test_loss: 0.07070844173431397
epoch: 97 training_loss 0.06319387952797115 test_loss: 0.07352476119995117
epoch: 98 training_loss 0.06445644233375787 test_loss: 0.05104081034660339
epoch: 99 training_loss 0.06695776842534543 test_loss: 0.06579545140266418
epoch: 100 training_loss 0.06703326925635338 test_loss: 0.07249590754508972
epoch: 101 training_loss 0.0671724608540535 test_loss: 0.07085843682289124
epoch: 102 training_loss 0.06558131547644734 test_loss: 0.07192804813385009
epoch: 103 training_loss 0.06450336128473282 test_loss: 0.06418328285217285
epoch: 104 training_loss 0.06925747614353896 test_loss: 0.05470795631408691
epoch: 105 training_loss 0.06795609246939421 test_loss: 0.059459513425827025
epoch: 106 training_loss 0.06592773428186774 test_loss: 0.07576969265937805
epoch: 107 training_loss 0.06398329610005021 test_loss: 0.07017273306846619
epoch: 108 training_loss 0.06226106643676758 test_loss: 0.09045013189315795
epoch: 109 training_loss 0.06183643715456128 test_loss: 0.0659839391708374
epoch: 110 training_loss 0.06502883808687329 test_loss: 0.05757449865341187
epoch: 111 training_loss 0.07239671936258674 test_loss: 0.07006326913833619
epoch: 112 training_loss 0.06454158850014209 test_loss: 0.06753686666488648
epoch: 113 training_loss 0.060471191136166454 test_loss: 0.067935049533844
epoch: 114 training_loss 0.06259023101069033 test_loss: 0.05755544304847717
epoch: 115 training_loss 0.06363935539498926 test_loss: 0.06121113896369934
epoch: 116 training_loss 0.06534525969065726 test_loss: 0.07035761475563049
epoch: 117 training_loss 0.07127520391717553 test_loss: 0.06658233404159546
epoch: 118 training_loss 0.06573138184845448 test_loss: 0.06693063974380493
epoch: 119 training_loss 0.06343615263700485 test_loss: 0.07167990207672119
epoch: 120 training_loss 0.06519530769437551 test_loss: 0.06314743161201478
epoch: 121 training_loss 0.06188837954774499 test_loss: 0.06515223383903504
epoch: 122 training_loss 0.06521826054900885 test_loss: 0.063511061668396
epoch: 123 training_loss 0.06697480842471122 test_loss: 0.07081952691078186
epoch: 124 training_loss 0.06700620785355568 test_loss: 0.06266341209411622
epoch: 125 training_loss 0.06774922876968048 test_loss: 0.06707934141159058
epoch: 126 training_loss 0.06368664938956499 test_loss: 0.06286254525184631
epoch: 127 training_loss 0.07125345731154084 test_loss: 0.057366198301315306
epoch: 128 training_loss 0.06710845651105046 test_loss: 0.06661032438278199
epoch: 129 training_loss 0.06590324946679176 test_loss: 0.06491263508796692
epoch: 130 training_loss 0.06340684035792947 test_loss: 0.08548678159713745
epoch: 131 training_loss 0.06600776266306639 test_loss: 0.0637187659740448
epoch: 132 training_loss 0.06309833833947778 test_loss: 0.07855260968208314
epoch: 133 training_loss 0.06793646929785609 test_loss: 0.06817519664764404
epoch: 134 training_loss 0.06755149960517884 test_loss: 0.06978230476379395
epoch: 135 training_loss 0.05905181628651917 test_loss: 0.07433966398239136
epoch: 136 training_loss 0.0661890616454184 test_loss: 0.06600443720817566
epoch: 137 training_loss 0.062212694380432365 test_loss: 0.07080481648445129
epoch: 138 training_loss 0.06699278684332967 test_loss: 0.06459156274795533
epoch: 139 training_loss 0.06315654268488288 test_loss: 0.07921488881111145
epoch: 140 training_loss 0.06583160158246755 test_loss: 0.06884939074516297
epoch: 141 training_loss 0.06585488174110651 test_loss: 0.08007432818412781
epoch: 142 training_loss 0.06501534984912723 test_loss: 0.04607730209827423
epoch: 143 training_loss 0.06146303097717464 test_loss: 0.050399559736251834
epoch: 144 training_loss 0.06350937843322754 test_loss: 0.051847410202026364
epoch: 145 training_loss 0.06227307658642531 test_loss: 0.08258729577064514
epoch: 146 training_loss 0.06226950562559068 test_loss: 0.06997459530830383
epoch: 147 training_loss 0.06294284655712545 test_loss: 0.06734760999679565
epoch: 148 training_loss 0.06586866123601794 test_loss: 0.061822181940078734
epoch: 149 training_loss 0.060709195658564565 test_loss: 0.0671587586402893
epoch: 0 training_loss 56.263107223510744 test_loss: 33.335565185546876
epoch: 1 training_loss 26.495368614196778 test_loss: 22.627952575683594
epoch: 2 training_loss 20.161430416107176 test_loss: 18.44136962890625
epoch: 3 training_loss 17.136475439071656 test_loss: 16.000013732910155
epoch: 4 training_loss 15.006550502777099 test_loss: 13.96160888671875
epoch: 5 training_loss 13.13081678390503 test_loss: 12.160897064208985
epoch: 6 training_loss 12.002272853851318 test_loss: 11.76067886352539
epoch: 7 training_loss 11.141017837524414 test_loss: 10.587937927246093
epoch: 8 training_loss 10.286144924163818 test_loss: 10.105098724365234
epoch: 9 training_loss 9.478172006607055 test_loss: 9.359473419189452
epoch: 10 training_loss 8.949320621490479 test_loss: 8.690802001953125
epoch: 11 training_loss 8.575330419540405 test_loss: 8.285037994384766
epoch: 12 training_loss 8.154483494758606 test_loss: 8.020582580566407
epoch: 13 training_loss 7.84219268321991 test_loss: 7.403871154785156
epoch: 14 training_loss 7.5225416278839115 test_loss: 7.414411163330078
epoch: 15 training_loss 7.266738276481629 test_loss: 7.238597106933594
epoch: 16 training_loss 6.917055749893189 test_loss: 6.906145477294922
epoch: 17 training_loss 6.8568834209442135 test_loss: 6.7712158203125
epoch: 18 training_loss 6.6895130920410155 test_loss: 6.437805938720703
epoch: 19 training_loss 6.521310815811157 test_loss: 6.234886932373047
epoch: 20 training_loss 6.3726208209991455 test_loss: 6.279730987548828
epoch: 21 training_loss 6.164194812774658 test_loss: 6.057423782348633
epoch: 22 training_loss 6.0721082735061644 test_loss: 6.009206008911133
epoch: 23 training_loss 5.883598346710205 test_loss: 5.965908432006836
epoch: 24 training_loss 5.815931458473205 test_loss: 5.677112197875976
epoch: 25 training_loss 5.6546903991699216 test_loss: 5.60719108581543
epoch: 26 training_loss 5.571412315368653 test_loss: 5.472418975830078
epoch: 27 training_loss 5.498990669250488 test_loss: 5.337449264526367
epoch: 28 training_loss 5.327377285957336 test_loss: 5.305989837646484
epoch: 29 training_loss 5.454841604232788 test_loss: 5.286557388305664
epoch: 30 training_loss 5.237721202373504 test_loss: 5.35274658203125
epoch: 31 training_loss 5.160796608924866 test_loss: 5.278417587280273
epoch: 32 training_loss 5.073957920074463 test_loss: 5.157075881958008
epoch: 33 training_loss 5.158296382427215 test_loss: 4.820584869384765
epoch: 34 training_loss 5.001622662544251 test_loss: 5.106675338745117
epoch: 35 training_loss 4.920993995666504 test_loss: 4.854326629638672
epoch: 36 training_loss 4.906569352149964 test_loss: 4.963828277587891
epoch: 37 training_loss 4.817541511058807 test_loss: 4.943510055541992
epoch: 38 training_loss 4.693148112297058 test_loss: 4.605978775024414
epoch: 39 training_loss 4.635959408283234 test_loss: 4.610350036621094
epoch: 40 training_loss 4.631098842620849 test_loss: 4.64081916809082
epoch: 41 training_loss 4.675259561538696 test_loss: 4.477810668945312
epoch: 42 training_loss 4.541223094463349 test_loss: 4.602750778198242
epoch: 43 training_loss 4.550633442401886 test_loss: 4.350073623657226
epoch: 44 training_loss 4.483590435981751 test_loss: 4.415141677856445
epoch: 45 training_loss 4.426852173805237 test_loss: 4.424214553833008
epoch: 46 training_loss 4.469411063194275 test_loss: 4.419501876831054
epoch: 47 training_loss 4.476097812652588 test_loss: 4.344890975952149
epoch: 48 training_loss 4.389855523109436 test_loss: 4.190664672851563
epoch: 49 training_loss 4.418265089988709 test_loss: 4.457872772216797
epoch: 50 training_loss 4.301666190624237 test_loss: 4.441100692749023
epoch: 51 training_loss 4.316417238712311 test_loss: 4.350436019897461
epoch: 52 training_loss 4.141227624416351 test_loss: 4.172064208984375
epoch: 53 training_loss 4.228368949890137 test_loss: 4.133505630493164
epoch: 54 training_loss 4.2239002203941345 test_loss: 4.125381851196289
epoch: 55 training_loss 4.085238356590271 test_loss: 4.145896530151367
epoch: 56 training_loss 4.205341019630432 test_loss: 4.108376312255859
epoch: 57 training_loss 4.131099252700806 test_loss: 4.076546096801758
epoch: 58 training_loss 4.064131016731262 test_loss: 3.98974723815918
epoch: 59 training_loss 4.042485859394073 test_loss: 3.988916015625
epoch: 60 training_loss 4.033117954730987 test_loss: 4.144064331054688
epoch: 61 training_loss 4.002220258712769 test_loss: 3.971674346923828
epoch: 62 training_loss 4.023630237579345 test_loss: 4.030250167846679
epoch: 63 training_loss 4.006857166290283 test_loss: 3.9244029998779295
epoch: 64 training_loss 3.9750794124603273 test_loss: 3.944486618041992
epoch: 65 training_loss 3.9010157203674316 test_loss: 3.9490528106689453
epoch: 66 training_loss 3.8972385811805723 test_loss: 3.8435523986816404
epoch: 67 training_loss 4.0110541796684265 test_loss: 3.884252166748047
epoch: 68 training_loss 3.7884894323349 test_loss: 3.8550125122070313
epoch: 69 training_loss 3.827491319179535 test_loss: 3.982918930053711
epoch: 70 training_loss 3.7294267344474794 test_loss: 3.8111812591552736
epoch: 71 training_loss 3.8729654002189635 test_loss: 3.760491943359375
epoch: 72 training_loss 3.7966318559646606 test_loss: 3.7525882720947266
epoch: 73 training_loss 3.6722980618476866 test_loss: 3.6724388122558596
epoch: 74 training_loss 3.7627047371864317 test_loss: 3.8683498382568358
epoch: 75 training_loss 3.7706737446784975 test_loss: 3.7130672454833986
epoch: 76 training_loss 3.659370789527893 test_loss: 3.732687759399414
epoch: 77 training_loss 3.67753378868103 test_loss: 3.5549617767333985
epoch: 78 training_loss 3.7083101058006287 test_loss: 3.5964370727539063
epoch: 79 training_loss 3.6188934421539307 test_loss: 3.685196304321289
epoch: 80 training_loss 3.6751649284362795 test_loss: 3.633990478515625
epoch: 81 training_loss 3.6574031925201416 test_loss: 3.473977279663086
epoch: 82 training_loss 3.627619514465332 test_loss: 3.6376083374023436
epoch: 83 training_loss 3.639028241634369 test_loss: 3.764176940917969
epoch: 84 training_loss 3.6397568511962892 test_loss: 3.587314224243164
epoch: 85 training_loss 3.5248407411575315 test_loss: 3.50234375
epoch: 86 training_loss 3.549293029308319 test_loss: 3.4556194305419923
epoch: 87 training_loss 3.505294132232666 test_loss: 3.527053451538086
epoch: 88 training_loss 3.514194579124451 test_loss: 3.4795883178710936
epoch: 89 training_loss 3.5689560651779173 test_loss: 3.678646469116211
epoch: 90 training_loss 3.593077962398529 test_loss: 3.551249313354492
epoch: 91 training_loss 3.501192903518677 test_loss: 3.485075759887695
epoch: 92 training_loss 3.3999822473526002 test_loss: 3.3379356384277346
epoch: 93 training_loss 3.450058689117432 test_loss: 3.528690719604492
epoch: 94 training_loss 3.424434034824371 test_loss: 3.6502593994140624
epoch: 95 training_loss 3.3814799070358275 test_loss: 3.3252506256103516
epoch: 96 training_loss 3.4347305035591127 test_loss: 3.3588912963867186
epoch: 97 training_loss 3.4652749252319337 test_loss: 3.3328357696533204
epoch: 98 training_loss 3.391509189605713 test_loss: 3.3177864074707033
epoch: 99 training_loss 3.325026273727417 test_loss: 3.396865463256836
epoch: 100 training_loss 3.4353213906288147 test_loss: 3.378017044067383
epoch: 101 training_loss 3.374880895614624 test_loss: 3.3563709259033203
epoch: 102 training_loss 3.3607760500907897 test_loss: 3.4653594970703123
epoch: 103 training_loss 3.334160385131836 test_loss: 3.441005325317383
epoch: 104 training_loss 3.38411146402359 test_loss: 3.4715885162353515
epoch: 105 training_loss 3.2787192964553835 test_loss: 3.3493015289306642
epoch: 106 training_loss 3.301850402355194 test_loss: 3.221749496459961
epoch: 107 training_loss 3.2961005640029906 test_loss: 3.2319591522216795
epoch: 108 training_loss 3.349852929115295 test_loss: 3.2199535369873047
epoch: 109 training_loss 3.2836741065979003 test_loss: 3.2290294647216795
epoch: 110 training_loss 3.3186051177978517 test_loss: 3.298202133178711
epoch: 111 training_loss 3.265132131576538 test_loss: 3.2892124176025392
epoch: 112 training_loss 3.2626598191261293 test_loss: 3.1129995346069337
epoch: 113 training_loss 3.24806125164032 test_loss: 3.1641889572143556
epoch: 114 training_loss 3.25647132396698 test_loss: 3.190046691894531
epoch: 115 training_loss 3.163195462226868 test_loss: 3.102915954589844
epoch: 116 training_loss 3.1560874509811403 test_loss: 3.108138656616211
epoch: 117 training_loss 3.2152109050750735 test_loss: 3.174984169006348
epoch: 118 training_loss 3.2112714648246765 test_loss: 3.1243968963623048
epoch: 119 training_loss 3.2116074919700623 test_loss: 3.290027618408203
epoch: 120 training_loss 3.2321451473236085 test_loss: 3.2068099975585938
epoch: 121 training_loss 3.2442341160774233 test_loss: 3.124762535095215
epoch: 122 training_loss 3.2068856835365294 test_loss: 3.233524703979492
epoch: 123 training_loss 3.1162552356719972 test_loss: 3.2099689483642577
epoch: 124 training_loss 3.1464774203300476 test_loss: 3.171359634399414
epoch: 125 training_loss 3.160492718219757 test_loss: 3.234174346923828
epoch: 126 training_loss 3.150871250629425 test_loss: 3.0205194473266603
epoch: 127 training_loss 3.0906764888763427 test_loss: 3.1036367416381836
epoch: 128 training_loss 3.0623764204978943 test_loss: 3.1122577667236326
epoch: 129 training_loss 3.0326760315895083 test_loss: 3.022393798828125
epoch: 130 training_loss 3.128373398780823 test_loss: 2.9919878005981446
epoch: 131 training_loss 3.1325620293617247 test_loss: 3.1131313323974608
epoch: 132 training_loss 3.0769471669197084 test_loss: 2.8524831771850585
epoch: 133 training_loss 3.0884553480148313 test_loss: 3.142733573913574
epoch: 134 training_loss 3.06056884765625 test_loss: 3.148845672607422
epoch: 135 training_loss 3.057457423210144 test_loss: 2.9810821533203127
epoch: 136 training_loss 3.1093080711364744 test_loss: 3.0149145126342773
epoch: 137 training_loss 3.029701342582703 test_loss: 3.097438430786133
epoch: 138 training_loss 2.9740870141983033 test_loss: 3.0112037658691406
epoch: 139 training_loss 3.0589322662353515 test_loss: 3.129598045349121
epoch: 140 training_loss 3.032619369029999 test_loss: 2.940165710449219
epoch: 141 training_loss 2.9903212857246397 test_loss: 3.082177925109863
epoch: 142 training_loss 2.9775647807121275 test_loss: 3.168839454650879
epoch: 143 training_loss 3.060986375808716 test_loss: 2.9693492889404296
epoch: 144 training_loss 2.982389566898346 test_loss: 3.0365453720092774
epoch: 145 training_loss 2.9673669743537903 test_loss: 2.938772201538086
epoch: 146 training_loss 3.024815945625305 test_loss: 3.065708541870117
epoch: 147 training_loss 3.0100705552101137 test_loss: 2.9598773956298827
epoch: 148 training_loss 3.0004119062423706 test_loss: 2.8762889862060548
epoch: 149 training_loss 2.9977415943145753 test_loss: 2.917843246459961
5186.738503940999
episode: 0 training return: tensor(-465.6627, device='cuda:0')
episode: 1 training return: tensor(-489.0919, device='cuda:0')
episode: 2 training return: tensor(-502.4295, device='cuda:0')
episode: 3 training return: tensor(-425.2536, device='cuda:0')
epoch: 1 test_true_pfm: 5108.569751167699 sim_pfm: -480.7144988802708
episode: 4 training return: tensor(-486.5833, device='cuda:0')
episode: 5 training return: tensor(-457.3157, device='cuda:0')
episode: 6 training return: tensor(-519.4499, device='cuda:0')
episode: 7 training return: tensor(-610.3082, device='cuda:0')
epoch: 2 test_true_pfm: 5619.958884661069 sim_pfm: -467.0848330707425
episode: 8 training return: tensor(-562.2957, device='cuda:0')
episode: 9 training return: tensor(-516.9383, device='cuda:0')
episode: 10 training return: tensor(-481.7798, device='cuda:0')
episode: 11 training return: tensor(-554.9954, device='cuda:0')
epoch: 3 test_true_pfm: 5099.82848730059 sim_pfm: -416.06782845138997
episode: 12 training return: tensor(-515.3323, device='cuda:0')
episode: 13 training return: tensor(-490.8024, device='cuda:0')
episode: 14 training return: tensor(-498.4016, device='cuda:0')
episode: 15 training return: tensor(-522.6705, device='cuda:0')
epoch: 4 test_true_pfm: 5492.545825540371 sim_pfm: -463.76916329258046
episode: 16 training return: tensor(-418.2244, device='cuda:0')
episode: 17 training return: tensor(-426.2575, device='cuda:0')
episode: 18 training return: tensor(-538.6254, device='cuda:0')
episode: 19 training return: tensor(-601.8198, device='cuda:0')
epoch: 5 test_true_pfm: 5186.431140963051 sim_pfm: -403.6627750089586
episode: 20 training return: tensor(-411.1336, device='cuda:0')
episode: 21 training return: tensor(-577.7715, device='cuda:0')
episode: 22 training return: tensor(-517.7726, device='cuda:0')
episode: 23 training return: tensor(-487.2180, device='cuda:0')
epoch: 6 test_true_pfm: 5286.911632862072 sim_pfm: -423.3317974961149
episode: 24 training return: tensor(-469.7196, device='cuda:0')
episode: 25 training return: tensor(-505.3387, device='cuda:0')
episode: 26 training return: tensor(-523.8459, device='cuda:0')
episode: 27 training return: tensor(-502.1851, device='cuda:0')
epoch: 7 test_true_pfm: 5252.584111004797 sim_pfm: -460.68404139901395
episode: 28 training return: tensor(-417.7405, device='cuda:0')
episode: 29 training return: tensor(-416.6096, device='cuda:0')
episode: 30 training return: tensor(-496.6833, device='cuda:0')
episode: 31 training return: tensor(-476.1386, device='cuda:0')
epoch: 8 test_true_pfm: 5177.749019303071 sim_pfm: -409.44076100186794
episode: 32 training return: tensor(-535.6627, device='cuda:0')
episode: 33 training return: tensor(-458.2600, device='cuda:0')
episode: 34 training return: tensor(-419.8926, device='cuda:0')
episode: 35 training return: tensor(-492.6677, device='cuda:0')
epoch: 9 test_true_pfm: 5228.815340279786 sim_pfm: -371.07246559514897
episode: 36 training return: tensor(-501.1226, device='cuda:0')
episode: 37 training return: tensor(-464.3288, device='cuda:0')
episode: 38 training return: tensor(-394.2299, device='cuda:0')
episode: 39 training return: tensor(-559.8231, device='cuda:0')
epoch: 10 test_true_pfm: 5249.57293818243 sim_pfm: -434.9191311853647
episode: 40 training return: tensor(-401.1539, device='cuda:0')
episode: 41 training return: tensor(-417.4435, device='cuda:0')
episode: 42 training return: tensor(-377.2165, device='cuda:0')
episode: 43 training return: tensor(-540.6442, device='cuda:0')
epoch: 11 test_true_pfm: 3580.317929848977 sim_pfm: -446.06034750525333
episode: 44 training return: tensor(-479.1890, device='cuda:0')
episode: 45 training return: tensor(-466.7359, device='cuda:0')
episode: 46 training return: tensor(-480.4160, device='cuda:0')
episode: 47 training return: tensor(-526.1740, device='cuda:0')
epoch: 12 test_true_pfm: 5176.456804545608 sim_pfm: -522.741546685303
episode: 48 training return: tensor(-452.0375, device='cuda:0')
episode: 49 training return: tensor(-476.7018, device='cuda:0')
episode: 50 training return: tensor(-486.6440, device='cuda:0')
episode: 51 training return: tensor(-447.8057, device='cuda:0')
epoch: 13 test_true_pfm: 5157.180701023568 sim_pfm: -548.3593980148726
episode: 52 training return: tensor(-439.4603, device='cuda:0')
episode: 53 training return: tensor(-260.9971, device='cuda:0')
episode: 54 training return: tensor(-497.3054, device='cuda:0')
episode: 55 training return: tensor(-449.5649, device='cuda:0')
epoch: 14 test_true_pfm: 5220.125053351404 sim_pfm: -347.5037956732267
episode: 56 training return: tensor(-405.9555, device='cuda:0')
episode: 57 training return: tensor(-457.6993, device='cuda:0')
episode: 58 training return: tensor(-478.1035, device='cuda:0')
episode: 59 training return: tensor(-382.8867, device='cuda:0')
epoch: 15 test_true_pfm: 5238.578559793842 sim_pfm: -416.2893906770817
episode: 60 training return: tensor(-406.8788, device='cuda:0')
episode: 61 training return: tensor(-264.7727, device='cuda:0')
episode: 62 training return: tensor(-491.8654, device='cuda:0')
episode: 63 training return: tensor(-363.2782, device='cuda:0')
epoch: 16 test_true_pfm: 5218.672315759939 sim_pfm: -373.14789169846335
episode: 64 training return: tensor(-382.0292, device='cuda:0')
episode: 65 training return: tensor(-359.4289, device='cuda:0')
episode: 66 training return: tensor(-340.4276, device='cuda:0')
episode: 67 training return: tensor(-478.6552, device='cuda:0')
epoch: 17 test_true_pfm: 5286.060065938297 sim_pfm: -557.6914494516872
episode: 68 training return: tensor(-454.7880, device='cuda:0')
episode: 69 training return: tensor(-370.2712, device='cuda:0')
episode: 70 training return: tensor(-513.4006, device='cuda:0')
episode: 71 training return: tensor(-427.8505, device='cuda:0')
epoch: 18 test_true_pfm: 5329.73502898595 sim_pfm: -371.23762847255176
episode: 72 training return: tensor(-449.7417, device='cuda:0')
episode: 73 training return: tensor(-387.7150, device='cuda:0')
episode: 74 training return: tensor(-434.9893, device='cuda:0')
episode: 75 training return: tensor(-488.5442, device='cuda:0')
epoch: 19 test_true_pfm: 5284.297934548975 sim_pfm: -379.23890713807003
episode: 76 training return: tensor(-429.4112, device='cuda:0')
episode: 77 training return: tensor(-410.1386, device='cuda:0')
episode: 78 training return: tensor(-479.8028, device='cuda:0')
episode: 79 training return: tensor(-433.2474, device='cuda:0')
epoch: 20 test_true_pfm: 5703.306314729976 sim_pfm: -353.9157880616646
episode: 80 training return: tensor(-424.7696, device='cuda:0')
episode: 81 training return: tensor(-320.9384, device='cuda:0')
episode: 82 training return: tensor(-423.0532, device='cuda:0')
episode: 83 training return: tensor(-417.6945, device='cuda:0')
epoch: 21 test_true_pfm: 5305.8419948389865 sim_pfm: -298.49954530319275
episode: 84 training return: tensor(-576.2615, device='cuda:0')
episode: 85 training return: tensor(-428.7467, device='cuda:0')
episode: 86 training return: tensor(-412.8792, device='cuda:0')
episode: 87 training return: tensor(-393.6646, device='cuda:0')
epoch: 22 test_true_pfm: 5555.773651995361 sim_pfm: -343.94002516548306
episode: 88 training return: tensor(-383.5333, device='cuda:0')
episode: 89 training return: tensor(-360.5444, device='cuda:0')
episode: 90 training return: tensor(-388.9317, device='cuda:0')
episode: 91 training return: tensor(-451.9912, device='cuda:0')
epoch: 23 test_true_pfm: 5326.585144744591 sim_pfm: -321.6201677455295
episode: 92 training return: tensor(-454.5612, device='cuda:0')
episode: 93 training return: tensor(-420.0994, device='cuda:0')
episode: 94 training return: tensor(-409.4286, device='cuda:0')
episode: 95 training return: tensor(-429.1530, device='cuda:0')
epoch: 24 test_true_pfm: 5274.09329160902 sim_pfm: -362.2844781839328
episode: 96 training return: tensor(-418.2219, device='cuda:0')
episode: 97 training return: tensor(-380.8584, device='cuda:0')
episode: 98 training return: tensor(-360.8316, device='cuda:0')
episode: 99 training return: tensor(-399.5266, device='cuda:0')
epoch: 25 test_true_pfm: 5247.997230322763 sim_pfm: -318.98100995154044
episode: 100 training return: tensor(-372.0177, device='cuda:0')
episode: 101 training return: tensor(-412.6347, device='cuda:0')
episode: 102 training return: tensor(-382.1953, device='cuda:0')
episode: 103 training return: tensor(-833.5364, device='cuda:0')
epoch: 26 test_true_pfm: 5411.016665225055 sim_pfm: -329.79412922860746
episode: 104 training return: tensor(-405.2152, device='cuda:0')
episode: 105 training return: tensor(-379.0618, device='cuda:0')
episode: 106 training return: tensor(-432.5026, device='cuda:0')
episode: 107 training return: tensor(-407.9851, device='cuda:0')
epoch: 27 test_true_pfm: 5354.61394601827 sim_pfm: -308.7765126107067
episode: 108 training return: tensor(-406.2954, device='cuda:0')
episode: 109 training return: tensor(-359.5359, device='cuda:0')
episode: 110 training return: tensor(-397.6528, device='cuda:0')
episode: 111 training return: tensor(-419.9500, device='cuda:0')
epoch: 28 test_true_pfm: 5407.424627223842 sim_pfm: -349.6056527903129
episode: 112 training return: tensor(-429.2725, device='cuda:0')
episode: 113 training return: tensor(-392.5022, device='cuda:0')
episode: 114 training return: tensor(-465.3070, device='cuda:0')
episode: 115 training return: tensor(-308.9793, device='cuda:0')
epoch: 29 test_true_pfm: 5311.626081304472 sim_pfm: -309.4710235353753
episode: 116 training return: tensor(-438.9841, device='cuda:0')
episode: 117 training return: tensor(-386.1633, device='cuda:0')
episode: 118 training return: tensor(-269.2864, device='cuda:0')
episode: 119 training return: tensor(-387.4514, device='cuda:0')
epoch: 30 test_true_pfm: 5017.448319425392 sim_pfm: -306.9278737953379
episode: 120 training return: tensor(-502.1443, device='cuda:0')
episode: 121 training return: tensor(-327.7494, device='cuda:0')
episode: 122 training return: tensor(-436.7170, device='cuda:0')
episode: 123 training return: tensor(-436.8338, device='cuda:0')
epoch: 31 test_true_pfm: 4703.547794245081 sim_pfm: -323.07151737106807
episode: 124 training return: tensor(-449.0263, device='cuda:0')
episode: 125 training return: tensor(-374.8250, device='cuda:0')
episode: 126 training return: tensor(-352.7270, device='cuda:0')
episode: 127 training return: tensor(-453.2993, device='cuda:0')
epoch: 32 test_true_pfm: 5395.121524708001 sim_pfm: -279.78351827631315
episode: 128 training return: tensor(-404.2413, device='cuda:0')
episode: 129 training return: tensor(-326.3319, device='cuda:0')
episode: 130 training return: tensor(-427.7900, device='cuda:0')
episode: 131 training return: tensor(-312.0032, device='cuda:0')
epoch: 33 test_true_pfm: 5411.890659898208 sim_pfm: -350.8419458102823
episode: 132 training return: tensor(-407.5443, device='cuda:0')
episode: 133 training return: tensor(-341.7493, device='cuda:0')
episode: 134 training return: tensor(-408.0116, device='cuda:0')
episode: 135 training return: tensor(-367.6573, device='cuda:0')
epoch: 34 test_true_pfm: 5443.9014749019125 sim_pfm: -294.05769938160665
episode: 136 training return: tensor(-383.1887, device='cuda:0')
episode: 137 training return: tensor(-339.6263, device='cuda:0')
episode: 138 training return: tensor(-526.9044, device='cuda:0')
episode: 139 training return: tensor(-405.3238, device='cuda:0')
epoch: 35 test_true_pfm: 5079.86069378658 sim_pfm: -306.4892016715991
episode: 140 training return: tensor(-521.4538, device='cuda:0')
episode: 141 training return: tensor(-387.3214, device='cuda:0')
episode: 142 training return: tensor(-385.2895, device='cuda:0')
episode: 143 training return: tensor(-423.6236, device='cuda:0')
epoch: 36 test_true_pfm: 5639.258400249545 sim_pfm: -299.22689710524475
episode: 144 training return: tensor(-354.2112, device='cuda:0')
episode: 145 training return: tensor(-300.8879, device='cuda:0')
episode: 146 training return: tensor(-252.9238, device='cuda:0')
episode: 147 training return: tensor(-261.1113, device='cuda:0')
epoch: 37 test_true_pfm: 4844.597615290572 sim_pfm: -305.72409671149217
episode: 148 training return: tensor(-516.5925, device='cuda:0')
episode: 149 training return: tensor(-302.3956, device='cuda:0')
episode: 150 training return: tensor(-423.5440, device='cuda:0')
episode: 151 training return: tensor(-363.0963, device='cuda:0')
epoch: 38 test_true_pfm: 5340.3491905437595 sim_pfm: -341.1664351533885
episode: 152 training return: tensor(-335.4044, device='cuda:0')
episode: 153 training return: tensor(-375.8608, device='cuda:0')
episode: 154 training return: tensor(-355.3926, device='cuda:0')
episode: 155 training return: tensor(-475.8659, device='cuda:0')
epoch: 39 test_true_pfm: 4428.978375231807 sim_pfm: -351.76724978072644
episode: 156 training return: tensor(-356.6600, device='cuda:0')
episode: 157 training return: tensor(-361.7339, device='cuda:0')
episode: 158 training return: tensor(-499.6241, device='cuda:0')
episode: 159 training return: tensor(-337.6513, device='cuda:0')
epoch: 40 test_true_pfm: 5321.482333207289 sim_pfm: -324.00951391316875
episode: 160 training return: tensor(-385.4639, device='cuda:0')
episode: 161 training return: tensor(-383.1040, device='cuda:0')
episode: 162 training return: tensor(-331.0165, device='cuda:0')
episode: 163 training return: tensor(-374.1508, device='cuda:0')
epoch: 41 test_true_pfm: 5442.497175384507 sim_pfm: -280.9898891309761
episode: 164 training return: tensor(-381.4416, device='cuda:0')
episode: 165 training return: tensor(-429.3260, device='cuda:0')
episode: 166 training return: tensor(-343.9547, device='cuda:0')
episode: 167 training return: tensor(-416.0411, device='cuda:0')
epoch: 42 test_true_pfm: 5470.943074537434 sim_pfm: -276.8086944827616
episode: 168 training return: tensor(-344.5250, device='cuda:0')
episode: 169 training return: tensor(-466.1232, device='cuda:0')
episode: 170 training return: tensor(-405.5347, device='cuda:0')
episode: 171 training return: tensor(-466.3804, device='cuda:0')
epoch: 43 test_true_pfm: 5587.513813190572 sim_pfm: -286.79977227623266
episode: 172 training return: tensor(-374.6010, device='cuda:0')
episode: 173 training return: tensor(-458.7155, device='cuda:0')
episode: 174 training return: tensor(-350.4986, device='cuda:0')
episode: 175 training return: tensor(-392.4714, device='cuda:0')
epoch: 44 test_true_pfm: 5617.757457598728 sim_pfm: -321.01426176415407
episode: 176 training return: tensor(-384.1853, device='cuda:0')
episode: 177 training return: tensor(-394.2763, device='cuda:0')
episode: 178 training return: tensor(-331.0691, device='cuda:0')
episode: 179 training return: tensor(-415.5292, device='cuda:0')
epoch: 45 test_true_pfm: 5427.919497618212 sim_pfm: -311.48901214535
episode: 180 training return: tensor(-343.1036, device='cuda:0')
episode: 181 training return: tensor(-452.7682, device='cuda:0')
episode: 182 training return: tensor(-394.9928, device='cuda:0')
episode: 183 training return: tensor(-402.8141, device='cuda:0')
epoch: 46 test_true_pfm: 5572.638416727455 sim_pfm: -529.8976560970768
episode: 184 training return: tensor(-497.1306, device='cuda:0')
episode: 185 training return: tensor(-422.1382, device='cuda:0')
episode: 186 training return: tensor(-307.3146, device='cuda:0')
episode: 187 training return: tensor(-370.9085, device='cuda:0')
epoch: 47 test_true_pfm: 5363.991284250207 sim_pfm: -312.0206034973283
episode: 188 training return: tensor(-253.3959, device='cuda:0')
episode: 189 training return: tensor(-336.2671, device='cuda:0')
episode: 190 training return: tensor(-440.8782, device='cuda:0')
episode: 191 training return: tensor(-285.0188, device='cuda:0')
epoch: 48 test_true_pfm: 5429.003012465062 sim_pfm: -332.56410382379545
episode: 192 training return: tensor(-314.8482, device='cuda:0')
episode: 193 training return: tensor(-391.9333, device='cuda:0')
episode: 194 training return: tensor(-326.4661, device='cuda:0')
episode: 195 training return: tensor(-453.5084, device='cuda:0')
epoch: 49 test_true_pfm: 5384.703385391952 sim_pfm: -273.57461898912635
episode: 196 training return: tensor(-370.1730, device='cuda:0')
episode: 197 training return: tensor(-412.0852, device='cuda:0')
episode: 198 training return: tensor(-283.7384, device='cuda:0')
episode: 199 training return: tensor(-409.0191, device='cuda:0')
epoch: 50 test_true_pfm: 5503.6707176460195 sim_pfm: -272.51755659881746
episode: 200 training return: tensor(-390.5188, device='cuda:0')
episode: 201 training return: tensor(-450.6793, device='cuda:0')
episode: 202 training return: tensor(-314.7517, device='cuda:0')
episode: 203 training return: tensor(-343.2446, device='cuda:0')
epoch: 51 test_true_pfm: 5373.342731583093 sim_pfm: -246.37967135265353
episode: 204 training return: tensor(-459.3980, device='cuda:0')
episode: 205 training return: tensor(-457.3785, device='cuda:0')
episode: 206 training return: tensor(-445.5957, device='cuda:0')
episode: 207 training return: tensor(-342.8765, device='cuda:0')
epoch: 52 test_true_pfm: 5421.256819818682 sim_pfm: -332.52730315580266
episode: 208 training return: tensor(-299.6688, device='cuda:0')
episode: 209 training return: tensor(-771.3770, device='cuda:0')
episode: 210 training return: tensor(-432.6736, device='cuda:0')
episode: 211 training return: tensor(-392.8240, device='cuda:0')
epoch: 53 test_true_pfm: 5950.139651359896 sim_pfm: -259.59658072828705
episode: 212 training return: tensor(-438.2016, device='cuda:0')
episode: 213 training return: tensor(-354.7462, device='cuda:0')
episode: 214 training return: tensor(-429.7393, device='cuda:0')
episode: 215 training return: tensor(-317.7287, device='cuda:0')
epoch: 54 test_true_pfm: 5503.736002157625 sim_pfm: -396.26745256212115
episode: 216 training return: tensor(-361.7472, device='cuda:0')
episode: 217 training return: tensor(-346.0165, device='cuda:0')
episode: 218 training return: tensor(-412.7206, device='cuda:0')
episode: 219 training return: tensor(-283.9359, device='cuda:0')
epoch: 55 test_true_pfm: 5439.07968603577 sim_pfm: -383.0665280115597
episode: 220 training return: tensor(-472.6310, device='cuda:0')
episode: 221 training return: tensor(-371.4788, device='cuda:0')
episode: 222 training return: tensor(-417.9576, device='cuda:0')
episode: 223 training return: tensor(-349.0123, device='cuda:0')
epoch: 56 test_true_pfm: 5485.389247294687 sim_pfm: -290.2399194389679
episode: 224 training return: tensor(-366.0931, device='cuda:0')
episode: 225 training return: tensor(-475.7252, device='cuda:0')
episode: 226 training return: tensor(-327.1594, device='cuda:0')
episode: 227 training return: tensor(-401.2650, device='cuda:0')
epoch: 57 test_true_pfm: 5679.625079470628 sim_pfm: -303.14443139439874
episode: 228 training return: tensor(-432.0797, device='cuda:0')
episode: 229 training return: tensor(-357.6069, device='cuda:0')
episode: 230 training return: tensor(-345.7524, device='cuda:0')
episode: 231 training return: tensor(-283.0443, device='cuda:0')
epoch: 58 test_true_pfm: 5421.408838538266 sim_pfm: -283.1162298457445
episode: 232 training return: tensor(-341.4336, device='cuda:0')
episode: 233 training return: tensor(-278.2192, device='cuda:0')
episode: 234 training return: tensor(-425.3652, device='cuda:0')
episode: 235 training return: tensor(-356.0383, device='cuda:0')
epoch: 59 test_true_pfm: 5467.821686195925 sim_pfm: -349.0850419832859
episode: 236 training return: tensor(-860.9998, device='cuda:0')
episode: 237 training return: tensor(-365.1237, device='cuda:0')
episode: 238 training return: tensor(-343.9481, device='cuda:0')
episode: 239 training return: tensor(-282.1086, device='cuda:0')
epoch: 60 test_true_pfm: 5499.60884541187 sim_pfm: -247.43786638741344
episode: 240 training return: tensor(-339.8140, device='cuda:0')
episode: 241 training return: tensor(-330.2093, device='cuda:0')
episode: 242 training return: tensor(-733.9811, device='cuda:0')
episode: 243 training return: tensor(-394.8219, device='cuda:0')
epoch: 61 test_true_pfm: 5316.70178477585 sim_pfm: -280.1495602814248
episode: 244 training return: tensor(-318.0178, device='cuda:0')
episode: 245 training return: tensor(-424.5301, device='cuda:0')
episode: 246 training return: tensor(-520.6166, device='cuda:0')
episode: 247 training return: tensor(-409.1396, device='cuda:0')
epoch: 62 test_true_pfm: 5498.368050192013 sim_pfm: -272.1171189805221
episode: 248 training return: tensor(-292.8248, device='cuda:0')
episode: 249 training return: tensor(-370.2242, device='cuda:0')
episode: 250 training return: tensor(-383.9331, device='cuda:0')
episode: 251 training return: tensor(-393.7600, device='cuda:0')
epoch: 63 test_true_pfm: 5534.6649691814355 sim_pfm: -305.23149682189495
episode: 252 training return: tensor(-331.8145, device='cuda:0')
episode: 253 training return: tensor(-408.8742, device='cuda:0')
episode: 254 training return: tensor(-400.1391, device='cuda:0')
episode: 255 training return: tensor(-359.5615, device='cuda:0')
epoch: 64 test_true_pfm: 5552.184680421616 sim_pfm: -201.0775162837041
episode: 256 training return: tensor(-405.9603, device='cuda:0')
episode: 257 training return: tensor(-404.0840, device='cuda:0')
episode: 258 training return: tensor(-384.1903, device='cuda:0')
episode: 259 training return: tensor(-350.7072, device='cuda:0')
epoch: 65 test_true_pfm: 5488.836109897398 sim_pfm: -225.08099857495594
episode: 260 training return: tensor(-428.2199, device='cuda:0')
episode: 261 training return: tensor(-501.5534, device='cuda:0')
episode: 262 training return: tensor(-317.2803, device='cuda:0')
episode: 263 training return: tensor(-314.5227, device='cuda:0')
epoch: 66 test_true_pfm: 5506.332784778292 sim_pfm: -263.3526571379528
episode: 264 training return: tensor(-338.4935, device='cuda:0')
episode: 265 training return: tensor(-516.3393, device='cuda:0')
episode: 266 training return: tensor(-403.5253, device='cuda:0')
episode: 267 training return: tensor(-336.8880, device='cuda:0')
epoch: 67 test_true_pfm: 5443.156019569983 sim_pfm: -300.5216133009235
episode: 268 training return: tensor(-458.8636, device='cuda:0')
episode: 269 training return: tensor(-267.6841, device='cuda:0')
episode: 270 training return: tensor(-365.3454, device='cuda:0')
episode: 271 training return: tensor(-317.6371, device='cuda:0')
epoch: 68 test_true_pfm: 5471.593653055245 sim_pfm: -293.4312012075582
episode: 272 training return: tensor(-423.2198, device='cuda:0')
episode: 273 training return: tensor(-318.5768, device='cuda:0')
episode: 274 training return: tensor(-337.8589, device='cuda:0')
episode: 275 training return: tensor(-295.9434, device='cuda:0')
epoch: 69 test_true_pfm: 5433.214796879507 sim_pfm: -290.5571212604991
episode: 276 training return: tensor(-297.9349, device='cuda:0')
episode: 277 training return: tensor(-312.0451, device='cuda:0')
episode: 278 training return: tensor(-450.1987, device='cuda:0')
episode: 279 training return: tensor(-435.5208, device='cuda:0')
epoch: 70 test_true_pfm: 5486.3470215936495 sim_pfm: -264.14631055154797
episode: 280 training return: tensor(-339.2133, device='cuda:0')
episode: 281 training return: tensor(-239.9423, device='cuda:0')
episode: 282 training return: tensor(-418.8438, device='cuda:0')
episode: 283 training return: tensor(-258.0853, device='cuda:0')
epoch: 71 test_true_pfm: 5682.030593979999 sim_pfm: -260.87216991713893
episode: 284 training return: tensor(-297.1578, device='cuda:0')
episode: 285 training return: tensor(-420.3985, device='cuda:0')
episode: 286 training return: tensor(-311.7064, device='cuda:0')
episode: 287 training return: tensor(-322.8898, device='cuda:0')
epoch: 72 test_true_pfm: 5626.071878275411 sim_pfm: -276.6668791888612
episode: 288 training return: tensor(-351.0236, device='cuda:0')
episode: 289 training return: tensor(-425.1429, device='cuda:0')
episode: 290 training return: tensor(-277.9699, device='cuda:0')
episode: 291 training return: tensor(-374.0208, device='cuda:0')
epoch: 73 test_true_pfm: 5325.317203063582 sim_pfm: -411.2005730837312
episode: 292 training return: tensor(-343.2859, device='cuda:0')
episode: 293 training return: tensor(-308.7795, device='cuda:0')
episode: 294 training return: tensor(-349.9817, device='cuda:0')
episode: 295 training return: tensor(-387.3966, device='cuda:0')
epoch: 74 test_true_pfm: 5452.5258362486375 sim_pfm: -260.97337005396065
episode: 296 training return: tensor(-344.8733, device='cuda:0')
episode: 297 training return: tensor(-369.7899, device='cuda:0')
episode: 298 training return: tensor(-484.5291, device='cuda:0')
episode: 299 training return: tensor(-457.6067, device='cuda:0')
epoch: 75 test_true_pfm: 5500.189114920475 sim_pfm: -215.593419389353
episode: 300 training return: tensor(-329.0080, device='cuda:0')
episode: 301 training return: tensor(-354.6425, device='cuda:0')
episode: 302 training return: tensor(-326.9988, device='cuda:0')
episode: 303 training return: tensor(-342.9791, device='cuda:0')
epoch: 76 test_true_pfm: 5499.850987127716 sim_pfm: -253.63518852651274
episode: 304 training return: tensor(-363.5455, device='cuda:0')
episode: 305 training return: tensor(-405.1137, device='cuda:0')
episode: 306 training return: tensor(-336.1935, device='cuda:0')
episode: 307 training return: tensor(-349.4870, device='cuda:0')
epoch: 77 test_true_pfm: 5464.339274024922 sim_pfm: -259.0681505295118
episode: 308 training return: tensor(-548.0399, device='cuda:0')
episode: 309 training return: tensor(-348.3424, device='cuda:0')
episode: 310 training return: tensor(-362.8526, device='cuda:0')
episode: 311 training return: tensor(-387.8564, device='cuda:0')
epoch: 78 test_true_pfm: 5371.033935374103 sim_pfm: -243.61177560361102
episode: 312 training return: tensor(-331.4135, device='cuda:0')
episode: 313 training return: tensor(-448.1632, device='cuda:0')
episode: 314 training return: tensor(-473.6845, device='cuda:0')
episode: 315 training return: tensor(-362.8409, device='cuda:0')
epoch: 79 test_true_pfm: 5495.573251511599 sim_pfm: -287.8267404185414
episode: 316 training return: tensor(-389.1071, device='cuda:0')
episode: 317 training return: tensor(-316.4952, device='cuda:0')
episode: 318 training return: tensor(-274.9025, device='cuda:0')
episode: 319 training return: tensor(-326.4989, device='cuda:0')
epoch: 80 test_true_pfm: 5546.430758646243 sim_pfm: -269.99937117075507
episode: 320 training return: tensor(-379.5961, device='cuda:0')
episode: 321 training return: tensor(-308.0521, device='cuda:0')
episode: 322 training return: tensor(-401.6710, device='cuda:0')
episode: 323 training return: tensor(-395.2047, device='cuda:0')
epoch: 81 test_true_pfm: 6264.703965264146 sim_pfm: -257.3692109710537
episode: 324 training return: tensor(-347.2820, device='cuda:0')
episode: 325 training return: tensor(-340.6673, device='cuda:0')
episode: 326 training return: tensor(-338.4730, device='cuda:0')
episode: 327 training return: tensor(-416.9812, device='cuda:0')
epoch: 82 test_true_pfm: 5525.757962448472 sim_pfm: -272.0180313017918
episode: 328 training return: tensor(-370.9155, device='cuda:0')
episode: 329 training return: tensor(-381.1314, device='cuda:0')
episode: 330 training return: tensor(-332.9008, device='cuda:0')
episode: 331 training return: tensor(-370.5277, device='cuda:0')
epoch: 83 test_true_pfm: 5628.662394130541 sim_pfm: -275.65776532533346
episode: 332 training return: tensor(-357.3750, device='cuda:0')
episode: 333 training return: tensor(-384.3919, device='cuda:0')
episode: 334 training return: tensor(-394.5915, device='cuda:0')
episode: 335 training return: tensor(-369.5823, device='cuda:0')
epoch: 84 test_true_pfm: 5451.225290252543 sim_pfm: -248.02333881355784
episode: 336 training return: tensor(-313.5298, device='cuda:0')
episode: 337 training return: tensor(-303.1219, device='cuda:0')
episode: 338 training return: tensor(-374.0024, device='cuda:0')
episode: 339 training return: tensor(-379.4635, device='cuda:0')
epoch: 85 test_true_pfm: 5564.272874230518 sim_pfm: -233.3802916048735
episode: 340 training return: tensor(-373.4800, device='cuda:0')
episode: 341 training return: tensor(-243.2972, device='cuda:0')
episode: 342 training return: tensor(-391.4342, device='cuda:0')
episode: 343 training return: tensor(-385.4368, device='cuda:0')
epoch: 86 test_true_pfm: 6004.675521464774 sim_pfm: -275.3012809900877
episode: 344 training return: tensor(-485.7784, device='cuda:0')
episode: 345 training return: tensor(-361.0696, device='cuda:0')
episode: 346 training return: tensor(-264.3208, device='cuda:0')
episode: 347 training return: tensor(-370.4630, device='cuda:0')
epoch: 87 test_true_pfm: 5540.997407425016 sim_pfm: -263.6118697125348
episode: 348 training return: tensor(-259.6853, device='cuda:0')
episode: 349 training return: tensor(-307.0180, device='cuda:0')
episode: 350 training return: tensor(-395.2058, device='cuda:0')
episode: 351 training return: tensor(-229.0036, device='cuda:0')
epoch: 88 test_true_pfm: 5482.653223702498 sim_pfm: -240.06508931248877
episode: 352 training return: tensor(-343.0310, device='cuda:0')
episode: 353 training return: tensor(-380.2305, device='cuda:0')
episode: 354 training return: tensor(-335.7267, device='cuda:0')
episode: 355 training return: tensor(-355.3361, device='cuda:0')
epoch: 89 test_true_pfm: 5967.337848264608 sim_pfm: -289.1971431781033
episode: 356 training return: tensor(-308.6601, device='cuda:0')
episode: 357 training return: tensor(-337.7753, device='cuda:0')
episode: 358 training return: tensor(-252.6850, device='cuda:0')
episode: 359 training return: tensor(-374.9282, device='cuda:0')
epoch: 90 test_true_pfm: 5365.972603831385 sim_pfm: -281.6262859047274
episode: 360 training return: tensor(-319.6279, device='cuda:0')
episode: 361 training return: tensor(-386.7661, device='cuda:0')
episode: 362 training return: tensor(-345.6518, device='cuda:0')
episode: 363 training return: tensor(-372.8358, device='cuda:0')
epoch: 91 test_true_pfm: 6210.765807255545 sim_pfm: -269.4652207809656
episode: 364 training return: tensor(-354.3815, device='cuda:0')
episode: 365 training return: tensor(-325.9060, device='cuda:0')
episode: 366 training return: tensor(-369.2813, device='cuda:0')
episode: 367 training return: tensor(-331.0811, device='cuda:0')
epoch: 92 test_true_pfm: 5734.219498109028 sim_pfm: -327.6270911620716
episode: 368 training return: tensor(-439.7538, device='cuda:0')
episode: 369 training return: tensor(-369.9503, device='cuda:0')
episode: 370 training return: tensor(-346.7021, device='cuda:0')
episode: 371 training return: tensor(-435.7988, device='cuda:0')
epoch: 93 test_true_pfm: 5453.722806388327 sim_pfm: -293.0285292220069
episode: 372 training return: tensor(-272.0914, device='cuda:0')
episode: 373 training return: tensor(-276.8022, device='cuda:0')
episode: 374 training return: tensor(-351.8759, device='cuda:0')
episode: 375 training return: tensor(-337.7238, device='cuda:0')
epoch: 94 test_true_pfm: 6409.078224534795 sim_pfm: -178.11385731979195
episode: 376 training return: tensor(-326.6289, device='cuda:0')
episode: 377 training return: tensor(-357.7440, device='cuda:0')
episode: 378 training return: tensor(-349.2056, device='cuda:0')
episode: 379 training return: tensor(-364.9097, device='cuda:0')
epoch: 95 test_true_pfm: 5555.004112117732 sim_pfm: -274.28851467101293
episode: 380 training return: tensor(-363.5837, device='cuda:0')
episode: 381 training return: tensor(-341.3784, device='cuda:0')
episode: 382 training return: tensor(-471.4368, device='cuda:0')
episode: 383 training return: tensor(-335.8048, device='cuda:0')
epoch: 96 test_true_pfm: 5531.284600078298 sim_pfm: -203.05897064205297
episode: 384 training return: tensor(-285.2202, device='cuda:0')
episode: 385 training return: tensor(-307.7883, device='cuda:0')
episode: 386 training return: tensor(-364.1080, device='cuda:0')
episode: 387 training return: tensor(-249.0721, device='cuda:0')
epoch: 97 test_true_pfm: 5513.697061585193 sim_pfm: -290.8810640575636
episode: 388 training return: tensor(-279.9158, device='cuda:0')
episode: 389 training return: tensor(-361.0124, device='cuda:0')
episode: 390 training return: tensor(-364.2857, device='cuda:0')
episode: 391 training return: tensor(-387.5299, device='cuda:0')
epoch: 98 test_true_pfm: 5438.702636573283 sim_pfm: -246.56000789802056
episode: 392 training return: tensor(-368.3434, device='cuda:0')
episode: 393 training return: tensor(-309.0652, device='cuda:0')
episode: 394 training return: tensor(-307.6185, device='cuda:0')
episode: 395 training return: tensor(-343.9945, device='cuda:0')
epoch: 99 test_true_pfm: 5460.396989633416 sim_pfm: -245.97598891113498
episode: 396 training return: tensor(-319.7943, device='cuda:0')
episode: 397 training return: tensor(-163.1777, device='cuda:0')
episode: 398 training return: tensor(-179.6143, device='cuda:0')
episode: 399 training return: tensor(-338.0040, device='cuda:0')
epoch: 100 test_true_pfm: 5624.388197371761 sim_pfm: -215.2604990536444
episode: 400 training return: tensor(-297.9676, device='cuda:0')
episode: 401 training return: tensor(-392.6175, device='cuda:0')
episode: 402 training return: tensor(-325.9999, device='cuda:0')
episode: 403 training return: tensor(-419.5641, device='cuda:0')
epoch: 101 test_true_pfm: 5492.272955156327 sim_pfm: -302.58253080629703
episode: 404 training return: tensor(-405.5986, device='cuda:0')
episode: 405 training return: tensor(-363.4100, device='cuda:0')
episode: 406 training return: tensor(-355.6922, device='cuda:0')
episode: 407 training return: tensor(-345.6314, device='cuda:0')
epoch: 102 test_true_pfm: 5509.911934333496 sim_pfm: -231.13920245491317
episode: 408 training return: tensor(-349.0229, device='cuda:0')
episode: 409 training return: tensor(-321.9289, device='cuda:0')
episode: 410 training return: tensor(-806.1876, device='cuda:0')
episode: 411 training return: tensor(-350.9814, device='cuda:0')
epoch: 103 test_true_pfm: 5528.237339397724 sim_pfm: -201.43132932425942
episode: 412 training return: tensor(-266.2547, device='cuda:0')
episode: 413 training return: tensor(-350.8001, device='cuda:0')
episode: 414 training return: tensor(-414.1045, device='cuda:0')
episode: 415 training return: tensor(-445.9401, device='cuda:0')
epoch: 104 test_true_pfm: 5560.405615547649 sim_pfm: -216.64922080018246
episode: 416 training return: tensor(-367.3049, device='cuda:0')
episode: 417 training return: tensor(-338.3028, device='cuda:0')
episode: 418 training return: tensor(-352.1455, device='cuda:0')
episode: 419 training return: tensor(-393.5990, device='cuda:0')
epoch: 105 test_true_pfm: 5566.904188587607 sim_pfm: -250.8219810448354
episode: 420 training return: tensor(-249.9447, device='cuda:0')
episode: 421 training return: tensor(-369.3592, device='cuda:0')
episode: 422 training return: tensor(-328.7061, device='cuda:0')
episode: 423 training return: tensor(-283.6075, device='cuda:0')
epoch: 106 test_true_pfm: 5464.878529806549 sim_pfm: -233.75093940127408
episode: 424 training return: tensor(-302.1306, device='cuda:0')
episode: 425 training return: tensor(-334.7471, device='cuda:0')
episode: 426 training return: tensor(-345.0553, device='cuda:0')
episode: 427 training return: tensor(-447.6076, device='cuda:0')
epoch: 107 test_true_pfm: 5495.537237592466 sim_pfm: -284.13909171668155
episode: 428 training return: tensor(-340.7191, device='cuda:0')
episode: 429 training return: tensor(-266.2829, device='cuda:0')
episode: 430 training return: tensor(-288.3763, device='cuda:0')
episode: 431 training return: tensor(-439.6312, device='cuda:0')
epoch: 108 test_true_pfm: 5570.961351372113 sim_pfm: -231.30869734147564
episode: 432 training return: tensor(-354.7211, device='cuda:0')
episode: 433 training return: tensor(-305.9775, device='cuda:0')
episode: 434 training return: tensor(-345.7202, device='cuda:0')
episode: 435 training return: tensor(-346.4840, device='cuda:0')
epoch: 109 test_true_pfm: 5458.784276206788 sim_pfm: -228.19954115958535
episode: 436 training return: tensor(-395.7283, device='cuda:0')
episode: 437 training return: tensor(-356.3454, device='cuda:0')
episode: 438 training return: tensor(-316.4244, device='cuda:0')
episode: 439 training return: tensor(-439.4629, device='cuda:0')
epoch: 110 test_true_pfm: 5525.294411337526 sim_pfm: -264.9048352513831
episode: 440 training return: tensor(-320.9921, device='cuda:0')
episode: 441 training return: tensor(-416.8679, device='cuda:0')
episode: 442 training return: tensor(-297.0099, device='cuda:0')
episode: 443 training return: tensor(-343.6280, device='cuda:0')
epoch: 111 test_true_pfm: 6055.334116218142 sim_pfm: -259.054097908452
episode: 444 training return: tensor(-355.6754, device='cuda:0')
episode: 445 training return: tensor(-338.8308, device='cuda:0')
episode: 446 training return: tensor(-366.8788, device='cuda:0')
episode: 447 training return: tensor(-338.0255, device='cuda:0')
epoch: 112 test_true_pfm: 5458.194371882576 sim_pfm: -341.3630541515401
episode: 448 training return: tensor(-830.5432, device='cuda:0')
episode: 449 training return: tensor(-306.1015, device='cuda:0')
episode: 450 training return: tensor(-258.3143, device='cuda:0')
episode: 451 training return: tensor(-301.0985, device='cuda:0')
epoch: 113 test_true_pfm: 5549.679355076383 sim_pfm: -228.84058267720198
episode: 452 training return: tensor(-374.8715, device='cuda:0')
episode: 453 training return: tensor(-301.8539, device='cuda:0')
episode: 454 training return: tensor(-424.8264, device='cuda:0')
episode: 455 training return: tensor(-429.7920, device='cuda:0')
epoch: 114 test_true_pfm: 5521.775075709411 sim_pfm: -253.78040639464356
episode: 456 training return: tensor(-261.7724, device='cuda:0')
episode: 457 training return: tensor(-298.7872, device='cuda:0')
episode: 458 training return: tensor(-274.4868, device='cuda:0')
episode: 459 training return: tensor(-257.6246, device='cuda:0')
epoch: 115 test_true_pfm: 5554.715516090433 sim_pfm: -265.1740790408318
episode: 460 training return: tensor(-259.9949, device='cuda:0')
episode: 461 training return: tensor(-314.1895, device='cuda:0')
episode: 462 training return: tensor(-312.6444, device='cuda:0')
episode: 463 training return: tensor(-329.3923, device='cuda:0')
epoch: 116 test_true_pfm: 5833.165216046434 sim_pfm: -276.4827526950394
episode: 464 training return: tensor(-302.8148, device='cuda:0')
episode: 465 training return: tensor(-311.8680, device='cuda:0')
episode: 466 training return: tensor(-312.4093, device='cuda:0')
episode: 467 training return: tensor(-323.7905, device='cuda:0')
epoch: 117 test_true_pfm: 4222.85732262407 sim_pfm: -232.92827333901855
episode: 468 training return: tensor(-385.6494, device='cuda:0')
episode: 469 training return: tensor(-439.6129, device='cuda:0')
episode: 470 training return: tensor(-220.2078, device='cuda:0')
episode: 471 training return: tensor(-412.5000, device='cuda:0')
epoch: 118 test_true_pfm: 5614.483587392329 sim_pfm: -263.1732588731393
episode: 472 training return: tensor(-445.8698, device='cuda:0')
episode: 473 training return: tensor(-385.8340, device='cuda:0')
episode: 474 training return: tensor(-411.3792, device='cuda:0')
episode: 475 training return: tensor(-379.0305, device='cuda:0')
epoch: 119 test_true_pfm: 5552.238824562329 sim_pfm: -273.01776796297054
episode: 476 training return: tensor(-342.4224, device='cuda:0')
episode: 477 training return: tensor(-378.7591, device='cuda:0')
episode: 478 training return: tensor(-336.8943, device='cuda:0')
episode: 479 training return: tensor(-273.2263, device='cuda:0')
epoch: 120 test_true_pfm: 5572.5323199750055 sim_pfm: -250.43230767998224
episode: 480 training return: tensor(-348.7113, device='cuda:0')
episode: 481 training return: tensor(-246.7735, device='cuda:0')
episode: 482 training return: tensor(-284.4706, device='cuda:0')
episode: 483 training return: tensor(-321.9513, device='cuda:0')
epoch: 121 test_true_pfm: 5484.010807568217 sim_pfm: -192.63605354593406
episode: 484 training return: tensor(-323.0385, device='cuda:0')
episode: 485 training return: tensor(-346.4842, device='cuda:0')
episode: 486 training return: tensor(-390.3163, device='cuda:0')
episode: 487 training return: tensor(-293.7261, device='cuda:0')
epoch: 122 test_true_pfm: 5547.123172153223 sim_pfm: -297.6021626271734
episode: 488 training return: tensor(-384.0379, device='cuda:0')
episode: 489 training return: tensor(-435.5463, device='cuda:0')
episode: 490 training return: tensor(-364.9547, device='cuda:0')
episode: 491 training return: tensor(-378.9320, device='cuda:0')
epoch: 123 test_true_pfm: 5472.035059287878 sim_pfm: -238.7735206099654
episode: 492 training return: tensor(-369.8942, device='cuda:0')
episode: 493 training return: tensor(-316.8413, device='cuda:0')
episode: 494 training return: tensor(-325.8485, device='cuda:0')
episode: 495 training return: tensor(-380.5154, device='cuda:0')
epoch: 124 test_true_pfm: 5503.264980638003 sim_pfm: -246.97493138653226
episode: 496 training return: tensor(-333.3286, device='cuda:0')
episode: 497 training return: tensor(-265.7433, device='cuda:0')
episode: 498 training return: tensor(-293.3367, device='cuda:0')
episode: 499 training return: tensor(-352.5544, device='cuda:0')
epoch: 125 test_true_pfm: 5763.768946261199 sim_pfm: -207.2646934935377
episode: 500 training return: tensor(-374.1534, device='cuda:0')
episode: 501 training return: tensor(-335.9775, device='cuda:0')
episode: 502 training return: tensor(-316.8482, device='cuda:0')
episode: 503 training return: tensor(-309.1170, device='cuda:0')
epoch: 126 test_true_pfm: 5663.920513084037 sim_pfm: -203.59078497460965
episode: 504 training return: tensor(-270.0608, device='cuda:0')
episode: 505 training return: tensor(-371.3773, device='cuda:0')
episode: 506 training return: tensor(-250.9590, device='cuda:0')
episode: 507 training return: tensor(-344.4486, device='cuda:0')
epoch: 127 test_true_pfm: 5478.51673299935 sim_pfm: -304.2602374528651
episode: 508 training return: tensor(-279.5206, device='cuda:0')
episode: 509 training return: tensor(-260.3924, device='cuda:0')
episode: 510 training return: tensor(-389.8604, device='cuda:0')
episode: 511 training return: tensor(-328.9488, device='cuda:0')
epoch: 128 test_true_pfm: 5416.401342502945 sim_pfm: -252.9337852417084
episode: 512 training return: tensor(-361.7098, device='cuda:0')
episode: 513 training return: tensor(-181.4599, device='cuda:0')
episode: 514 training return: tensor(-338.9523, device='cuda:0')
episode: 515 training return: tensor(-352.6976, device='cuda:0')
epoch: 129 test_true_pfm: 4872.366099249268 sim_pfm: -241.37541111492706
episode: 516 training return: tensor(-273.9243, device='cuda:0')
episode: 517 training return: tensor(-251.5187, device='cuda:0')
episode: 518 training return: tensor(-380.9733, device='cuda:0')
episode: 519 training return: tensor(-350.3500, device='cuda:0')
epoch: 130 test_true_pfm: 5545.610204659191 sim_pfm: -200.77514022558657
episode: 520 training return: tensor(-388.2596, device='cuda:0')
episode: 521 training return: tensor(-335.6385, device='cuda:0')
episode: 522 training return: tensor(-324.0089, device='cuda:0')
episode: 523 training return: tensor(-328.2269, device='cuda:0')
epoch: 131 test_true_pfm: 5587.004053529595 sim_pfm: -241.1968576060705
episode: 524 training return: tensor(-315.8307, device='cuda:0')
episode: 525 training return: tensor(-313.0253, device='cuda:0')
episode: 526 training return: tensor(-302.2936, device='cuda:0')
episode: 527 training return: tensor(-353.2034, device='cuda:0')
epoch: 132 test_true_pfm: 5648.296003938744 sim_pfm: -239.0082886085826
episode: 528 training return: tensor(-277.8159, device='cuda:0')
episode: 529 training return: tensor(-332.5223, device='cuda:0')
episode: 530 training return: tensor(-368.8481, device='cuda:0')
episode: 531 training return: tensor(-368.6295, device='cuda:0')
epoch: 133 test_true_pfm: 6197.960607127109 sim_pfm: -242.28797422206844
episode: 532 training return: tensor(-246.0247, device='cuda:0')
episode: 533 training return: tensor(-364.3957, device='cuda:0')
episode: 534 training return: tensor(-310.4594, device='cuda:0')
episode: 535 training return: tensor(-339.8195, device='cuda:0')
epoch: 134 test_true_pfm: 5535.060485553081 sim_pfm: -263.1381590549524
episode: 536 training return: tensor(-325.5574, device='cuda:0')
episode: 537 training return: tensor(-314.4131, device='cuda:0')
episode: 538 training return: tensor(-311.6027, device='cuda:0')
episode: 539 training return: tensor(-392.8615, device='cuda:0')
epoch: 135 test_true_pfm: 5614.482935382701 sim_pfm: -272.9830578759332
episode: 540 training return: tensor(-354.2818, device='cuda:0')
episode: 541 training return: tensor(-295.2026, device='cuda:0')
episode: 542 training return: tensor(-256.1856, device='cuda:0')
episode: 543 training return: tensor(-242.1665, device='cuda:0')
epoch: 136 test_true_pfm: 5597.6279216247885 sim_pfm: -201.68996044141628
episode: 544 training return: tensor(-342.1593, device='cuda:0')
episode: 545 training return: tensor(-325.8843, device='cuda:0')
episode: 546 training return: tensor(-386.8728, device='cuda:0')
episode: 547 training return: tensor(-453.7453, device='cuda:0')
epoch: 137 test_true_pfm: 5491.480948410445 sim_pfm: -219.19573918614574
episode: 548 training return: tensor(-298.2670, device='cuda:0')
episode: 549 training return: tensor(-356.0893, device='cuda:0')
episode: 550 training return: tensor(-327.1907, device='cuda:0')
episode: 551 training return: tensor(-312.0155, device='cuda:0')
epoch: 138 test_true_pfm: 5578.423624695621 sim_pfm: -177.6857420883025
episode: 552 training return: tensor(-311.8530, device='cuda:0')
episode: 553 training return: tensor(-324.8250, device='cuda:0')
episode: 554 training return: tensor(-230.1175, device='cuda:0')
episode: 555 training return: tensor(-354.9229, device='cuda:0')
epoch: 139 test_true_pfm: 5578.483365191981 sim_pfm: -245.0713285659925
episode: 556 training return: tensor(-281.0503, device='cuda:0')
episode: 557 training return: tensor(-257.5910, device='cuda:0')
episode: 558 training return: tensor(-272.6476, device='cuda:0')
episode: 559 training return: tensor(-302.7813, device='cuda:0')
epoch: 140 test_true_pfm: 5679.660513902135 sim_pfm: -208.99684758292278
episode: 560 training return: tensor(-355.4054, device='cuda:0')
episode: 561 training return: tensor(-433.9359, device='cuda:0')
episode: 562 training return: tensor(-265.9037, device='cuda:0')
episode: 563 training return: tensor(-422.0694, device='cuda:0')
epoch: 141 test_true_pfm: 5559.782069049023 sim_pfm: -224.46446762126288
episode: 564 training return: tensor(-326.6740, device='cuda:0')
episode: 565 training return: tensor(-213.0096, device='cuda:0')
episode: 566 training return: tensor(-276.9673, device='cuda:0')
episode: 567 training return: tensor(-350.6863, device='cuda:0')
epoch: 142 test_true_pfm: 5515.722898596079 sim_pfm: -260.82318733436597
episode: 568 training return: tensor(-239.4231, device='cuda:0')
episode: 569 training return: tensor(-296.3297, device='cuda:0')
episode: 570 training return: tensor(-361.6047, device='cuda:0')
episode: 571 training return: tensor(-240.9402, device='cuda:0')
epoch: 143 test_true_pfm: 5993.5376646581235 sim_pfm: -183.05444616334475
episode: 572 training return: tensor(-328.9700, device='cuda:0')
episode: 573 training return: tensor(-322.5010, device='cuda:0')
episode: 574 training return: tensor(-273.1998, device='cuda:0')
episode: 575 training return: tensor(-296.7127, device='cuda:0')
epoch: 144 test_true_pfm: 5576.263108122418 sim_pfm: -187.09687983297044
episode: 576 training return: tensor(-355.1238, device='cuda:0')
episode: 577 training return: tensor(-279.6916, device='cuda:0')
episode: 578 training return: tensor(-331.9472, device='cuda:0')
episode: 579 training return: tensor(-315.0767, device='cuda:0')
epoch: 145 test_true_pfm: 5516.7734511753215 sim_pfm: -265.3433859590344
episode: 580 training return: tensor(-325.3260, device='cuda:0')
episode: 581 training return: tensor(-392.8918, device='cuda:0')
episode: 582 training return: tensor(-379.1661, device='cuda:0')
episode: 583 training return: tensor(-396.0269, device='cuda:0')
epoch: 146 test_true_pfm: 5490.031363849844 sim_pfm: -231.8528164323749
episode: 584 training return: tensor(-284.9776, device='cuda:0')
episode: 585 training return: tensor(-306.3786, device='cuda:0')
episode: 586 training return: tensor(-280.9642, device='cuda:0')
episode: 587 training return: tensor(-313.3020, device='cuda:0')
epoch: 147 test_true_pfm: 5561.901805437272 sim_pfm: -250.80029313000463
episode: 588 training return: tensor(-332.3550, device='cuda:0')
episode: 589 training return: tensor(-284.6330, device='cuda:0')
episode: 590 training return: tensor(-242.1316, device='cuda:0')
episode: 591 training return: tensor(-342.0683, device='cuda:0')
epoch: 148 test_true_pfm: 5538.532252357013 sim_pfm: -218.0902780694693
episode: 592 training return: tensor(-317.6161, device='cuda:0')
episode: 593 training return: tensor(-246.1415, device='cuda:0')
episode: 594 training return: tensor(-284.9319, device='cuda:0')
episode: 595 training return: tensor(-245.1475, device='cuda:0')
epoch: 149 test_true_pfm: 5518.744200404194 sim_pfm: -235.74337118434292
episode: 596 training return: tensor(-471.0786, device='cuda:0')
episode: 597 training return: tensor(-325.3088, device='cuda:0')
episode: 598 training return: tensor(-220.2590, device='cuda:0')
episode: 599 training return: tensor(-335.2502, device='cuda:0')
epoch: 150 test_true_pfm: 5563.164190678875 sim_pfm: -236.39631745735338
