['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'mixed', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.21099705003201963 test_loss: 0.1474675416946411
epoch: 1 training_loss 0.14685961030423642 test_loss: 0.12593111991882325
epoch: 2 training_loss 0.12928684748709202 test_loss: 0.1265464186668396
epoch: 3 training_loss 0.12398122906684876 test_loss: 0.1113128423690796
epoch: 4 training_loss 0.11808702941983938 test_loss: 0.12220233678817749
epoch: 5 training_loss 0.11439988300204278 test_loss: 0.12088510990142823
epoch: 6 training_loss 0.11287106566131115 test_loss: 0.11403049230575561
epoch: 7 training_loss 0.11066160421818495 test_loss: 0.10997713804244995
epoch: 8 training_loss 0.11202572472393513 test_loss: 0.1027155876159668
epoch: 9 training_loss 0.11072258751839399 test_loss: 0.11006842851638794
epoch: 10 training_loss 0.10853391770273448 test_loss: 0.10001573562622071
epoch: 11 training_loss 0.10801810812205076 test_loss: 0.1158901333808899
epoch: 12 training_loss 0.10591630164533854 test_loss: 0.09681652188301086
epoch: 13 training_loss 0.10107434816658496 test_loss: 0.10651832818984985
epoch: 14 training_loss 0.10596210390329361 test_loss: 0.08971676826477051
epoch: 15 training_loss 0.10015981018543244 test_loss: 0.10508737564086915
epoch: 16 training_loss 0.09807398919016123 test_loss: 0.10774812698364258
epoch: 17 training_loss 0.09913538757711648 test_loss: 0.08976811766624451
epoch: 18 training_loss 0.09729972336441278 test_loss: 0.0876656413078308
epoch: 19 training_loss 0.10386025935411453 test_loss: 0.09683137536048889
epoch: 20 training_loss 0.09926867511123419 test_loss: 0.097760009765625
epoch: 21 training_loss 0.09100215725600719 test_loss: 0.08542460799217225
epoch: 22 training_loss 0.08976248182356357 test_loss: 0.10400086641311646
epoch: 23 training_loss 0.09238011416047812 test_loss: 0.106899094581604
epoch: 24 training_loss 0.08739386118017137 test_loss: 0.09158976078033447
epoch: 25 training_loss 0.08695720219984651 test_loss: 0.08992007374763489
epoch: 26 training_loss 0.08554953381419182 test_loss: 0.08671479821205139
epoch: 27 training_loss 0.0918473094329238 test_loss: 0.09814401865005493
epoch: 28 training_loss 0.08197486393153668 test_loss: 0.08205239176750183
epoch: 29 training_loss 0.0816222295537591 test_loss: 0.08472646474838257
epoch: 30 training_loss 0.08669461611658334 test_loss: 0.10206748247146606
epoch: 31 training_loss 0.07806326163932681 test_loss: 0.08759859800338746
epoch: 32 training_loss 0.07958401078358293 test_loss: 0.07480807900428772
epoch: 33 training_loss 0.07787560377269984 test_loss: 0.06673353314399719
epoch: 34 training_loss 0.0804552673175931 test_loss: 0.07950799465179444
epoch: 35 training_loss 0.07835715314373375 test_loss: 0.08695166110992432
epoch: 36 training_loss 0.07726563641801476 test_loss: 0.08047857880592346
epoch: 37 training_loss 0.07341647505760193 test_loss: 0.08031914234161378
epoch: 38 training_loss 0.07547797558829189 test_loss: 0.07202727794647217
epoch: 39 training_loss 0.07566083438694476 test_loss: 0.07824884653091431
epoch: 40 training_loss 0.07959948308765888 test_loss: 0.07520607709884644
epoch: 41 training_loss 0.07564013050869108 test_loss: 0.08642243146896363
epoch: 42 training_loss 0.0738920664228499 test_loss: 0.07882934808731079
epoch: 43 training_loss 0.07547760447487235 test_loss: 0.07774482369422912
epoch: 44 training_loss 0.06639175051823258 test_loss: 0.07723132371902466
epoch: 45 training_loss 0.07244398940354585 test_loss: 0.06664015650749207
epoch: 46 training_loss 0.07023017628118396 test_loss: 0.07504776120185852
epoch: 47 training_loss 0.07568748435005546 test_loss: 0.06951975226402282
epoch: 48 training_loss 0.07811350539326668 test_loss: 0.07724342346191407
epoch: 49 training_loss 0.06708349954336881 test_loss: 0.07717580199241639
epoch: 50 training_loss 0.0729644806869328 test_loss: 0.0747740924358368
epoch: 51 training_loss 0.07173700543120504 test_loss: 0.06939411163330078
epoch: 52 training_loss 0.07066103523597121 test_loss: 0.09453867077827453
epoch: 53 training_loss 0.07202854068949818 test_loss: 0.08382325172424317
epoch: 54 training_loss 0.06976742388680578 test_loss: 0.07756085991859436
epoch: 55 training_loss 0.0694521900266409 test_loss: 0.0627186119556427
epoch: 56 training_loss 0.07280305711552501 test_loss: 0.07771984338760377
epoch: 57 training_loss 0.0686894195433706 test_loss: 0.06923809051513671
epoch: 58 training_loss 0.06910637440159917 test_loss: 0.07701693177223205
epoch: 59 training_loss 0.07366274876520038 test_loss: 0.06610924005508423
epoch: 60 training_loss 0.07301282892003655 test_loss: 0.07339195609092712
epoch: 61 training_loss 0.06783731684088706 test_loss: 0.06295815706253052
epoch: 62 training_loss 0.06776500137522817 test_loss: 0.06983264088630677
epoch: 63 training_loss 0.06477375097572803 test_loss: 0.06741666197776794
epoch: 64 training_loss 0.07056563891470433 test_loss: 0.07094606161117553
epoch: 65 training_loss 0.06593537321314216 test_loss: 0.0642568290233612
epoch: 66 training_loss 0.06565703798085451 test_loss: 0.06033933162689209
epoch: 67 training_loss 0.06710276521742344 test_loss: 0.08298982381820678
epoch: 68 training_loss 0.06819416457787156 test_loss: 0.05556889176368714
epoch: 69 training_loss 0.06525751710869372 test_loss: 0.07623429894447327
epoch: 70 training_loss 0.06789529971778392 test_loss: 0.06227209568023682
epoch: 71 training_loss 0.07231791259720921 test_loss: 0.0720732033252716
epoch: 72 training_loss 0.07146663853898644 test_loss: 0.06748011708259583
epoch: 73 training_loss 0.0659938936214894 test_loss: 0.06126958727836609
epoch: 74 training_loss 0.06566501691937447 test_loss: 0.057481133937835695
epoch: 75 training_loss 0.06374037457630038 test_loss: 0.06677216291427612
epoch: 76 training_loss 0.0700413727760315 test_loss: 0.07167739272117615
epoch: 77 training_loss 0.07399575136601926 test_loss: 0.0701374351978302
epoch: 78 training_loss 0.06709128549322486 test_loss: 0.06847743391990661
epoch: 79 training_loss 0.06354297642596066 test_loss: 0.0746328890323639
epoch: 80 training_loss 0.06930365308187902 test_loss: 0.0727466344833374
epoch: 81 training_loss 0.06398978857323527 test_loss: 0.060290127992630005
epoch: 82 training_loss 0.06576137203723192 test_loss: 0.0765856683254242
epoch: 83 training_loss 0.06790342977270485 test_loss: 0.07010256648063659
epoch: 84 training_loss 0.06932891821488738 test_loss: 0.07733182907104492
epoch: 85 training_loss 0.07372881587594747 test_loss: 0.07008243203163148
epoch: 86 training_loss 0.06817899017594754 test_loss: 0.05764536857604981
epoch: 87 training_loss 0.06765399375930428 test_loss: 0.07150877118110657
epoch: 88 training_loss 0.07090554650872946 test_loss: 0.06877949237823486
epoch: 89 training_loss 0.06621252121403813 test_loss: 0.0653312623500824
epoch: 90 training_loss 0.0709516331832856 test_loss: 0.06726567149162292
epoch: 91 training_loss 0.06774830128066242 test_loss: 0.067183518409729
epoch: 92 training_loss 0.06548071749508382 test_loss: 0.05612252950668335
epoch: 93 training_loss 0.06937272479757667 test_loss: 0.08619054555892944
epoch: 94 training_loss 0.06790797853842377 test_loss: 0.07080814242362976
epoch: 95 training_loss 0.06654119351878762 test_loss: 0.07793958187103271
epoch: 96 training_loss 0.07019613499753177 test_loss: 0.059338217973709105
epoch: 97 training_loss 0.06409000027924776 test_loss: 0.06529805660247803
epoch: 98 training_loss 0.06360363613814116 test_loss: 0.057227426767349245
epoch: 99 training_loss 0.06958594501018524 test_loss: 0.0691652774810791
epoch: 100 training_loss 0.0662592538818717 test_loss: 0.06485854983329772
epoch: 101 training_loss 0.0669070528075099 test_loss: 0.07344198822975159
epoch: 102 training_loss 0.06560166902840138 test_loss: 0.06677486896514892
epoch: 103 training_loss 0.0673044556286186 test_loss: 0.07004227042198181
epoch: 104 training_loss 0.06049954765476286 test_loss: 0.06725945472717285
epoch: 105 training_loss 0.06471591759473086 test_loss: 0.06596308946609497
epoch: 106 training_loss 0.06496188336983323 test_loss: 0.07750228643417359
epoch: 107 training_loss 0.0715257415920496 test_loss: 0.08020208477973938
epoch: 108 training_loss 0.06579721674323082 test_loss: 0.07352047562599182
epoch: 109 training_loss 0.0680641576834023 test_loss: 0.07390249371528626
epoch: 110 training_loss 0.061877442952245475 test_loss: 0.057064175605773926
epoch: 111 training_loss 0.0679590159561485 test_loss: 0.0531747579574585
epoch: 112 training_loss 0.06595572013407945 test_loss: 0.06849702596664428
epoch: 113 training_loss 0.06613711988553404 test_loss: 0.06314520239830017
epoch: 114 training_loss 0.06830334978178143 test_loss: 0.06487631201744079
epoch: 115 training_loss 0.06887170559726656 test_loss: 0.06310171484947205
epoch: 116 training_loss 0.06985233603045345 test_loss: 0.07404300570487976
epoch: 117 training_loss 0.06520685165189206 test_loss: 0.0691521406173706
epoch: 118 training_loss 0.06579284209758043 test_loss: 0.05789967775344849
epoch: 119 training_loss 0.06588619347661734 test_loss: 0.06823402643203735
epoch: 120 training_loss 0.065620006788522 test_loss: 0.07155157923698426
epoch: 121 training_loss 0.06891705354675651 test_loss: 0.07117655277252197
epoch: 122 training_loss 0.06082198375836015 test_loss: 0.06934669017791747
epoch: 123 training_loss 0.06425221983343363 test_loss: 0.06821423172950744
epoch: 124 training_loss 0.06754861261695623 test_loss: 0.06363256573677063
epoch: 125 training_loss 0.06451065361499786 test_loss: 0.059501713514328
epoch: 126 training_loss 0.06502164537087082 test_loss: 0.07793456912040711
epoch: 127 training_loss 0.06831579451449216 test_loss: 0.06912904381752014
epoch: 128 training_loss 0.06152957441285253 test_loss: 0.06521736979484558
epoch: 129 training_loss 0.06437529794871807 test_loss: 0.06162288188934326
epoch: 130 training_loss 0.06992877006530762 test_loss: 0.07444655895233154
epoch: 131 training_loss 0.06556379498913883 test_loss: 0.06927539110183716
epoch: 132 training_loss 0.07001975430175662 test_loss: 0.06204524040222168
epoch: 133 training_loss 0.0638008495233953 test_loss: 0.07031255364418029
epoch: 134 training_loss 0.0664769197627902 test_loss: 0.060341262817382814
epoch: 135 training_loss 0.064879647269845 test_loss: 0.06739698052406311
epoch: 136 training_loss 0.0637868553120643 test_loss: 0.06664515137672425
epoch: 137 training_loss 0.06536207176744938 test_loss: 0.07482086420059204
epoch: 138 training_loss 0.06944587947800755 test_loss: 0.0649030327796936
epoch: 139 training_loss 0.0630525386147201 test_loss: 0.07810041308403015
epoch: 140 training_loss 0.06379169348627328 test_loss: 0.06499760746955871
epoch: 141 training_loss 0.06627099723555148 test_loss: 0.0678730309009552
epoch: 142 training_loss 0.06411653907969594 test_loss: 0.06420711278915406
epoch: 143 training_loss 0.06738306412473322 test_loss: 0.06934573650360107
epoch: 144 training_loss 0.05872620748355985 test_loss: 0.061961311101913455
epoch: 145 training_loss 0.06653497045394033 test_loss: 0.07454100847244263
epoch: 146 training_loss 0.06002527590841055 test_loss: 0.059984844923019406
epoch: 147 training_loss 0.06647643953561783 test_loss: 0.07849363088607789
epoch: 148 training_loss 0.06952856566756964 test_loss: 0.06361886858940125
epoch: 149 training_loss 0.0678688205871731 test_loss: 0.06824953556060791
epoch: 0 training_loss 58.30361225128174 test_loss: 35.06623840332031
epoch: 1 training_loss 27.75344856262207 test_loss: 22.65294189453125
epoch: 2 training_loss 20.740286464691163 test_loss: 19.1216796875
epoch: 3 training_loss 17.530980854034425 test_loss: 16.330137634277342
epoch: 4 training_loss 15.215540342330932 test_loss: 14.405845642089844
epoch: 5 training_loss 13.693666305541992 test_loss: 13.188986206054688
epoch: 6 training_loss 12.38722043991089 test_loss: 12.253790283203125
epoch: 7 training_loss 11.3839058303833 test_loss: 10.707472229003907
epoch: 8 training_loss 10.55775987625122 test_loss: 9.952467346191407
epoch: 9 training_loss 9.8290461063385 test_loss: 9.39282455444336
epoch: 10 training_loss 9.110799331665039 test_loss: 8.965953826904297
epoch: 11 training_loss 8.880986843109131 test_loss: 8.611023712158204
epoch: 12 training_loss 8.40473156452179 test_loss: 8.014982604980469
epoch: 13 training_loss 8.144651956558228 test_loss: 7.849039459228516
epoch: 14 training_loss 7.787024617195129 test_loss: 7.763263702392578
epoch: 15 training_loss 7.610586242675781 test_loss: 7.326173400878906
epoch: 16 training_loss 7.180741534233094 test_loss: 6.818013763427734
epoch: 17 training_loss 6.869671397209167 test_loss: 6.763877868652344
epoch: 18 training_loss 6.645382108688355 test_loss: 6.417531585693359
epoch: 19 training_loss 6.63463659286499 test_loss: 6.318075180053711
epoch: 20 training_loss 6.426121430397034 test_loss: 6.244860076904297
epoch: 21 training_loss 6.2848122024536135 test_loss: 6.084740066528321
epoch: 22 training_loss 6.127587614059448 test_loss: 6.051216125488281
epoch: 23 training_loss 5.92425217628479 test_loss: 5.86773796081543
epoch: 24 training_loss 5.9887559652328495 test_loss: 6.0101573944091795
epoch: 25 training_loss 5.7542371702194215 test_loss: 5.753866577148438
epoch: 26 training_loss 5.70030339717865 test_loss: 5.57410774230957
epoch: 27 training_loss 5.6475680589675905 test_loss: 5.657839584350586
epoch: 28 training_loss 5.591505246162415 test_loss: 5.503867721557617
epoch: 29 training_loss 5.517938437461853 test_loss: 5.231909561157226
epoch: 30 training_loss 5.3748850440979 test_loss: 5.296286010742188
epoch: 31 training_loss 5.360867314338684 test_loss: 5.307458114624024
epoch: 32 training_loss 5.296692447662354 test_loss: 5.289694595336914
epoch: 33 training_loss 5.105110340118408 test_loss: 4.903044509887695
epoch: 34 training_loss 5.147921915054321 test_loss: 4.932159042358398
epoch: 35 training_loss 5.0763746356964115 test_loss: 4.7861286163330075
epoch: 36 training_loss 4.96941680431366 test_loss: 4.762020111083984
epoch: 37 training_loss 5.010887084007263 test_loss: 4.916894149780274
epoch: 38 training_loss 5.0395661330223085 test_loss: 4.892890930175781
epoch: 39 training_loss 4.8905865406990054 test_loss: 4.9216564178466795
epoch: 40 training_loss 4.7891969180107115 test_loss: 4.606537628173828
epoch: 41 training_loss 4.7102383255958555 test_loss: 4.625036239624023
epoch: 42 training_loss 4.638083343505859 test_loss: 4.44133415222168
epoch: 43 training_loss 4.746056368350983 test_loss: 4.601739120483399
epoch: 44 training_loss 4.6684904646873475 test_loss: 4.497956848144531
epoch: 45 training_loss 4.637237949371338 test_loss: 4.4303642272949215
epoch: 46 training_loss 4.547953836917877 test_loss: 4.491632461547852
epoch: 47 training_loss 4.526033399105072 test_loss: 4.268779754638672
epoch: 48 training_loss 4.503213477134705 test_loss: 4.499786376953125
epoch: 49 training_loss 4.447920455932617 test_loss: 4.34296989440918
epoch: 50 training_loss 4.318528680801392 test_loss: 4.454017257690429
epoch: 51 training_loss 4.285752000808716 test_loss: 4.151198196411133
epoch: 52 training_loss 4.309972329139709 test_loss: 4.259576034545899
epoch: 53 training_loss 4.291881246566772 test_loss: 4.154840469360352
epoch: 54 training_loss 4.266255736351013 test_loss: 4.1636604309082035
epoch: 55 training_loss 4.214573011398316 test_loss: 4.014280700683594
epoch: 56 training_loss 4.1923876881599424 test_loss: 4.122221755981445
epoch: 57 training_loss 4.153598382472992 test_loss: 4.122644424438477
epoch: 58 training_loss 4.1578922367095945 test_loss: 3.958772659301758
epoch: 59 training_loss 4.074471929073334 test_loss: 3.9739849090576174
epoch: 60 training_loss 4.014111993312835 test_loss: 3.9358379364013674
epoch: 61 training_loss 4.0491774368286135 test_loss: 3.92882080078125
epoch: 62 training_loss 4.00522109746933 test_loss: 3.9580673217773437
epoch: 63 training_loss 4.012403159141541 test_loss: 3.9580554962158203
epoch: 64 training_loss 3.978811469078064 test_loss: 3.937057876586914
epoch: 65 training_loss 3.9644802904129026 test_loss: 3.8080596923828125
epoch: 66 training_loss 3.9417787170410157 test_loss: 3.6672325134277344
epoch: 67 training_loss 3.8561079025268556 test_loss: 3.7335018157958983
epoch: 68 training_loss 3.89935754776001 test_loss: 3.7689437866210938
epoch: 69 training_loss 3.744012596607208 test_loss: 3.8322002410888674
epoch: 70 training_loss 3.8506118726730345 test_loss: 3.746042251586914
epoch: 71 training_loss 3.894234619140625 test_loss: 3.7752334594726564
epoch: 72 training_loss 3.8358366751670836 test_loss: 3.7601104736328126
epoch: 73 training_loss 3.824126653671265 test_loss: 3.490341567993164
epoch: 74 training_loss 3.7796825432777403 test_loss: 3.8301647186279295
epoch: 75 training_loss 3.74407012462616 test_loss: 3.734357452392578
epoch: 76 training_loss 3.7565910816192627 test_loss: 3.6581615447998046
epoch: 77 training_loss 3.784655759334564 test_loss: 3.523876953125
epoch: 78 training_loss 3.739357895851135 test_loss: 3.6572185516357423
epoch: 79 training_loss 3.6707954454421996 test_loss: 3.5698837280273437
epoch: 80 training_loss 3.668662261962891 test_loss: 3.791892242431641
epoch: 81 training_loss 3.6903054785728453 test_loss: 3.5981555938720704
epoch: 82 training_loss 3.5903191637992857 test_loss: 3.569253158569336
epoch: 83 training_loss 3.595305542945862 test_loss: 3.612252426147461
epoch: 84 training_loss 3.6071012139320375 test_loss: 3.509236145019531
epoch: 85 training_loss 3.597314467430115 test_loss: 3.6674335479736326
epoch: 86 training_loss 3.521387641429901 test_loss: 3.6846466064453125
epoch: 87 training_loss 3.6403722381591797 test_loss: 3.4301048278808595
epoch: 88 training_loss 3.6178621363639833 test_loss: 3.306737518310547
epoch: 89 training_loss 3.536986997127533 test_loss: 3.584827423095703
epoch: 90 training_loss 3.522279589176178 test_loss: 3.508985137939453
epoch: 91 training_loss 3.578167381286621 test_loss: 3.6233997344970703
epoch: 92 training_loss 3.5579394102096558 test_loss: 3.3105987548828124
epoch: 93 training_loss 3.464666748046875 test_loss: 3.544381332397461
epoch: 94 training_loss 3.5074434018135072 test_loss: 3.326934814453125
epoch: 95 training_loss 3.457674653530121 test_loss: 3.3295333862304686
epoch: 96 training_loss 3.4083092164993287 test_loss: 3.540261459350586
epoch: 97 training_loss 3.4391071915626528 test_loss: 3.5918952941894533
epoch: 98 training_loss 3.387246918678284 test_loss: 3.392858123779297
epoch: 99 training_loss 3.415853941440582 test_loss: 3.4040298461914062
epoch: 100 training_loss 3.4414516258239747 test_loss: 3.436057281494141
epoch: 101 training_loss 3.4030928564071656 test_loss: 3.2114547729492187
epoch: 102 training_loss 3.4335171675682066 test_loss: 3.502139663696289
epoch: 103 training_loss 3.4006596302986143 test_loss: 3.409272003173828
epoch: 104 training_loss 3.3848769950866697 test_loss: 3.395458221435547
epoch: 105 training_loss 3.339039764404297 test_loss: 3.3928302764892577
epoch: 106 training_loss 3.3169316053390503 test_loss: 3.1170366287231444
epoch: 107 training_loss 3.352281527519226 test_loss: 3.2727333068847657
epoch: 108 training_loss 3.309345395565033 test_loss: 3.341450500488281
epoch: 109 training_loss 3.29961177110672 test_loss: 3.2491390228271486
epoch: 110 training_loss 3.293300986289978 test_loss: 3.2148422241210937
epoch: 111 training_loss 3.3370709586143494 test_loss: 3.432296371459961
epoch: 112 training_loss 3.2968638730049133 test_loss: 3.049616241455078
epoch: 113 training_loss 3.2901828956604002 test_loss: 3.2272335052490235
epoch: 114 training_loss 3.2648857164382936 test_loss: 3.1724987030029297
epoch: 115 training_loss 3.282679274082184 test_loss: 3.208683395385742
epoch: 116 training_loss 3.202001008987427 test_loss: 3.119724655151367
epoch: 117 training_loss 3.310367531776428 test_loss: 3.2070545196533202
epoch: 118 training_loss 3.176040623188019 test_loss: 3.1780315399169923
epoch: 119 training_loss 3.20557981967926 test_loss: 3.2577484130859373
epoch: 120 training_loss 3.138905420303345 test_loss: 3.111118125915527
epoch: 121 training_loss 3.251299271583557 test_loss: 3.2670051574707033
epoch: 122 training_loss 3.2004862689971922 test_loss: 3.204911804199219
epoch: 123 training_loss 3.18336688041687 test_loss: 3.13590087890625
epoch: 124 training_loss 3.155430121421814 test_loss: 3.2012489318847654
epoch: 125 training_loss 3.1588732171058655 test_loss: 3.0945945739746095
epoch: 126 training_loss 3.1468442416191102 test_loss: 3.079383659362793
epoch: 127 training_loss 3.1646248483657837 test_loss: 3.0448749542236326
epoch: 128 training_loss 3.1640446472167967 test_loss: 3.092936706542969
epoch: 129 training_loss 3.1299509024620056 test_loss: 3.0251480102539063
epoch: 130 training_loss 3.144808521270752 test_loss: 3.0946687698364257
epoch: 131 training_loss 3.123276138305664 test_loss: 2.939764404296875
epoch: 132 training_loss 3.130941114425659 test_loss: 3.0867849349975587
epoch: 133 training_loss 3.1782331323623656 test_loss: 3.0376800537109374
epoch: 134 training_loss 3.049065706729889 test_loss: 3.039290428161621
epoch: 135 training_loss 3.02622594833374 test_loss: 2.9676082611083983
epoch: 136 training_loss 3.0272272706031798 test_loss: 3.0170051574707033
epoch: 137 training_loss 3.110882875919342 test_loss: 3.1192516326904296
epoch: 138 training_loss 3.070343668460846 test_loss: 3.0488548278808594
epoch: 139 training_loss 3.0617307567596437 test_loss: 2.9136659622192385
epoch: 140 training_loss 3.1002921438217164 test_loss: 3.1217397689819335
epoch: 141 training_loss 3.049333682060242 test_loss: 2.8929481506347656
epoch: 142 training_loss 3.038582053184509 test_loss: 3.0031196594238283
epoch: 143 training_loss 3.1057214021682737 test_loss: 3.0214399337768554
epoch: 144 training_loss 3.034256718158722 test_loss: 2.9667945861816407
epoch: 145 training_loss 3.035490071773529 test_loss: 3.009963607788086
epoch: 146 training_loss 2.9534764742851256 test_loss: 2.9813682556152346
epoch: 147 training_loss 3.036885192394257 test_loss: 2.9474327087402346
epoch: 148 training_loss 2.9697920989990236 test_loss: 3.118768310546875
epoch: 149 training_loss 2.9663416862487795 test_loss: 2.997566795349121
5141.121660530651
episode: 0 training return: tensor(-560.8066, device='cuda:0')
episode: 1 training return: tensor(-797.0988, device='cuda:0')
episode: 2 training return: tensor(-550.2601, device='cuda:0')
episode: 3 training return: tensor(-528.0592, device='cuda:0')
epoch: 1 test_true_pfm: 5130.871407472822 sim_pfm: -405.08290820851107
episode: 4 training return: tensor(-484.6415, device='cuda:0')
episode: 5 training return: tensor(-506.0009, device='cuda:0')
episode: 6 training return: tensor(-461.7142, device='cuda:0')
episode: 7 training return: tensor(-427.5583, device='cuda:0')
epoch: 2 test_true_pfm: 5136.194246788739 sim_pfm: -432.63278455278487
episode: 8 training return: tensor(-469.6523, device='cuda:0')
episode: 9 training return: tensor(-516.1604, device='cuda:0')
episode: 10 training return: tensor(-584.0040, device='cuda:0')
episode: 11 training return: tensor(-403.5179, device='cuda:0')
epoch: 3 test_true_pfm: 4982.7287952148245 sim_pfm: -440.0717625376904
episode: 12 training return: tensor(-398.0994, device='cuda:0')
episode: 13 training return: tensor(-518.4670, device='cuda:0')
episode: 14 training return: tensor(-345.2364, device='cuda:0')
episode: 15 training return: tensor(-547.5303, device='cuda:0')
epoch: 4 test_true_pfm: 4994.783774031017 sim_pfm: -597.4015756165996
episode: 16 training return: tensor(-453.7360, device='cuda:0')
episode: 17 training return: tensor(-520.5273, device='cuda:0')
episode: 18 training return: tensor(-488.9225, device='cuda:0')
episode: 19 training return: tensor(-360.5721, device='cuda:0')
epoch: 5 test_true_pfm: 5192.700309010051 sim_pfm: -437.68968420367065
episode: 20 training return: tensor(-439.7427, device='cuda:0')
episode: 21 training return: tensor(-470.4949, device='cuda:0')
episode: 22 training return: tensor(-496.1778, device='cuda:0')
episode: 23 training return: tensor(-884.4372, device='cuda:0')
epoch: 6 test_true_pfm: 5123.757841101514 sim_pfm: -424.0267563596232
episode: 24 training return: tensor(-396.2226, device='cuda:0')
episode: 25 training return: tensor(-444.4660, device='cuda:0')
episode: 26 training return: tensor(-558.6278, device='cuda:0')
episode: 27 training return: tensor(-497.6340, device='cuda:0')
epoch: 7 test_true_pfm: 5223.823073366732 sim_pfm: -395.98269276635256
episode: 28 training return: tensor(-476.1093, device='cuda:0')
episode: 29 training return: tensor(-426.5421, device='cuda:0')
episode: 30 training return: tensor(-441.3568, device='cuda:0')
episode: 31 training return: tensor(-473.9467, device='cuda:0')
epoch: 8 test_true_pfm: 5073.279858300175 sim_pfm: -481.5317637324042
episode: 32 training return: tensor(-389.5625, device='cuda:0')
episode: 33 training return: tensor(-521.7684, device='cuda:0')
episode: 34 training return: tensor(-454.8752, device='cuda:0')
episode: 35 training return: tensor(-509.3347, device='cuda:0')
epoch: 9 test_true_pfm: 5080.451564812988 sim_pfm: -411.7906047688448
episode: 36 training return: tensor(-493.4931, device='cuda:0')
episode: 37 training return: tensor(-410.7959, device='cuda:0')
episode: 38 training return: tensor(-491.9152, device='cuda:0')
episode: 39 training return: tensor(-510.2255, device='cuda:0')
epoch: 10 test_true_pfm: 5133.864778800278 sim_pfm: -566.6016125517878
episode: 40 training return: tensor(-446.0536, device='cuda:0')
episode: 41 training return: tensor(-412.8329, device='cuda:0')
episode: 42 training return: tensor(-398.0503, device='cuda:0')
episode: 43 training return: tensor(-527.3565, device='cuda:0')
epoch: 11 test_true_pfm: 5133.477844964153 sim_pfm: -379.01783181851107
episode: 44 training return: tensor(-522.3721, device='cuda:0')
episode: 45 training return: tensor(-459.7648, device='cuda:0')
episode: 46 training return: tensor(-412.4178, device='cuda:0')
episode: 47 training return: tensor(-430.2166, device='cuda:0')
epoch: 12 test_true_pfm: 5227.579661530329 sim_pfm: -360.098444345155
episode: 48 training return: tensor(-423.4164, device='cuda:0')
episode: 49 training return: tensor(-428.6301, device='cuda:0')
episode: 50 training return: tensor(-414.5372, device='cuda:0')
episode: 51 training return: tensor(-445.5061, device='cuda:0')
epoch: 13 test_true_pfm: 5263.375677328629 sim_pfm: -389.72305979183875
episode: 52 training return: tensor(-387.7619, device='cuda:0')
episode: 53 training return: tensor(-473.9084, device='cuda:0')
episode: 54 training return: tensor(-466.8381, device='cuda:0')
episode: 55 training return: tensor(-442.7404, device='cuda:0')
epoch: 14 test_true_pfm: 5130.966118169418 sim_pfm: -412.33718572411453
episode: 56 training return: tensor(-462.8968, device='cuda:0')
episode: 57 training return: tensor(-561.7350, device='cuda:0')
episode: 58 training return: tensor(-411.8624, device='cuda:0')
episode: 59 training return: tensor(-384.5329, device='cuda:0')
epoch: 15 test_true_pfm: 5302.409469174986 sim_pfm: -400.104848574847
episode: 60 training return: tensor(-488.3810, device='cuda:0')
episode: 61 training return: tensor(-492.7498, device='cuda:0')
episode: 62 training return: tensor(-467.3745, device='cuda:0')
episode: 63 training return: tensor(-419.8848, device='cuda:0')
epoch: 16 test_true_pfm: 5179.448099847369 sim_pfm: -370.6364622017524
episode: 64 training return: tensor(-469.7088, device='cuda:0')
episode: 65 training return: tensor(-507.6117, device='cuda:0')
episode: 66 training return: tensor(-472.3275, device='cuda:0')
episode: 67 training return: tensor(-365.3366, device='cuda:0')
epoch: 17 test_true_pfm: 5192.899821516156 sim_pfm: -394.2411232419545
episode: 68 training return: tensor(-410.9800, device='cuda:0')
episode: 69 training return: tensor(-426.7625, device='cuda:0')
episode: 70 training return: tensor(-396.4951, device='cuda:0')
episode: 71 training return: tensor(-436.7586, device='cuda:0')
epoch: 18 test_true_pfm: 5310.51928183212 sim_pfm: -324.23239237852977
episode: 72 training return: tensor(-407.2519, device='cuda:0')
episode: 73 training return: tensor(-416.2490, device='cuda:0')
episode: 74 training return: tensor(-429.2747, device='cuda:0')
episode: 75 training return: tensor(-402.1681, device='cuda:0')
epoch: 19 test_true_pfm: 5226.320394627965 sim_pfm: -367.6920787114844
episode: 76 training return: tensor(-387.1650, device='cuda:0')
episode: 77 training return: tensor(-452.5024, device='cuda:0')
episode: 78 training return: tensor(-423.7029, device='cuda:0')
episode: 79 training return: tensor(-370.3843, device='cuda:0')
epoch: 20 test_true_pfm: 5299.678094344211 sim_pfm: -357.6925597755083
episode: 80 training return: tensor(-469.1128, device='cuda:0')
episode: 81 training return: tensor(-448.2883, device='cuda:0')
episode: 82 training return: tensor(-427.0944, device='cuda:0')
episode: 83 training return: tensor(-519.2330, device='cuda:0')
epoch: 21 test_true_pfm: 5244.5059542011395 sim_pfm: -352.84298367617885
episode: 84 training return: tensor(-419.4800, device='cuda:0')
episode: 85 training return: tensor(-427.8069, device='cuda:0')
episode: 86 training return: tensor(-472.0975, device='cuda:0')
episode: 87 training return: tensor(-431.4734, device='cuda:0')
epoch: 22 test_true_pfm: 5347.945072044282 sim_pfm: -347.324948014934
episode: 88 training return: tensor(-436.4806, device='cuda:0')
episode: 89 training return: tensor(-407.2084, device='cuda:0')
episode: 90 training return: tensor(-371.0421, device='cuda:0')
episode: 91 training return: tensor(-481.0408, device='cuda:0')
epoch: 23 test_true_pfm: 5260.030488171659 sim_pfm: -518.3366185579216
episode: 92 training return: tensor(-381.8434, device='cuda:0')
episode: 93 training return: tensor(-414.0691, device='cuda:0')
episode: 94 training return: tensor(-397.6094, device='cuda:0')
episode: 95 training return: tensor(-490.4893, device='cuda:0')
epoch: 24 test_true_pfm: 5167.923374796312 sim_pfm: -352.05794859759044
episode: 96 training return: tensor(-454.7062, device='cuda:0')
episode: 97 training return: tensor(-458.9372, device='cuda:0')
episode: 98 training return: tensor(-345.5630, device='cuda:0')
episode: 99 training return: tensor(-341.2870, device='cuda:0')
epoch: 25 test_true_pfm: 5357.183388620659 sim_pfm: -325.4673702941703
episode: 100 training return: tensor(-356.0015, device='cuda:0')
episode: 101 training return: tensor(-379.0075, device='cuda:0')
episode: 102 training return: tensor(-463.2636, device='cuda:0')
episode: 103 training return: tensor(-379.5993, device='cuda:0')
epoch: 26 test_true_pfm: 5286.736792011184 sim_pfm: -335.16295202758437
episode: 104 training return: tensor(-314.9433, device='cuda:0')
episode: 105 training return: tensor(-375.9473, device='cuda:0')
episode: 106 training return: tensor(-468.7219, device='cuda:0')
episode: 107 training return: tensor(-471.6086, device='cuda:0')
epoch: 27 test_true_pfm: 5335.959786935248 sim_pfm: -389.39151495846454
episode: 108 training return: tensor(-410.5740, device='cuda:0')
episode: 109 training return: tensor(-416.0987, device='cuda:0')
episode: 110 training return: tensor(-414.0161, device='cuda:0')
episode: 111 training return: tensor(-272.8990, device='cuda:0')
epoch: 28 test_true_pfm: 5316.413075862947 sim_pfm: -357.6761718491228
episode: 112 training return: tensor(-455.4602, device='cuda:0')
episode: 113 training return: tensor(-459.2353, device='cuda:0')
episode: 114 training return: tensor(-520.4193, device='cuda:0')
episode: 115 training return: tensor(-359.8658, device='cuda:0')
epoch: 29 test_true_pfm: 5217.19078601769 sim_pfm: -360.87048712536733
episode: 116 training return: tensor(-364.0102, device='cuda:0')
episode: 117 training return: tensor(-378.2735, device='cuda:0')
episode: 118 training return: tensor(-367.1420, device='cuda:0')
episode: 119 training return: tensor(-428.2119, device='cuda:0')
epoch: 30 test_true_pfm: 5298.909879838763 sim_pfm: -316.67762979651644
episode: 120 training return: tensor(-405.7573, device='cuda:0')
episode: 121 training return: tensor(-357.4048, device='cuda:0')
episode: 122 training return: tensor(-361.8429, device='cuda:0')
episode: 123 training return: tensor(-284.1567, device='cuda:0')
epoch: 31 test_true_pfm: 5324.982129867691 sim_pfm: -326.66299447906204
episode: 124 training return: tensor(-377.5721, device='cuda:0')
episode: 125 training return: tensor(-289.3039, device='cuda:0')
episode: 126 training return: tensor(-423.3605, device='cuda:0')
episode: 127 training return: tensor(-313.3514, device='cuda:0')
epoch: 32 test_true_pfm: 5430.383407452059 sim_pfm: -306.5838018547317
episode: 128 training return: tensor(-360.2250, device='cuda:0')
episode: 129 training return: tensor(-459.6044, device='cuda:0')
episode: 130 training return: tensor(-404.9317, device='cuda:0')
episode: 131 training return: tensor(-413.8048, device='cuda:0')
epoch: 33 test_true_pfm: 5346.672510245327 sim_pfm: -281.9861290331173
episode: 132 training return: tensor(-392.0708, device='cuda:0')
episode: 133 training return: tensor(-382.1602, device='cuda:0')
episode: 134 training return: tensor(-395.1575, device='cuda:0')
episode: 135 training return: tensor(-358.2532, device='cuda:0')
epoch: 34 test_true_pfm: 5321.521986135168 sim_pfm: -295.1959349397803
episode: 136 training return: tensor(-375.9602, device='cuda:0')
episode: 137 training return: tensor(-404.6465, device='cuda:0')
episode: 138 training return: tensor(-377.8649, device='cuda:0')
episode: 139 training return: tensor(-446.2542, device='cuda:0')
epoch: 35 test_true_pfm: 5408.650639875887 sim_pfm: -325.7068837907961
episode: 140 training return: tensor(-428.3808, device='cuda:0')
episode: 141 training return: tensor(-393.7238, device='cuda:0')
episode: 142 training return: tensor(-434.0669, device='cuda:0')
episode: 143 training return: tensor(-443.5435, device='cuda:0')
epoch: 36 test_true_pfm: 5290.723640445437 sim_pfm: -346.76644949651865
episode: 144 training return: tensor(-328.6090, device='cuda:0')
episode: 145 training return: tensor(-500.9489, device='cuda:0')
episode: 146 training return: tensor(-344.6972, device='cuda:0')
episode: 147 training return: tensor(-363.6251, device='cuda:0')
epoch: 37 test_true_pfm: 5321.71245018684 sim_pfm: -276.7331771296837
episode: 148 training return: tensor(-406.4368, device='cuda:0')
episode: 149 training return: tensor(-314.8562, device='cuda:0')
episode: 150 training return: tensor(-334.2275, device='cuda:0')
episode: 151 training return: tensor(-244.2661, device='cuda:0')
epoch: 38 test_true_pfm: 5379.756063197255 sim_pfm: -303.7544638746379
episode: 152 training return: tensor(-654.5924, device='cuda:0')
episode: 153 training return: tensor(-476.2751, device='cuda:0')
episode: 154 training return: tensor(-358.0769, device='cuda:0')
episode: 155 training return: tensor(-578.3329, device='cuda:0')
epoch: 39 test_true_pfm: 5384.554307456728 sim_pfm: -315.4762267512754
episode: 156 training return: tensor(-446.6543, device='cuda:0')
episode: 157 training return: tensor(-353.5863, device='cuda:0')
episode: 158 training return: tensor(-328.6574, device='cuda:0')
episode: 159 training return: tensor(-359.6936, device='cuda:0')
epoch: 40 test_true_pfm: 5429.1950847301205 sim_pfm: -303.5562880905539
episode: 160 training return: tensor(-315.5106, device='cuda:0')
episode: 161 training return: tensor(-302.1271, device='cuda:0')
episode: 162 training return: tensor(-357.4601, device='cuda:0')
episode: 163 training return: tensor(-333.6298, device='cuda:0')
epoch: 41 test_true_pfm: 5414.1671525266065 sim_pfm: -264.35587229915353
episode: 164 training return: tensor(-360.6985, device='cuda:0')
episode: 165 training return: tensor(-381.1624, device='cuda:0')
episode: 166 training return: tensor(-356.5756, device='cuda:0')
episode: 167 training return: tensor(-379.8346, device='cuda:0')
epoch: 42 test_true_pfm: 5434.152472695513 sim_pfm: -265.38912336465245
episode: 168 training return: tensor(-451.5923, device='cuda:0')
episode: 169 training return: tensor(-396.2666, device='cuda:0')
episode: 170 training return: tensor(-395.5172, device='cuda:0')
episode: 171 training return: tensor(-349.0548, device='cuda:0')
epoch: 43 test_true_pfm: 5428.31773901057 sim_pfm: -216.3988853575526
episode: 172 training return: tensor(-313.8925, device='cuda:0')
episode: 173 training return: tensor(-361.0941, device='cuda:0')
episode: 174 training return: tensor(-336.7853, device='cuda:0')
episode: 175 training return: tensor(-436.4988, device='cuda:0')
epoch: 44 test_true_pfm: 5396.666405640828 sim_pfm: -302.87071444022394
episode: 176 training return: tensor(-386.5862, device='cuda:0')
episode: 177 training return: tensor(-279.3289, device='cuda:0')
episode: 178 training return: tensor(-379.4906, device='cuda:0')
episode: 179 training return: tensor(-330.6257, device='cuda:0')
epoch: 45 test_true_pfm: 5483.998811835548 sim_pfm: -276.29478027225315
episode: 180 training return: tensor(-313.2839, device='cuda:0')
episode: 181 training return: tensor(-363.7207, device='cuda:0')
episode: 182 training return: tensor(-346.5674, device='cuda:0')
episode: 183 training return: tensor(-246.7056, device='cuda:0')
epoch: 46 test_true_pfm: 5472.950244266597 sim_pfm: -238.64334993163357
episode: 184 training return: tensor(-398.8484, device='cuda:0')
episode: 185 training return: tensor(-357.0836, device='cuda:0')
episode: 186 training return: tensor(-330.9348, device='cuda:0')
episode: 187 training return: tensor(-385.3755, device='cuda:0')
epoch: 47 test_true_pfm: 5516.5450834963785 sim_pfm: -269.1961408163964
episode: 188 training return: tensor(-393.3918, device='cuda:0')
episode: 189 training return: tensor(-383.3606, device='cuda:0')
episode: 190 training return: tensor(-399.9318, device='cuda:0')
episode: 191 training return: tensor(-292.4918, device='cuda:0')
epoch: 48 test_true_pfm: 5439.425704752207 sim_pfm: -271.02913588172913
episode: 192 training return: tensor(-401.1436, device='cuda:0')
episode: 193 training return: tensor(-357.3174, device='cuda:0')
episode: 194 training return: tensor(-333.8075, device='cuda:0')
episode: 195 training return: tensor(-388.9252, device='cuda:0')
epoch: 49 test_true_pfm: 5378.890411801224 sim_pfm: -279.57498778156395
episode: 196 training return: tensor(-268.4223, device='cuda:0')
episode: 197 training return: tensor(-329.4455, device='cuda:0')
episode: 198 training return: tensor(-328.3747, device='cuda:0')
episode: 199 training return: tensor(-355.3205, device='cuda:0')
epoch: 50 test_true_pfm: 6832.602733718769 sim_pfm: -254.20256693140254
episode: 200 training return: tensor(-449.3594, device='cuda:0')
episode: 201 training return: tensor(-438.8454, device='cuda:0')
episode: 202 training return: tensor(-399.1497, device='cuda:0')
episode: 203 training return: tensor(-313.8594, device='cuda:0')
epoch: 51 test_true_pfm: 5527.015857486994 sim_pfm: -258.909862889365
episode: 204 training return: tensor(-344.3125, device='cuda:0')
episode: 205 training return: tensor(-291.8065, device='cuda:0')
episode: 206 training return: tensor(-422.0663, device='cuda:0')
episode: 207 training return: tensor(-311.3357, device='cuda:0')
epoch: 52 test_true_pfm: 5609.800842518696 sim_pfm: -248.44620088972928
episode: 208 training return: tensor(-338.2760, device='cuda:0')
episode: 209 training return: tensor(-319.1845, device='cuda:0')
episode: 210 training return: tensor(-404.5042, device='cuda:0')
episode: 211 training return: tensor(-518.9575, device='cuda:0')
epoch: 53 test_true_pfm: 5402.625887438228 sim_pfm: -277.6561180810677
episode: 212 training return: tensor(-326.7886, device='cuda:0')
episode: 213 training return: tensor(-224.1686, device='cuda:0')
episode: 214 training return: tensor(-338.3788, device='cuda:0')
episode: 215 training return: tensor(-399.3136, device='cuda:0')
epoch: 54 test_true_pfm: 5427.951312325215 sim_pfm: -218.34509848298816
episode: 216 training return: tensor(-388.7895, device='cuda:0')
episode: 217 training return: tensor(-329.4261, device='cuda:0')
episode: 218 training return: tensor(-270.5616, device='cuda:0')
episode: 219 training return: tensor(-271.2687, device='cuda:0')
epoch: 55 test_true_pfm: 5417.404473941366 sim_pfm: -267.2375100449038
episode: 220 training return: tensor(-385.8793, device='cuda:0')
episode: 221 training return: tensor(-357.7645, device='cuda:0')
episode: 222 training return: tensor(-380.6444, device='cuda:0')
episode: 223 training return: tensor(-410.7399, device='cuda:0')
epoch: 56 test_true_pfm: 5598.7836693912295 sim_pfm: -240.72115137443566
episode: 224 training return: tensor(-280.0562, device='cuda:0')
episode: 225 training return: tensor(-284.1693, device='cuda:0')
episode: 226 training return: tensor(-343.6408, device='cuda:0')
episode: 227 training return: tensor(-327.0282, device='cuda:0')
epoch: 57 test_true_pfm: 5541.792520553773 sim_pfm: -274.2926631292988
episode: 228 training return: tensor(-298.9753, device='cuda:0')
episode: 229 training return: tensor(-294.3680, device='cuda:0')
episode: 230 training return: tensor(-301.4432, device='cuda:0')
episode: 231 training return: tensor(-325.6853, device='cuda:0')
epoch: 58 test_true_pfm: 5444.995292459036 sim_pfm: -279.6860020676783
episode: 232 training return: tensor(-402.0030, device='cuda:0')
episode: 233 training return: tensor(-361.3030, device='cuda:0')
episode: 234 training return: tensor(-309.7654, device='cuda:0')
episode: 235 training return: tensor(-390.4409, device='cuda:0')
epoch: 59 test_true_pfm: 5447.721780557192 sim_pfm: -248.11880428685495
episode: 236 training return: tensor(-292.0947, device='cuda:0')
episode: 237 training return: tensor(-261.2302, device='cuda:0')
episode: 238 training return: tensor(-338.5791, device='cuda:0')
episode: 239 training return: tensor(-372.7178, device='cuda:0')
epoch: 60 test_true_pfm: 5446.956724881747 sim_pfm: -204.91984119375897
episode: 240 training return: tensor(-351.6238, device='cuda:0')
episode: 241 training return: tensor(-407.8764, device='cuda:0')
episode: 242 training return: tensor(-429.0106, device='cuda:0')
episode: 243 training return: tensor(-392.7650, device='cuda:0')
epoch: 61 test_true_pfm: 5454.690636199434 sim_pfm: -238.32052545810197
episode: 244 training return: tensor(-297.2942, device='cuda:0')
episode: 245 training return: tensor(-408.3636, device='cuda:0')
episode: 246 training return: tensor(-355.0036, device='cuda:0')
episode: 247 training return: tensor(-275.2378, device='cuda:0')
epoch: 62 test_true_pfm: 5584.503232260543 sim_pfm: -227.74687331441478
episode: 248 training return: tensor(-355.1974, device='cuda:0')
episode: 249 training return: tensor(-296.3143, device='cuda:0')
episode: 250 training return: tensor(-320.1415, device='cuda:0')
episode: 251 training return: tensor(-378.1992, device='cuda:0')
epoch: 63 test_true_pfm: 5474.197967444746 sim_pfm: -279.2803667805371
episode: 252 training return: tensor(-328.9797, device='cuda:0')
episode: 253 training return: tensor(-418.3494, device='cuda:0')
episode: 254 training return: tensor(-377.5789, device='cuda:0')
episode: 255 training return: tensor(-237.5099, device='cuda:0')
epoch: 64 test_true_pfm: 5542.852707196606 sim_pfm: -227.7708928171002
episode: 256 training return: tensor(-270.5036, device='cuda:0')
episode: 257 training return: tensor(-376.2247, device='cuda:0')
episode: 258 training return: tensor(-348.9814, device='cuda:0')
episode: 259 training return: tensor(-296.5449, device='cuda:0')
epoch: 65 test_true_pfm: 3577.75561163825 sim_pfm: -221.14241574790017
episode: 260 training return: tensor(-418.5030, device='cuda:0')
episode: 261 training return: tensor(-295.3776, device='cuda:0')
episode: 262 training return: tensor(-367.3138, device='cuda:0')
episode: 263 training return: tensor(-351.8397, device='cuda:0')
epoch: 66 test_true_pfm: 5439.755412259366 sim_pfm: -244.35348849521446
episode: 264 training return: tensor(-335.1062, device='cuda:0')
episode: 265 training return: tensor(-379.0929, device='cuda:0')
episode: 266 training return: tensor(-377.1561, device='cuda:0')
episode: 267 training return: tensor(-399.2707, device='cuda:0')
epoch: 67 test_true_pfm: 5522.326562769397 sim_pfm: -219.98918496776605
episode: 268 training return: tensor(-284.6310, device='cuda:0')
episode: 269 training return: tensor(-352.6368, device='cuda:0')
episode: 270 training return: tensor(-322.8219, device='cuda:0')
episode: 271 training return: tensor(-467.4314, device='cuda:0')
epoch: 68 test_true_pfm: 5627.711909751933 sim_pfm: -231.83046477264725
episode: 272 training return: tensor(-294.9789, device='cuda:0')
episode: 273 training return: tensor(-332.8711, device='cuda:0')
episode: 274 training return: tensor(-338.3802, device='cuda:0')
episode: 275 training return: tensor(-380.2457, device='cuda:0')
epoch: 69 test_true_pfm: 5529.463192929942 sim_pfm: -259.9648774568341
episode: 276 training return: tensor(-302.9421, device='cuda:0')
episode: 277 training return: tensor(-362.7215, device='cuda:0')
episode: 278 training return: tensor(-324.4661, device='cuda:0')
episode: 279 training return: tensor(-322.2419, device='cuda:0')
epoch: 70 test_true_pfm: 5503.471585565403 sim_pfm: -273.8660359357891
episode: 280 training return: tensor(-373.5055, device='cuda:0')
episode: 281 training return: tensor(-262.9988, device='cuda:0')
episode: 282 training return: tensor(-347.1982, device='cuda:0')
episode: 283 training return: tensor(-263.3260, device='cuda:0')
epoch: 71 test_true_pfm: 5587.6426590891815 sim_pfm: -230.01416724772812
episode: 284 training return: tensor(-326.9756, device='cuda:0')
episode: 285 training return: tensor(-325.5281, device='cuda:0')
episode: 286 training return: tensor(-443.8840, device='cuda:0')
episode: 287 training return: tensor(-278.6991, device='cuda:0')
epoch: 72 test_true_pfm: 5525.735125133721 sim_pfm: -259.84643983580946
episode: 288 training return: tensor(-268.8724, device='cuda:0')
episode: 289 training return: tensor(-323.6198, device='cuda:0')
episode: 290 training return: tensor(-291.6477, device='cuda:0')
episode: 291 training return: tensor(-373.0559, device='cuda:0')
epoch: 73 test_true_pfm: 5498.732384035317 sim_pfm: -196.43546616767222
episode: 292 training return: tensor(-319.6713, device='cuda:0')
episode: 293 training return: tensor(-288.0401, device='cuda:0')
episode: 294 training return: tensor(-476.5381, device='cuda:0')
episode: 295 training return: tensor(-397.2992, device='cuda:0')
epoch: 74 test_true_pfm: 5325.756933281098 sim_pfm: -229.99125801338232
episode: 296 training return: tensor(-370.2930, device='cuda:0')
episode: 297 training return: tensor(-237.8239, device='cuda:0')
episode: 298 training return: tensor(-372.9810, device='cuda:0')
episode: 299 training return: tensor(-361.1516, device='cuda:0')
epoch: 75 test_true_pfm: 5479.958537459079 sim_pfm: -287.6097815992155
episode: 300 training return: tensor(-427.7466, device='cuda:0')
episode: 301 training return: tensor(-238.1981, device='cuda:0')
episode: 302 training return: tensor(-387.0854, device='cuda:0')
episode: 303 training return: tensor(-325.5851, device='cuda:0')
epoch: 76 test_true_pfm: 5610.14697144218 sim_pfm: -250.44805015990278
episode: 304 training return: tensor(-261.3184, device='cuda:0')
episode: 305 training return: tensor(-251.2523, device='cuda:0')
episode: 306 training return: tensor(-356.8742, device='cuda:0')
episode: 307 training return: tensor(-282.9863, device='cuda:0')
epoch: 77 test_true_pfm: 5534.924118888749 sim_pfm: -276.0379992521096
episode: 308 training return: tensor(-286.6871, device='cuda:0')
episode: 309 training return: tensor(-380.4702, device='cuda:0')
episode: 310 training return: tensor(-252.1001, device='cuda:0')
episode: 311 training return: tensor(-375.6174, device='cuda:0')
epoch: 78 test_true_pfm: 5529.801133588332 sim_pfm: -209.08182903619795
episode: 312 training return: tensor(-377.4310, device='cuda:0')
episode: 313 training return: tensor(-256.7948, device='cuda:0')
episode: 314 training return: tensor(-375.1595, device='cuda:0')
episode: 315 training return: tensor(-413.7802, device='cuda:0')
epoch: 79 test_true_pfm: 5448.983151806494 sim_pfm: -225.6314774558608
episode: 316 training return: tensor(-374.4564, device='cuda:0')
episode: 317 training return: tensor(-265.2333, device='cuda:0')
episode: 318 training return: tensor(-363.6213, device='cuda:0')
episode: 319 training return: tensor(-223.7023, device='cuda:0')
epoch: 80 test_true_pfm: 5520.563004370342 sim_pfm: -258.7762104562814
episode: 320 training return: tensor(-344.8824, device='cuda:0')
episode: 321 training return: tensor(-368.6562, device='cuda:0')
episode: 322 training return: tensor(-292.4475, device='cuda:0')
episode: 323 training return: tensor(-213.6780, device='cuda:0')
epoch: 81 test_true_pfm: 5542.450829250298 sim_pfm: -247.65882410935592
episode: 324 training return: tensor(-248.1208, device='cuda:0')
episode: 325 training return: tensor(-366.3124, device='cuda:0')
episode: 326 training return: tensor(-443.3118, device='cuda:0')
episode: 327 training return: tensor(-255.4000, device='cuda:0')
epoch: 82 test_true_pfm: 5637.222733296444 sim_pfm: -219.900432929319
episode: 328 training return: tensor(-236.5671, device='cuda:0')
episode: 329 training return: tensor(-309.3844, device='cuda:0')
episode: 330 training return: tensor(-408.5591, device='cuda:0')
episode: 331 training return: tensor(-272.9956, device='cuda:0')
epoch: 83 test_true_pfm: 5540.966525960259 sim_pfm: -188.3828795249865
episode: 332 training return: tensor(-313.5190, device='cuda:0')
episode: 333 training return: tensor(-359.7944, device='cuda:0')
episode: 334 training return: tensor(-243.0363, device='cuda:0')
episode: 335 training return: tensor(-340.9810, device='cuda:0')
epoch: 84 test_true_pfm: 5555.13566075474 sim_pfm: -203.94671883393312
episode: 336 training return: tensor(-390.0298, device='cuda:0')
episode: 337 training return: tensor(-302.5849, device='cuda:0')
episode: 338 training return: tensor(-338.1653, device='cuda:0')
episode: 339 training return: tensor(-377.7021, device='cuda:0')
epoch: 85 test_true_pfm: 5685.4276670474765 sim_pfm: -205.3314830112795
episode: 340 training return: tensor(-365.0513, device='cuda:0')
episode: 341 training return: tensor(-288.9762, device='cuda:0')
episode: 342 training return: tensor(-339.2245, device='cuda:0')
episode: 343 training return: tensor(-356.8051, device='cuda:0')
epoch: 86 test_true_pfm: 5610.469524995161 sim_pfm: -211.66270796597624
episode: 344 training return: tensor(-281.5575, device='cuda:0')
episode: 345 training return: tensor(-401.1084, device='cuda:0')
episode: 346 training return: tensor(-253.0737, device='cuda:0')
episode: 347 training return: tensor(-196.9069, device='cuda:0')
epoch: 87 test_true_pfm: 5598.8130044461 sim_pfm: -215.81348455798192
episode: 348 training return: tensor(-291.3158, device='cuda:0')
episode: 349 training return: tensor(-341.6647, device='cuda:0')
episode: 350 training return: tensor(-330.5802, device='cuda:0')
episode: 351 training return: tensor(-342.2570, device='cuda:0')
epoch: 88 test_true_pfm: 5564.6412715139695 sim_pfm: -198.28552327439925
episode: 352 training return: tensor(-259.1732, device='cuda:0')
episode: 353 training return: tensor(-302.0554, device='cuda:0')
episode: 354 training return: tensor(-393.2486, device='cuda:0')
episode: 355 training return: tensor(-228.4977, device='cuda:0')
epoch: 89 test_true_pfm: 5716.377258605669 sim_pfm: -240.75935825072034
episode: 356 training return: tensor(-285.6192, device='cuda:0')
episode: 357 training return: tensor(-309.8394, device='cuda:0')
episode: 358 training return: tensor(-307.7274, device='cuda:0')
episode: 359 training return: tensor(-338.2139, device='cuda:0')
epoch: 90 test_true_pfm: 5540.19553988236 sim_pfm: -216.02106243435992
episode: 360 training return: tensor(-375.9566, device='cuda:0')
episode: 361 training return: tensor(-288.5605, device='cuda:0')
episode: 362 training return: tensor(-351.1443, device='cuda:0')
episode: 363 training return: tensor(-272.0966, device='cuda:0')
epoch: 91 test_true_pfm: 5534.90317867983 sim_pfm: -205.14377020276152
episode: 364 training return: tensor(-304.5322, device='cuda:0')
episode: 365 training return: tensor(-317.4400, device='cuda:0')
episode: 366 training return: tensor(-321.0014, device='cuda:0')
episode: 367 training return: tensor(-374.2870, device='cuda:0')
epoch: 92 test_true_pfm: 5517.499555992062 sim_pfm: -200.5544698924059
episode: 368 training return: tensor(-471.2625, device='cuda:0')
episode: 369 training return: tensor(-351.2213, device='cuda:0')
episode: 370 training return: tensor(-299.0762, device='cuda:0')
episode: 371 training return: tensor(-350.6363, device='cuda:0')
epoch: 93 test_true_pfm: 5606.21200783354 sim_pfm: -240.47203143177708
episode: 372 training return: tensor(-221.7619, device='cuda:0')
episode: 373 training return: tensor(-392.9398, device='cuda:0')
episode: 374 training return: tensor(-362.7956, device='cuda:0')
episode: 375 training return: tensor(-277.0683, device='cuda:0')
epoch: 94 test_true_pfm: 6788.854564140432 sim_pfm: -213.57888698737952
episode: 376 training return: tensor(-317.6644, device='cuda:0')
episode: 377 training return: tensor(-313.1224, device='cuda:0')
episode: 378 training return: tensor(-258.3574, device='cuda:0')
episode: 379 training return: tensor(-233.8585, device='cuda:0')
epoch: 95 test_true_pfm: 5502.527846798362 sim_pfm: -241.90910629813638
episode: 380 training return: tensor(-304.5667, device='cuda:0')
episode: 381 training return: tensor(-255.3940, device='cuda:0')
episode: 382 training return: tensor(-334.2506, device='cuda:0')
episode: 383 training return: tensor(-324.8621, device='cuda:0')
epoch: 96 test_true_pfm: 5550.291794697393 sim_pfm: -232.8197315731377
episode: 384 training return: tensor(-270.5153, device='cuda:0')
episode: 385 training return: tensor(-298.6575, device='cuda:0')
episode: 386 training return: tensor(-215.0894, device='cuda:0')
episode: 387 training return: tensor(-239.2369, device='cuda:0')
epoch: 97 test_true_pfm: 5639.905325245197 sim_pfm: -227.60268687448115
episode: 388 training return: tensor(-344.7137, device='cuda:0')
episode: 389 training return: tensor(-324.8119, device='cuda:0')
episode: 390 training return: tensor(-227.5642, device='cuda:0')
episode: 391 training return: tensor(-335.4545, device='cuda:0')
epoch: 98 test_true_pfm: 5558.404743140637 sim_pfm: -195.4751093398178
episode: 392 training return: tensor(-289.6069, device='cuda:0')
episode: 393 training return: tensor(-195.9353, device='cuda:0')
episode: 394 training return: tensor(-245.6267, device='cuda:0')
episode: 395 training return: tensor(-293.7463, device='cuda:0')
epoch: 99 test_true_pfm: 5479.0163512898625 sim_pfm: -225.71072748328638
episode: 396 training return: tensor(-252.4647, device='cuda:0')
episode: 397 training return: tensor(-394.1669, device='cuda:0')
episode: 398 training return: tensor(-367.0961, device='cuda:0')
episode: 399 training return: tensor(-353.8585, device='cuda:0')
epoch: 100 test_true_pfm: 5548.715687145524 sim_pfm: -205.12099096202292
episode: 400 training return: tensor(-342.1363, device='cuda:0')
episode: 401 training return: tensor(-346.4835, device='cuda:0')
episode: 402 training return: tensor(-273.9885, device='cuda:0')
episode: 403 training return: tensor(-239.9048, device='cuda:0')
epoch: 101 test_true_pfm: 5600.084376851129 sim_pfm: -179.3644796158284
episode: 404 training return: tensor(-306.5065, device='cuda:0')
episode: 405 training return: tensor(-267.0356, device='cuda:0')
episode: 406 training return: tensor(-240.9260, device='cuda:0')
episode: 407 training return: tensor(-332.0115, device='cuda:0')
epoch: 102 test_true_pfm: 5633.4031088015245 sim_pfm: -234.20179543270692
episode: 408 training return: tensor(-256.2108, device='cuda:0')
episode: 409 training return: tensor(-467.3529, device='cuda:0')
episode: 410 training return: tensor(-326.9555, device='cuda:0')
episode: 411 training return: tensor(-277.8538, device='cuda:0')
epoch: 103 test_true_pfm: 5634.826844217477 sim_pfm: -222.69583752804706
episode: 412 training return: tensor(-354.5146, device='cuda:0')
episode: 413 training return: tensor(-291.9494, device='cuda:0')
episode: 414 training return: tensor(-256.4722, device='cuda:0')
episode: 415 training return: tensor(-257.0892, device='cuda:0')
epoch: 104 test_true_pfm: 5638.447121844193 sim_pfm: -184.5963614449914
episode: 416 training return: tensor(-290.5312, device='cuda:0')
episode: 417 training return: tensor(-293.8246, device='cuda:0')
episode: 418 training return: tensor(-254.0423, device='cuda:0')
episode: 419 training return: tensor(-333.3631, device='cuda:0')
epoch: 105 test_true_pfm: 5593.834099461205 sim_pfm: -202.36949327482338
episode: 420 training return: tensor(-285.6312, device='cuda:0')
episode: 421 training return: tensor(-255.4629, device='cuda:0')
episode: 422 training return: tensor(-276.0929, device='cuda:0')
episode: 423 training return: tensor(-365.0936, device='cuda:0')
epoch: 106 test_true_pfm: 5541.723013537546 sim_pfm: -214.72772634085655
episode: 424 training return: tensor(-238.3466, device='cuda:0')
episode: 425 training return: tensor(-313.7560, device='cuda:0')
episode: 426 training return: tensor(-300.3542, device='cuda:0')
episode: 427 training return: tensor(-226.2406, device='cuda:0')
epoch: 107 test_true_pfm: 5522.110444363156 sim_pfm: -212.0948494344484
episode: 428 training return: tensor(-291.4286, device='cuda:0')
episode: 429 training return: tensor(-360.6198, device='cuda:0')
episode: 430 training return: tensor(-278.8120, device='cuda:0')
episode: 431 training return: tensor(-203.8102, device='cuda:0')
epoch: 108 test_true_pfm: 5652.3384009243955 sim_pfm: -219.1218217793115
episode: 432 training return: tensor(-297.4438, device='cuda:0')
episode: 433 training return: tensor(-227.8849, device='cuda:0')
episode: 434 training return: tensor(-308.0163, device='cuda:0')
episode: 435 training return: tensor(-373.8539, device='cuda:0')
epoch: 109 test_true_pfm: 5565.498555715972 sim_pfm: -205.8383476259866
episode: 436 training return: tensor(-366.1904, device='cuda:0')
episode: 437 training return: tensor(-315.3284, device='cuda:0')
episode: 438 training return: tensor(-316.1284, device='cuda:0')
episode: 439 training return: tensor(-378.9882, device='cuda:0')
epoch: 110 test_true_pfm: 5650.346257787162 sim_pfm: -186.06730915792286
episode: 440 training return: tensor(-329.1134, device='cuda:0')
episode: 441 training return: tensor(-295.8574, device='cuda:0')
episode: 442 training return: tensor(-250.6079, device='cuda:0')
episode: 443 training return: tensor(-342.2371, device='cuda:0')
epoch: 111 test_true_pfm: 5617.529596841487 sim_pfm: -224.2985565684212
episode: 444 training return: tensor(-368.0536, device='cuda:0')
episode: 445 training return: tensor(-536.3926, device='cuda:0')
episode: 446 training return: tensor(-369.9903, device='cuda:0')
episode: 447 training return: tensor(-291.5054, device='cuda:0')
epoch: 112 test_true_pfm: 5801.6971297618975 sim_pfm: -214.87449317717497
episode: 448 training return: tensor(-292.7449, device='cuda:0')
episode: 449 training return: tensor(-239.2164, device='cuda:0')
episode: 450 training return: tensor(-318.0174, device='cuda:0')
episode: 451 training return: tensor(-259.3408, device='cuda:0')
epoch: 113 test_true_pfm: 5625.290669598202 sim_pfm: -174.38937368264305
episode: 452 training return: tensor(-364.1386, device='cuda:0')
episode: 453 training return: tensor(-317.4342, device='cuda:0')
episode: 454 training return: tensor(-388.3654, device='cuda:0')
episode: 455 training return: tensor(-379.0436, device='cuda:0')
epoch: 114 test_true_pfm: 5506.42113117212 sim_pfm: -155.37997972825542
episode: 456 training return: tensor(-197.6347, device='cuda:0')
episode: 457 training return: tensor(-346.4699, device='cuda:0')
episode: 458 training return: tensor(-337.6475, device='cuda:0')
episode: 459 training return: tensor(-288.5173, device='cuda:0')
epoch: 115 test_true_pfm: 5621.838706122144 sim_pfm: -244.9633303672308
episode: 460 training return: tensor(-302.6909, device='cuda:0')
episode: 461 training return: tensor(-333.2018, device='cuda:0')
episode: 462 training return: tensor(-250.1731, device='cuda:0')
episode: 463 training return: tensor(-259.5510, device='cuda:0')
epoch: 116 test_true_pfm: 5632.844060536762 sim_pfm: -218.5179141406552
episode: 464 training return: tensor(-251.1555, device='cuda:0')
episode: 465 training return: tensor(-349.9807, device='cuda:0')
episode: 466 training return: tensor(-365.0894, device='cuda:0')
episode: 467 training return: tensor(-331.5288, device='cuda:0')
epoch: 117 test_true_pfm: 5548.417268866989 sim_pfm: -241.4104472239851
episode: 468 training return: tensor(-287.9291, device='cuda:0')
episode: 469 training return: tensor(-284.0558, device='cuda:0')
episode: 470 training return: tensor(-334.8463, device='cuda:0')
episode: 471 training return: tensor(-261.7495, device='cuda:0')
epoch: 118 test_true_pfm: 5577.452585240758 sim_pfm: -223.41245899429
episode: 472 training return: tensor(-614.4033, device='cuda:0')
episode: 473 training return: tensor(-302.8742, device='cuda:0')
episode: 474 training return: tensor(-304.8100, device='cuda:0')
episode: 475 training return: tensor(-337.2032, device='cuda:0')
epoch: 119 test_true_pfm: 5680.316450438979 sim_pfm: -208.49563807430482
episode: 476 training return: tensor(-314.0475, device='cuda:0')
episode: 477 training return: tensor(-368.9496, device='cuda:0')
episode: 478 training return: tensor(-235.4736, device='cuda:0')
episode: 479 training return: tensor(-128.7182, device='cuda:0')
epoch: 120 test_true_pfm: 5576.365321852635 sim_pfm: -213.6195769768868
episode: 480 training return: tensor(-239.2160, device='cuda:0')
episode: 481 training return: tensor(-282.3309, device='cuda:0')
episode: 482 training return: tensor(-272.6488, device='cuda:0')
episode: 483 training return: tensor(-280.5979, device='cuda:0')
epoch: 121 test_true_pfm: 5657.193640417613 sim_pfm: -215.88687104410687
episode: 484 training return: tensor(-361.0272, device='cuda:0')
episode: 485 training return: tensor(-291.6934, device='cuda:0')
episode: 486 training return: tensor(-351.3663, device='cuda:0')
episode: 487 training return: tensor(-281.3618, device='cuda:0')
epoch: 122 test_true_pfm: 5568.678549754361 sim_pfm: -194.5744573570167
episode: 488 training return: tensor(-370.4682, device='cuda:0')
episode: 489 training return: tensor(-293.3026, device='cuda:0')
episode: 490 training return: tensor(-254.4104, device='cuda:0')
episode: 491 training return: tensor(-327.9554, device='cuda:0')
epoch: 123 test_true_pfm: 5693.233348593812 sim_pfm: -194.05823879295107
episode: 492 training return: tensor(-346.4626, device='cuda:0')
episode: 493 training return: tensor(-275.8972, device='cuda:0')
episode: 494 training return: tensor(-275.3544, device='cuda:0')
episode: 495 training return: tensor(-343.7598, device='cuda:0')
epoch: 124 test_true_pfm: 5577.666032084239 sim_pfm: -209.8416736474222
episode: 496 training return: tensor(-321.6837, device='cuda:0')
episode: 497 training return: tensor(-342.6119, device='cuda:0')
episode: 498 training return: tensor(-336.4663, device='cuda:0')
episode: 499 training return: tensor(-284.6696, device='cuda:0')
epoch: 125 test_true_pfm: 5531.353202380764 sim_pfm: -194.3421791906585
episode: 500 training return: tensor(-285.7451, device='cuda:0')
episode: 501 training return: tensor(-227.7726, device='cuda:0')
episode: 502 training return: tensor(-269.8419, device='cuda:0')
episode: 503 training return: tensor(-318.0359, device='cuda:0')
epoch: 126 test_true_pfm: 5666.492363936338 sim_pfm: -201.33110466011567
episode: 504 training return: tensor(-289.4085, device='cuda:0')
episode: 505 training return: tensor(-202.8262, device='cuda:0')
episode: 506 training return: tensor(-300.3532, device='cuda:0')
episode: 507 training return: tensor(-364.9832, device='cuda:0')
epoch: 127 test_true_pfm: 5587.283838918675 sim_pfm: -196.70820712364124
episode: 508 training return: tensor(-209.9271, device='cuda:0')
episode: 509 training return: tensor(-266.0899, device='cuda:0')
episode: 510 training return: tensor(-246.6627, device='cuda:0')
episode: 511 training return: tensor(-211.7554, device='cuda:0')
epoch: 128 test_true_pfm: 5566.594939754595 sim_pfm: -216.06234872248024
episode: 512 training return: tensor(-279.7342, device='cuda:0')
episode: 513 training return: tensor(-330.8694, device='cuda:0')
episode: 514 training return: tensor(-275.2224, device='cuda:0')
episode: 515 training return: tensor(-347.8105, device='cuda:0')
epoch: 129 test_true_pfm: 5558.910826193805 sim_pfm: -192.58312026023245
episode: 516 training return: tensor(-381.9726, device='cuda:0')
episode: 517 training return: tensor(-266.1832, device='cuda:0')
episode: 518 training return: tensor(-300.8183, device='cuda:0')
episode: 519 training return: tensor(-222.0502, device='cuda:0')
epoch: 130 test_true_pfm: 5600.378410679882 sim_pfm: -204.36425045293677
episode: 520 training return: tensor(-316.1448, device='cuda:0')
episode: 521 training return: tensor(-296.5716, device='cuda:0')
episode: 522 training return: tensor(-345.7035, device='cuda:0')
episode: 523 training return: tensor(-323.3792, device='cuda:0')
epoch: 131 test_true_pfm: 5649.326472238586 sim_pfm: -238.98438968167952
episode: 524 training return: tensor(-228.2814, device='cuda:0')
episode: 525 training return: tensor(-272.5695, device='cuda:0')
episode: 526 training return: tensor(-413.9871, device='cuda:0')
episode: 527 training return: tensor(-277.9354, device='cuda:0')
epoch: 132 test_true_pfm: 5549.142906248933 sim_pfm: -237.10720687712697
episode: 528 training return: tensor(-257.8546, device='cuda:0')
episode: 529 training return: tensor(-272.1443, device='cuda:0')
episode: 530 training return: tensor(-337.3464, device='cuda:0')
episode: 531 training return: tensor(-311.9610, device='cuda:0')
epoch: 133 test_true_pfm: 5619.496744229581 sim_pfm: -180.91744931975458
episode: 532 training return: tensor(-268.3421, device='cuda:0')
episode: 533 training return: tensor(-289.6794, device='cuda:0')
episode: 534 training return: tensor(-277.0781, device='cuda:0')
episode: 535 training return: tensor(-271.0480, device='cuda:0')
epoch: 134 test_true_pfm: 5572.290585897154 sim_pfm: -174.27633675271258
episode: 536 training return: tensor(-241.6079, device='cuda:0')
episode: 537 training return: tensor(-334.0858, device='cuda:0')
episode: 538 training return: tensor(-267.3884, device='cuda:0')
episode: 539 training return: tensor(-337.0119, device='cuda:0')
epoch: 135 test_true_pfm: 5633.454709008674 sim_pfm: -200.12811116010803
episode: 540 training return: tensor(-284.1844, device='cuda:0')
episode: 541 training return: tensor(-253.2716, device='cuda:0')
episode: 542 training return: tensor(-307.8268, device='cuda:0')
episode: 543 training return: tensor(-287.6425, device='cuda:0')
epoch: 136 test_true_pfm: 5692.201203131433 sim_pfm: -179.9485601450724
episode: 544 training return: tensor(-311.5089, device='cuda:0')
episode: 545 training return: tensor(-241.9902, device='cuda:0')
episode: 546 training return: tensor(-254.6870, device='cuda:0')
episode: 547 training return: tensor(-268.3434, device='cuda:0')
epoch: 137 test_true_pfm: 5621.942584371936 sim_pfm: -187.0480264655392
episode: 548 training return: tensor(-303.4443, device='cuda:0')
episode: 549 training return: tensor(-293.8981, device='cuda:0')
episode: 550 training return: tensor(-288.0795, device='cuda:0')
episode: 551 training return: tensor(-271.5289, device='cuda:0')
epoch: 138 test_true_pfm: 5636.941819202425 sim_pfm: -204.3713975212983
episode: 552 training return: tensor(-298.9297, device='cuda:0')
episode: 553 training return: tensor(-334.5016, device='cuda:0')
episode: 554 training return: tensor(-218.0460, device='cuda:0')
episode: 555 training return: tensor(-844.6342, device='cuda:0')
epoch: 139 test_true_pfm: 5645.259016353364 sim_pfm: -192.00539012707304
episode: 556 training return: tensor(-326.6136, device='cuda:0')
episode: 557 training return: tensor(-241.1337, device='cuda:0')
episode: 558 training return: tensor(-351.3897, device='cuda:0')
episode: 559 training return: tensor(-229.9557, device='cuda:0')
epoch: 140 test_true_pfm: 5650.463856105477 sim_pfm: -209.0389064330569
episode: 560 training return: tensor(-302.1105, device='cuda:0')
episode: 561 training return: tensor(-247.3865, device='cuda:0')
episode: 562 training return: tensor(-280.9380, device='cuda:0')
episode: 563 training return: tensor(-275.5292, device='cuda:0')
epoch: 141 test_true_pfm: 5578.521735895287 sim_pfm: -174.84515444281473
episode: 564 training return: tensor(-337.4546, device='cuda:0')
episode: 565 training return: tensor(-259.9016, device='cuda:0')
episode: 566 training return: tensor(-260.6792, device='cuda:0')
episode: 567 training return: tensor(-282.3712, device='cuda:0')
epoch: 142 test_true_pfm: 5584.514994982982 sim_pfm: -136.16412011580542
episode: 568 training return: tensor(-235.4473, device='cuda:0')
episode: 569 training return: tensor(-286.3319, device='cuda:0')
episode: 570 training return: tensor(-363.7887, device='cuda:0')
episode: 571 training return: tensor(-289.1730, device='cuda:0')
epoch: 143 test_true_pfm: 5675.542462713934 sim_pfm: -165.72741863445845
episode: 572 training return: tensor(-279.5788, device='cuda:0')
episode: 573 training return: tensor(-273.0498, device='cuda:0')
episode: 574 training return: tensor(-277.8715, device='cuda:0')
episode: 575 training return: tensor(-315.6091, device='cuda:0')
epoch: 144 test_true_pfm: 5640.562654857179 sim_pfm: -193.43246431439184
episode: 576 training return: tensor(-295.3936, device='cuda:0')
episode: 577 training return: tensor(-255.7509, device='cuda:0')
episode: 578 training return: tensor(-331.5651, device='cuda:0')
episode: 579 training return: tensor(-314.6524, device='cuda:0')
epoch: 145 test_true_pfm: 5632.545908225952 sim_pfm: -163.93158953723227
episode: 580 training return: tensor(-259.7136, device='cuda:0')
episode: 581 training return: tensor(-319.3794, device='cuda:0')
episode: 582 training return: tensor(-243.4444, device='cuda:0')
episode: 583 training return: tensor(-318.1429, device='cuda:0')
epoch: 146 test_true_pfm: 5704.4404579953525 sim_pfm: -173.33784201564654
episode: 584 training return: tensor(-389.4482, device='cuda:0')
episode: 585 training return: tensor(-295.4944, device='cuda:0')
episode: 586 training return: tensor(-325.1048, device='cuda:0')
episode: 587 training return: tensor(-266.9402, device='cuda:0')
epoch: 147 test_true_pfm: 5584.250325218675 sim_pfm: -192.88049025141905
episode: 588 training return: tensor(-292.4233, device='cuda:0')
episode: 589 training return: tensor(-343.6554, device='cuda:0')
episode: 590 training return: tensor(-239.5509, device='cuda:0')
episode: 591 training return: tensor(-753.2158, device='cuda:0')
epoch: 148 test_true_pfm: 5619.436486434782 sim_pfm: -205.53837593448893
episode: 592 training return: tensor(-240.6284, device='cuda:0')
episode: 593 training return: tensor(-283.6404, device='cuda:0')
episode: 594 training return: tensor(-278.3764, device='cuda:0')
episode: 595 training return: tensor(-298.8620, device='cuda:0')
epoch: 149 test_true_pfm: 5603.688548827139 sim_pfm: -184.31808812444797
episode: 596 training return: tensor(-310.0991, device='cuda:0')
episode: 597 training return: tensor(-342.1194, device='cuda:0')
episode: 598 training return: tensor(-238.9221, device='cuda:0')
episode: 599 training return: tensor(-300.8518, device='cuda:0')
epoch: 150 test_true_pfm: 5613.788725848038 sim_pfm: -158.32476252462948
