['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '4']
epoch: 0 training_loss 0.31219044059515 test_loss: 0.2044672966003418
epoch: 1 training_loss 0.19132516600191593 test_loss: 0.16595009565353394
epoch: 2 training_loss 0.17409202232956886 test_loss: 0.16748250722885133
epoch: 3 training_loss 0.17048203088343145 test_loss: 0.1837475061416626
epoch: 4 training_loss 0.16677386112511158 test_loss: 0.15039180517196654
epoch: 5 training_loss 0.16063086174428462 test_loss: 0.18256058692932128
epoch: 6 training_loss 0.15664546690881254 test_loss: 0.15903128385543824
epoch: 7 training_loss 0.15549480922520162 test_loss: 0.16002200841903685
epoch: 8 training_loss 0.156026009619236 test_loss: 0.13261960744857787
epoch: 9 training_loss 0.15777294345200063 test_loss: 0.1355189085006714
epoch: 10 training_loss 0.14914201509207486 test_loss: 0.15911433696746827
epoch: 11 training_loss 0.15327620550990104 test_loss: 0.13928555250167846
epoch: 12 training_loss 0.14231204643845558 test_loss: 0.15990935564041137
epoch: 13 training_loss 0.15511848751455545 test_loss: 0.1297615647315979
epoch: 14 training_loss 0.14525759998708965 test_loss: 0.15795977115631105
epoch: 15 training_loss 0.1495392955839634 test_loss: 0.14641579389572143
epoch: 16 training_loss 0.14361805330961944 test_loss: 0.1435675024986267
epoch: 17 training_loss 0.14831139389425516 test_loss: 0.16264889240264893
epoch: 18 training_loss 0.14960654459893705 test_loss: 0.16294949054718016
epoch: 19 training_loss 0.15298011299222708 test_loss: 0.15618935823440552
epoch: 20 training_loss 0.13931334674358367 test_loss: 0.15465474128723145
epoch: 21 training_loss 0.14973488189280032 test_loss: 0.15140795707702637
epoch: 22 training_loss 0.1612935358658433 test_loss: 0.12754610776901246
epoch: 23 training_loss 0.1437860444933176 test_loss: 0.11475404500961303
epoch: 24 training_loss 0.13863800700753928 test_loss: 0.1423335313796997
epoch: 25 training_loss 0.1430761369690299 test_loss: 0.1733071208000183
epoch: 26 training_loss 0.14914121810346842 test_loss: 0.13835095167160033
epoch: 27 training_loss 0.14846752800047397 test_loss: 0.1188880205154419
epoch: 28 training_loss 0.14267413314431907 test_loss: 0.13759927749633788
epoch: 29 training_loss 0.14358175583183766 test_loss: 0.14368646144866942
epoch: 30 training_loss 0.12793995220214127 test_loss: 0.141885507106781
epoch: 31 training_loss 0.145250216498971 test_loss: 0.13060081005096436
epoch: 32 training_loss 0.1420299170166254 test_loss: 0.1211665391921997
epoch: 33 training_loss 0.13746506650000812 test_loss: 0.12659730911254882
epoch: 34 training_loss 0.13253446884453296 test_loss: 0.13125522136688234
epoch: 35 training_loss 0.13254830300807952 test_loss: 0.14763344526290895
epoch: 36 training_loss 0.13937393184751273 test_loss: 0.1323469638824463
epoch: 37 training_loss 0.1336603070050478 test_loss: 0.13269267082214356
epoch: 38 training_loss 0.13431118171662093 test_loss: 0.1474665641784668
epoch: 39 training_loss 0.1456663156300783 test_loss: 0.13506412506103516
epoch: 40 training_loss 0.1332644133269787 test_loss: 0.1306844472885132
epoch: 41 training_loss 0.13812626719474794 test_loss: 0.146856427192688
epoch: 42 training_loss 0.1430267522856593 test_loss: 0.13684344291687012
epoch: 43 training_loss 0.14492268901318311 test_loss: 0.13255631923675537
epoch: 44 training_loss 0.13468120522797108 test_loss: 0.1319848418235779
epoch: 45 training_loss 0.13610474947839976 test_loss: 0.15496171712875367
epoch: 46 training_loss 0.136830539368093 test_loss: 0.13001718521118164
epoch: 47 training_loss 0.1395567796006799 test_loss: 0.13830479383468627
epoch: 48 training_loss 0.14976990025490522 test_loss: 0.1465054988861084
epoch: 49 training_loss 0.1398020739480853 test_loss: 0.10823160409927368
epoch: 50 training_loss 0.13001051474362613 test_loss: 0.15075242519378662
epoch: 51 training_loss 0.14424093157052995 test_loss: 0.14590864181518554
epoch: 52 training_loss 0.14672591410577296 test_loss: 0.14133737087249756
epoch: 53 training_loss 0.12880486607551575 test_loss: 0.13347988128662108
epoch: 54 training_loss 0.13452810406684876 test_loss: 0.12404000759124756
epoch: 55 training_loss 0.1346971971541643 test_loss: 0.12428650856018067
epoch: 56 training_loss 0.13222544558346272 test_loss: 0.1420973062515259
epoch: 57 training_loss 0.12695467196404933 test_loss: 0.15347206592559814
epoch: 58 training_loss 0.1425871991738677 test_loss: 0.1447892427444458
epoch: 59 training_loss 0.13845486119389533 test_loss: 0.12110244035720825
epoch: 60 training_loss 0.13257115513086318 test_loss: 0.13731194734573365
epoch: 61 training_loss 0.1369654694572091 test_loss: 0.12517640590667725
epoch: 62 training_loss 0.13704693667590617 test_loss: 0.1144910216331482
epoch: 63 training_loss 0.13506933216005565 test_loss: 0.1278359055519104
epoch: 64 training_loss 0.13256960786879063 test_loss: 0.10125167369842529
epoch: 65 training_loss 0.13513372130692006 test_loss: 0.1563260555267334
epoch: 66 training_loss 0.141106715798378 test_loss: 0.12459039688110352
epoch: 67 training_loss 0.13461779080331326 test_loss: 0.13799487352371215
epoch: 68 training_loss 0.14017461329698563 test_loss: 0.13396272659301758
epoch: 69 training_loss 0.12969326611608267 test_loss: 0.14939613342285157
epoch: 70 training_loss 0.14112989775836468 test_loss: 0.1552160382270813
epoch: 71 training_loss 0.13417415786534548 test_loss: 0.1331171989440918
epoch: 72 training_loss 0.13524171624332668 test_loss: 0.14060925245285033
epoch: 73 training_loss 0.12840249694883824 test_loss: 0.144257652759552
epoch: 74 training_loss 0.132109256349504 test_loss: 0.13293508291244507
epoch: 75 training_loss 0.13628454223275185 test_loss: 0.12678472995758056
epoch: 76 training_loss 0.13461609303951264 test_loss: 0.13533346652984618
epoch: 77 training_loss 0.13674388747662305 test_loss: 0.12927173376083373
epoch: 78 training_loss 0.14113416247069835 test_loss: 0.11511405706405639
epoch: 79 training_loss 0.1345149189978838 test_loss: 0.11519160270690917
epoch: 80 training_loss 0.1362586859986186 test_loss: 0.13652929067611694
epoch: 81 training_loss 0.1340113814175129 test_loss: 0.1233566164970398
epoch: 82 training_loss 0.13692172028124333 test_loss: 0.12830331325531005
epoch: 83 training_loss 0.13621435828506948 test_loss: 0.12969577312469482
epoch: 84 training_loss 0.13045104395598173 test_loss: 0.14175100326538087
epoch: 85 training_loss 0.13352928519248963 test_loss: 0.11675428152084351
epoch: 86 training_loss 0.1315611321851611 test_loss: 0.12649630308151244
epoch: 87 training_loss 0.13420955680310725 test_loss: 0.15425219535827636
epoch: 88 training_loss 0.13170460173860193 test_loss: 0.12942004203796387
epoch: 89 training_loss 0.13507889349013566 test_loss: 0.14159245491027833
epoch: 90 training_loss 0.13122177172452212 test_loss: 0.1431381344795227
epoch: 91 training_loss 0.12880179926753044 test_loss: 0.12565290927886963
epoch: 92 training_loss 0.1346547858044505 test_loss: 0.12158985137939453
epoch: 93 training_loss 0.13403386812657117 test_loss: 0.14410789012908937
epoch: 94 training_loss 0.13882188256829978 test_loss: 0.13180277347564698
epoch: 95 training_loss 0.13453805778175593 test_loss: 0.1379276156425476
epoch: 96 training_loss 0.13715749759227036 test_loss: 0.12797465324401855
epoch: 97 training_loss 0.12969358801841735 test_loss: 0.11695510149002075
epoch: 98 training_loss 0.13169779401272536 test_loss: 0.14199634790420532
epoch: 99 training_loss 0.12935177128762007 test_loss: 0.13062810897827148
epoch: 100 training_loss 0.13800723902881146 test_loss: 0.15448286533355712
epoch: 101 training_loss 0.13332451205700635 test_loss: 0.14648022651672363
epoch: 102 training_loss 0.1338314366713166 test_loss: 0.125011670589447
epoch: 103 training_loss 0.12773188576102257 test_loss: 0.13438891172409057
epoch: 104 training_loss 0.14014481201767923 test_loss: 0.14691691398620604
epoch: 105 training_loss 0.14847506128251553 test_loss: 0.1324773073196411
epoch: 106 training_loss 0.13162692237645388 test_loss: 0.140172016620636
epoch: 107 training_loss 0.13453746607527137 test_loss: 0.1145776391029358
epoch: 108 training_loss 0.1301732161268592 test_loss: 0.13354012966156006
epoch: 109 training_loss 0.13328728172928095 test_loss: 0.14276473522186278
epoch: 110 training_loss 0.13828424792736768 test_loss: 0.13983124494552612
epoch: 111 training_loss 0.13772559620440006 test_loss: 0.12125033140182495
epoch: 112 training_loss 0.13521381106227637 test_loss: 0.14539674520492554
epoch: 113 training_loss 0.14345390599220992 test_loss: 0.12464257478713989
epoch: 114 training_loss 0.1374143273383379 test_loss: 0.1409180998802185
epoch: 115 training_loss 0.1354241993650794 test_loss: 0.12342864274978638
epoch: 116 training_loss 0.13904448136687278 test_loss: 0.12352646589279175
epoch: 117 training_loss 0.13052694015204908 test_loss: 0.15191047191619872
epoch: 118 training_loss 0.13479766711592675 test_loss: 0.1296635627746582
epoch: 119 training_loss 0.12364936795085668 test_loss: 0.13023241758346557
epoch: 120 training_loss 0.13469278879463673 test_loss: 0.13978511095046997
epoch: 121 training_loss 0.13187715094536542 test_loss: 0.11963236331939697
epoch: 122 training_loss 0.13305143624544144 test_loss: 0.14936953783035278
epoch: 123 training_loss 0.1317792707309127 test_loss: 0.13835407495498658
epoch: 124 training_loss 0.13801252368837594 test_loss: 0.13712379932403565
epoch: 125 training_loss 0.12954683192074298 test_loss: 0.12833861112594605
epoch: 126 training_loss 0.13064653802663087 test_loss: 0.13667992353439332
epoch: 127 training_loss 0.13307518176734448 test_loss: 0.1302754282951355
epoch: 128 training_loss 0.12542511340230703 test_loss: 0.12463587522506714
epoch: 129 training_loss 0.1311267387866974 test_loss: 0.15014930963516235
epoch: 130 training_loss 0.13728587996214628 test_loss: 0.1417130708694458
epoch: 131 training_loss 0.1352401238307357 test_loss: 0.1255470871925354
epoch: 132 training_loss 0.13508882232010364 test_loss: 0.12664135694503784
epoch: 133 training_loss 0.13415514830499886 test_loss: 0.12835447788238524
epoch: 134 training_loss 0.13345019849017262 test_loss: 0.1269839882850647
epoch: 135 training_loss 0.13669327061623335 test_loss: 0.12220679521560669
epoch: 136 training_loss 0.13361215747892857 test_loss: 0.1254772424697876
epoch: 137 training_loss 0.14225333739072085 test_loss: 0.14399144649505616
epoch: 138 training_loss 0.1401990770548582 test_loss: 0.1251225471496582
epoch: 139 training_loss 0.13639397405087947 test_loss: 0.14295369386672974
epoch: 140 training_loss 0.13059918710961937 test_loss: 0.13591171503067018
epoch: 141 training_loss 0.12617080938071012 test_loss: 0.14907275438308715
epoch: 142 training_loss 0.12795614991337062 test_loss: 0.12209101915359497
epoch: 143 training_loss 0.13200467102229596 test_loss: 0.1337076187133789
epoch: 144 training_loss 0.1349804188683629 test_loss: 0.12289103269577026
epoch: 145 training_loss 0.1294575136899948 test_loss: 0.12831379175186158
epoch: 146 training_loss 0.13842170268297196 test_loss: 0.138994038105011
epoch: 147 training_loss 0.13681073747575284 test_loss: 0.140780770778656
epoch: 148 training_loss 0.13976448629051447 test_loss: 0.12460473775863648
epoch: 149 training_loss 0.13600175980478524 test_loss: 0.15282477140426637
epoch: 0 training_loss 7.611512064933777 test_loss: 4.410821533203125
epoch: 1 training_loss 3.306490557193756 test_loss: 2.541671371459961
epoch: 2 training_loss 2.263011338710785 test_loss: 1.9758314132690429
epoch: 3 training_loss 1.7757155191898346 test_loss: 1.6485933303833007
epoch: 4 training_loss 1.5210058104991913 test_loss: 1.4030678749084473
epoch: 5 training_loss 1.3603167700767518 test_loss: 1.3595483779907227
epoch: 6 training_loss 1.2124529844522476 test_loss: 1.2074959754943848
epoch: 7 training_loss 1.1878731501102449 test_loss: 1.1553940773010254
epoch: 8 training_loss 1.0852677309513092 test_loss: 1.0842529296875
epoch: 9 training_loss 1.0269639164209365 test_loss: 0.9625297546386719
epoch: 10 training_loss 0.9958776468038559 test_loss: 1.017486000061035
epoch: 11 training_loss 0.9581834530830383 test_loss: 0.9161696434020996
epoch: 12 training_loss 0.9095063865184784 test_loss: 0.8783413887023925
epoch: 13 training_loss 0.8995461529493332 test_loss: 0.8799862861633301
epoch: 14 training_loss 0.8554147613048554 test_loss: 0.8492874145507813
epoch: 15 training_loss 0.8550705283880233 test_loss: 0.8730062484741211
epoch: 16 training_loss 0.8136019659042358 test_loss: 0.8323323249816894
epoch: 17 training_loss 0.804282330274582 test_loss: 0.80899076461792
epoch: 18 training_loss 0.8035592818260193 test_loss: 0.7819035053253174
epoch: 19 training_loss 0.7742044651508331 test_loss: 0.7564141273498535
epoch: 20 training_loss 0.7773748052120208 test_loss: 0.8197761535644531
epoch: 21 training_loss 0.7573406183719635 test_loss: 0.7782889366149902
epoch: 22 training_loss 0.7538962072134018 test_loss: 0.7873508930206299
epoch: 23 training_loss 0.7385062158107758 test_loss: 0.7553377628326416
epoch: 24 training_loss 0.7147780948877335 test_loss: 0.7137067794799805
epoch: 25 training_loss 0.7070039737224579 test_loss: 0.6975274085998535
epoch: 26 training_loss 0.6989789760112762 test_loss: 0.6847169399261475
epoch: 27 training_loss 0.6922446769475937 test_loss: 0.6737230300903321
epoch: 28 training_loss 0.6961277395486831 test_loss: 0.690242862701416
epoch: 29 training_loss 0.679112976193428 test_loss: 0.6695475101470947
epoch: 30 training_loss 0.694601114988327 test_loss: 0.6914453983306885
epoch: 31 training_loss 0.6689604079723358 test_loss: 0.6848270893096924
epoch: 32 training_loss 0.6700791084766388 test_loss: 0.6668010711669922
epoch: 33 training_loss 0.6917433816194535 test_loss: 0.6405621528625488
epoch: 34 training_loss 0.6476153260469437 test_loss: 0.6430874824523926
epoch: 35 training_loss 0.655855159163475 test_loss: 0.664251184463501
epoch: 36 training_loss 0.6396581447124481 test_loss: 0.6541901111602784
epoch: 37 training_loss 0.6397247850894928 test_loss: 0.6539501667022705
epoch: 38 training_loss 0.6382723480463028 test_loss: 0.6397291660308838
epoch: 39 training_loss 0.6269484174251556 test_loss: 0.6544330596923829
epoch: 40 training_loss 0.634680587053299 test_loss: 0.6278644561767578
epoch: 41 training_loss 0.6400030767917633 test_loss: 0.6458044528961182
epoch: 42 training_loss 0.6385655134916306 test_loss: 0.6041895389556885
epoch: 43 training_loss 0.6261592662334442 test_loss: 0.5960779666900635
epoch: 44 training_loss 0.6275309777259827 test_loss: 0.5968609809875488
epoch: 45 training_loss 0.6150193279981613 test_loss: 0.592653465270996
epoch: 46 training_loss 0.6106364309787751 test_loss: 0.6020553588867188
epoch: 47 training_loss 0.6073786336183548 test_loss: 0.6180423259735107
epoch: 48 training_loss 0.5889308661222458 test_loss: 0.57808837890625
epoch: 49 training_loss 0.6213913416862488 test_loss: 0.6207162380218506
epoch: 50 training_loss 0.5978466230630874 test_loss: 0.616844654083252
epoch: 51 training_loss 0.5813180702924728 test_loss: 0.5640387058258056
epoch: 52 training_loss 0.5730096358060837 test_loss: 0.5841371059417725
epoch: 53 training_loss 0.5817696249485016 test_loss: 0.5597362518310547
epoch: 54 training_loss 0.5845763689279556 test_loss: 0.6277174949645996
epoch: 55 training_loss 0.5864500522613525 test_loss: 0.5594680786132813
epoch: 56 training_loss 0.5820742058753967 test_loss: 0.5943161487579346
epoch: 57 training_loss 0.5711240699887276 test_loss: 0.5962705612182617
epoch: 58 training_loss 0.5728339329361916 test_loss: 0.5553022861480713
epoch: 59 training_loss 0.5817463764548302 test_loss: 0.5661309242248536
epoch: 60 training_loss 0.5884830263257027 test_loss: 0.5629671096801758
epoch: 61 training_loss 0.5627312070131302 test_loss: 0.5701291561126709
epoch: 62 training_loss 0.555479206442833 test_loss: 0.5555356025695801
epoch: 63 training_loss 0.55818039894104 test_loss: 0.5586394309997559
epoch: 64 training_loss 0.5791516581177711 test_loss: 0.5320506572723389
epoch: 65 training_loss 0.5609189903736115 test_loss: 0.546933650970459
epoch: 66 training_loss 0.5550166314840317 test_loss: 0.5769893169403076
epoch: 67 training_loss 0.5460982581973076 test_loss: 0.5511927604675293
epoch: 68 training_loss 0.5598137179017066 test_loss: 0.5317992210388184
epoch: 69 training_loss 0.5399526762962341 test_loss: 0.5279601097106934
epoch: 70 training_loss 0.5490250259637832 test_loss: 0.5144989967346192
epoch: 71 training_loss 0.5543447908759117 test_loss: 0.5345460414886475
epoch: 72 training_loss 0.5433027955889702 test_loss: 0.5140904903411865
epoch: 73 training_loss 0.5352141135931014 test_loss: 0.5541316509246826
epoch: 74 training_loss 0.5337756794691085 test_loss: 0.5425374031066894
epoch: 75 training_loss 0.5314517414569855 test_loss: 0.5474704265594482
epoch: 76 training_loss 0.534680378139019 test_loss: 0.5146519184112549
epoch: 77 training_loss 0.5434068274497986 test_loss: 0.517300033569336
epoch: 78 training_loss 0.5229309123754501 test_loss: 0.5411322593688965
epoch: 79 training_loss 0.5196298304200172 test_loss: 0.5206088542938232
epoch: 80 training_loss 0.5269225215911866 test_loss: 0.5237997055053711
epoch: 81 training_loss 0.5289338698983193 test_loss: 0.5514855861663819
epoch: 82 training_loss 0.5310229563713074 test_loss: 0.51602783203125
epoch: 83 training_loss 0.5269668355584145 test_loss: 0.5170856952667237
epoch: 84 training_loss 0.5214045917987824 test_loss: 0.5152902126312255
epoch: 85 training_loss 0.5234292152523995 test_loss: 0.5268120288848877
epoch: 86 training_loss 0.5272702491283416 test_loss: 0.522067928314209
epoch: 87 training_loss 0.5227343779802323 test_loss: 0.5236284732818604
epoch: 88 training_loss 0.5249224498867988 test_loss: 0.5298160076141357
epoch: 89 training_loss 0.5188357844948769 test_loss: 0.5293692111968994
epoch: 90 training_loss 0.5250390872359276 test_loss: 0.5349430084228516
epoch: 91 training_loss 0.49884257137775423 test_loss: 0.5195427417755127
epoch: 92 training_loss 0.5027006813883781 test_loss: 0.5003736972808838
epoch: 93 training_loss 0.505663332939148 test_loss: 0.5170895099639893
epoch: 94 training_loss 0.5077811062335968 test_loss: 0.5063690662384033
epoch: 95 training_loss 0.5099431562423706 test_loss: 0.4994203567504883
epoch: 96 training_loss 0.5078994849324227 test_loss: 0.5067651271820068
epoch: 97 training_loss 0.5197913753986358 test_loss: 0.5332730770111084
epoch: 98 training_loss 0.5079105505347252 test_loss: 0.5278666496276856
epoch: 99 training_loss 0.5097196826338768 test_loss: 0.5051993370056153
epoch: 100 training_loss 0.4994171795248985 test_loss: 0.5061344146728516
epoch: 101 training_loss 0.5031909102201462 test_loss: 0.518088960647583
epoch: 102 training_loss 0.5071802413463593 test_loss: 0.5088173389434815
epoch: 103 training_loss 0.49468312472105025 test_loss: 0.5284038543701172
epoch: 104 training_loss 0.5027149474620819 test_loss: 0.5023346900939941
epoch: 105 training_loss 0.49094871908426285 test_loss: 0.5099453926086426
epoch: 106 training_loss 0.49059456914663313 test_loss: 0.49857749938964846
epoch: 107 training_loss 0.48674889385700226 test_loss: 0.490964937210083
epoch: 108 training_loss 0.5001565632224083 test_loss: 0.4943988800048828
epoch: 109 training_loss 0.4915172478556633 test_loss: 0.5043580532073975
epoch: 110 training_loss 0.4847642961144447 test_loss: 0.5038689613342285
epoch: 111 training_loss 0.49193606823682784 test_loss: 0.4913483619689941
epoch: 112 training_loss 0.4977205115556717 test_loss: 0.5033483505249023
epoch: 113 training_loss 0.48816899955272675 test_loss: 0.5020562171936035
epoch: 114 training_loss 0.4972699964046478 test_loss: 0.496079683303833
epoch: 115 training_loss 0.4917794269323349 test_loss: 0.4818274974822998
epoch: 116 training_loss 0.4839645135402679 test_loss: 0.4992982864379883
epoch: 117 training_loss 0.48688800394535064 test_loss: 0.48475008010864257
epoch: 118 training_loss 0.48321301996707916 test_loss: 0.500098705291748
epoch: 119 training_loss 0.49069226562976836 test_loss: 0.5057896137237549
epoch: 120 training_loss 0.4906567427515984 test_loss: 0.49719557762145994
epoch: 121 training_loss 0.478723609149456 test_loss: 0.5061110973358154
epoch: 122 training_loss 0.4914719325304031 test_loss: 0.48845696449279785
epoch: 123 training_loss 0.49011482268571854 test_loss: 0.49822444915771485
epoch: 124 training_loss 0.480572929084301 test_loss: 0.48768153190612795
epoch: 125 training_loss 0.4819760954380035 test_loss: 0.4819373607635498
epoch: 126 training_loss 0.4809012681245804 test_loss: 0.5331310272216797
epoch: 127 training_loss 0.4861675751209259 test_loss: 0.4781622886657715
epoch: 128 training_loss 0.4825705263018608 test_loss: 0.4778470516204834
epoch: 129 training_loss 0.47663442194461825 test_loss: 0.47411360740661623
epoch: 130 training_loss 0.47750255703926087 test_loss: 0.4886291027069092
epoch: 131 training_loss 0.48797773241996767 test_loss: 0.48665714263916016
epoch: 132 training_loss 0.47597735941410063 test_loss: 0.47986278533935545
epoch: 133 training_loss 0.4851043784618378 test_loss: 0.5008039951324463
epoch: 134 training_loss 0.4740951853990555 test_loss: 0.4851529598236084
epoch: 135 training_loss 0.48285726219415664 test_loss: 0.47702341079711913
epoch: 136 training_loss 0.482622829079628 test_loss: 0.4731149673461914
epoch: 137 training_loss 0.4763539582490921 test_loss: 0.47972927093505857
epoch: 138 training_loss 0.4684145426750183 test_loss: 0.4613179206848145
epoch: 139 training_loss 0.4713191422820091 test_loss: 0.4803736686706543
epoch: 140 training_loss 0.47249010324478147 test_loss: 0.49571890830993653
epoch: 141 training_loss 0.47882815390825273 test_loss: 0.49669957160949707
epoch: 142 training_loss 0.46885733634233473 test_loss: 0.47322812080383303
epoch: 143 training_loss 0.4665839496254921 test_loss: 0.47749032974243166
epoch: 144 training_loss 0.47124798327684403 test_loss: 0.4723190784454346
epoch: 145 training_loss 0.4661612978577614 test_loss: 0.46530818939208984
epoch: 146 training_loss 0.4710822319984436 test_loss: 0.4701897144317627
epoch: 147 training_loss 0.4702626660466194 test_loss: 0.4802420139312744
epoch: 148 training_loss 0.46949357986450196 test_loss: 0.49930591583251954
epoch: 149 training_loss 0.4722404819726944 test_loss: 0.4672715187072754
3191.347622683322
episode: 0 training return: tensor(-674.9086, device='cuda:0')
episode: 1 training return: tensor(-107.1499, device='cuda:0')
episode: 2 training return: tensor(-92.6301, device='cuda:0')
episode: 3 training return: tensor(-82.4019, device='cuda:0')
epoch: 1 test_true_pfm: 3204.8192817918157 sim_pfm: -101.04545042344641
episode: 4 training return: tensor(-651.2466, device='cuda:0')
episode: 5 training return: tensor(-125.6652, device='cuda:0')
episode: 6 training return: tensor(-675.5519, device='cuda:0')
episode: 7 training return: tensor(-72.4398, device='cuda:0')
epoch: 2 test_true_pfm: 1951.8849611063372 sim_pfm: -103.31186284523574
episode: 8 training return: tensor(-666.6234, device='cuda:0')
episode: 9 training return: tensor(-110.8352, device='cuda:0')
episode: 10 training return: tensor(-711.8856, device='cuda:0')
episode: 11 training return: tensor(-136.3580, device='cuda:0')
epoch: 3 test_true_pfm: 3178.091904127195 sim_pfm: -131.0757793438097
episode: 12 training return: tensor(-714.3727, device='cuda:0')
episode: 13 training return: tensor(-677.0758, device='cuda:0')
episode: 14 training return: tensor(-125.6510, device='cuda:0')
episode: 15 training return: tensor(-271.1549, device='cuda:0')
epoch: 4 test_true_pfm: 1488.854296990945 sim_pfm: -425.36425184534164
episode: 16 training return: tensor(-630.2677, device='cuda:0')
episode: 17 training return: tensor(-121.4296, device='cuda:0')
episode: 18 training return: tensor(-124.9349, device='cuda:0')
episode: 19 training return: tensor(-121.0735, device='cuda:0')
epoch: 5 test_true_pfm: 1413.5002889600607 sim_pfm: -648.0729924697467
episode: 20 training return: tensor(-676.9518, device='cuda:0')
episode: 21 training return: tensor(-622.1047, device='cuda:0')
episode: 22 training return: tensor(-401.3203, device='cuda:0')
episode: 23 training return: tensor(-644.9386, device='cuda:0')
epoch: 6 test_true_pfm: 2825.0708066916463 sim_pfm: -191.457639762831
episode: 24 training return: tensor(-709.2494, device='cuda:0')
episode: 25 training return: tensor(-196.7259, device='cuda:0')
episode: 26 training return: tensor(-652.8743, device='cuda:0')
episode: 27 training return: tensor(-669.5410, device='cuda:0')
epoch: 7 test_true_pfm: 2471.724695191823 sim_pfm: -481.13452274810214
episode: 28 training return: tensor(-709.5837, device='cuda:0')
episode: 29 training return: tensor(-181.9091, device='cuda:0')
episode: 30 training return: tensor(-124.4173, device='cuda:0')
episode: 31 training return: tensor(-635.0341, device='cuda:0')
epoch: 8 test_true_pfm: 3210.047498492268 sim_pfm: -255.14405945958183
episode: 32 training return: tensor(-115.8308, device='cuda:0')
episode: 33 training return: tensor(-649.2886, device='cuda:0')
episode: 34 training return: tensor(-133.2407, device='cuda:0')
episode: 35 training return: tensor(-681.5323, device='cuda:0')
epoch: 9 test_true_pfm: 2733.6492670701027 sim_pfm: -86.65639746237623
episode: 36 training return: tensor(-123.7591, device='cuda:0')
episode: 37 training return: tensor(-111.8865, device='cuda:0')
episode: 38 training return: tensor(-142.8059, device='cuda:0')
episode: 39 training return: tensor(-179.2089, device='cuda:0')
epoch: 10 test_true_pfm: 3232.977425878595 sim_pfm: -381.2354977389817
episode: 40 training return: tensor(-87.9915, device='cuda:0')
episode: 41 training return: tensor(-709.0539, device='cuda:0')
episode: 42 training return: tensor(-112.1496, device='cuda:0')
episode: 43 training return: tensor(-670.5545, device='cuda:0')
epoch: 11 test_true_pfm: 3173.0458276414665 sim_pfm: -152.67736112027583
episode: 44 training return: tensor(-346.1129, device='cuda:0')
episode: 45 training return: tensor(-71.1895, device='cuda:0')
episode: 46 training return: tensor(-354.9988, device='cuda:0')
episode: 47 training return: tensor(-135.7500, device='cuda:0')
epoch: 12 test_true_pfm: 3274.8045990497653 sim_pfm: -241.12014041889537
episode: 48 training return: tensor(-663.5785, device='cuda:0')
episode: 49 training return: tensor(-101.4685, device='cuda:0')
episode: 50 training return: tensor(-85.0608, device='cuda:0')
episode: 51 training return: tensor(-121.8952, device='cuda:0')
epoch: 13 test_true_pfm: 3224.873079360494 sim_pfm: -77.7430793812285
episode: 52 training return: tensor(-115.0166, device='cuda:0')
episode: 53 training return: tensor(-76.1177, device='cuda:0')
episode: 54 training return: tensor(-76.4720, device='cuda:0')
episode: 55 training return: tensor(-101.0821, device='cuda:0')
epoch: 14 test_true_pfm: 2906.7584425625487 sim_pfm: -215.96709196656593
episode: 56 training return: tensor(-123.8505, device='cuda:0')
episode: 57 training return: tensor(-101.4504, device='cuda:0')
episode: 58 training return: tensor(-101.7090, device='cuda:0')
episode: 59 training return: tensor(-113.4861, device='cuda:0')
epoch: 15 test_true_pfm: 3188.467061782094 sim_pfm: -178.1215962008379
episode: 60 training return: tensor(-676.8300, device='cuda:0')
episode: 61 training return: tensor(-101.4295, device='cuda:0')
episode: 62 training return: tensor(-604.4829, device='cuda:0')
episode: 63 training return: tensor(-643.0904, device='cuda:0')
epoch: 16 test_true_pfm: 2806.488109787663 sim_pfm: -125.71951140736928
episode: 64 training return: tensor(-128.8715, device='cuda:0')
episode: 65 training return: tensor(-90.1946, device='cuda:0')
episode: 66 training return: tensor(-124.2770, device='cuda:0')
episode: 67 training return: tensor(-146.1023, device='cuda:0')
epoch: 17 test_true_pfm: 2822.4411754323155 sim_pfm: -141.72830169349132
episode: 68 training return: tensor(-118.8841, device='cuda:0')
episode: 69 training return: tensor(-92.3110, device='cuda:0')
episode: 70 training return: tensor(-175.7656, device='cuda:0')
episode: 71 training return: tensor(-87.9703, device='cuda:0')
epoch: 18 test_true_pfm: 3093.3704250578653 sim_pfm: -137.47843291829727
episode: 72 training return: tensor(-123.1056, device='cuda:0')
episode: 73 training return: tensor(-120.1581, device='cuda:0')
episode: 74 training return: tensor(-99.4691, device='cuda:0')
episode: 75 training return: tensor(-85.2568, device='cuda:0')
epoch: 19 test_true_pfm: 3213.1890896016253 sim_pfm: -99.8518334441081
episode: 76 training return: tensor(-90.0790, device='cuda:0')
episode: 77 training return: tensor(-116.3440, device='cuda:0')
episode: 78 training return: tensor(-114.7318, device='cuda:0')
episode: 79 training return: tensor(-71.4723, device='cuda:0')
epoch: 20 test_true_pfm: 3174.5668883481217 sim_pfm: -213.85396694520023
episode: 80 training return: tensor(-97.0024, device='cuda:0')
episode: 81 training return: tensor(-127.5931, device='cuda:0')
episode: 82 training return: tensor(-68.2798, device='cuda:0')
episode: 83 training return: tensor(-131.7891, device='cuda:0')
epoch: 21 test_true_pfm: 3189.5176635874436 sim_pfm: -117.79100326516588
episode: 84 training return: tensor(-98.5671, device='cuda:0')
episode: 85 training return: tensor(-120.3949, device='cuda:0')
episode: 86 training return: tensor(-101.2450, device='cuda:0')
episode: 87 training return: tensor(-130.0044, device='cuda:0')
epoch: 22 test_true_pfm: 3182.9523002865285 sim_pfm: -147.08965248965737
episode: 88 training return: tensor(-97.0349, device='cuda:0')
episode: 89 training return: tensor(-100.4011, device='cuda:0')
episode: 90 training return: tensor(-161.5632, device='cuda:0')
episode: 91 training return: tensor(-103.2255, device='cuda:0')
epoch: 23 test_true_pfm: 3186.362472352441 sim_pfm: -82.55300848882568
episode: 92 training return: tensor(-92.5249, device='cuda:0')
episode: 93 training return: tensor(-122.5099, device='cuda:0')
episode: 94 training return: tensor(-100.0704, device='cuda:0')
episode: 95 training return: tensor(-118.6823, device='cuda:0')
epoch: 24 test_true_pfm: 3162.723298720111 sim_pfm: -134.50170200531525
episode: 96 training return: tensor(-97.6616, device='cuda:0')
episode: 97 training return: tensor(-671.5949, device='cuda:0')
episode: 98 training return: tensor(-101.8475, device='cuda:0')
episode: 99 training return: tensor(-109.7516, device='cuda:0')
epoch: 25 test_true_pfm: 3193.1585251562683 sim_pfm: -114.69725485922147
episode: 100 training return: tensor(-71.1934, device='cuda:0')
episode: 101 training return: tensor(-166.2426, device='cuda:0')
episode: 102 training return: tensor(-104.4772, device='cuda:0')
episode: 103 training return: tensor(-247.8960, device='cuda:0')
epoch: 26 test_true_pfm: 2848.9734476260946 sim_pfm: -105.77450462077589
episode: 104 training return: tensor(-118.1107, device='cuda:0')
episode: 105 training return: tensor(-98.3480, device='cuda:0')
episode: 106 training return: tensor(-118.8742, device='cuda:0')
episode: 107 training return: tensor(-106.4866, device='cuda:0')
epoch: 27 test_true_pfm: 3164.250010820346 sim_pfm: -89.62584812214482
episode: 108 training return: tensor(-71.5053, device='cuda:0')
episode: 109 training return: tensor(-89.9114, device='cuda:0')
episode: 110 training return: tensor(-97.8843, device='cuda:0')
episode: 111 training return: tensor(-627.9332, device='cuda:0')
epoch: 28 test_true_pfm: 3157.2389789599442 sim_pfm: -123.1094461606505
episode: 112 training return: tensor(-105.7703, device='cuda:0')
episode: 113 training return: tensor(-664.2065, device='cuda:0')
episode: 114 training return: tensor(-78.2262, device='cuda:0')
episode: 115 training return: tensor(-128.3394, device='cuda:0')
epoch: 29 test_true_pfm: 3211.605155776515 sim_pfm: -87.7092978139214
episode: 116 training return: tensor(-118.7525, device='cuda:0')
episode: 117 training return: tensor(-105.1866, device='cuda:0')
episode: 118 training return: tensor(-474.3758, device='cuda:0')
episode: 119 training return: tensor(-118.2671, device='cuda:0')
epoch: 30 test_true_pfm: 2657.8848817799912 sim_pfm: -59.00414087583582
episode: 120 training return: tensor(-103.0965, device='cuda:0')
episode: 121 training return: tensor(-73.7040, device='cuda:0')
episode: 122 training return: tensor(-139.8167, device='cuda:0')
episode: 123 training return: tensor(-460.7495, device='cuda:0')
epoch: 31 test_true_pfm: 3162.751469062155 sim_pfm: -111.73763977683848
episode: 124 training return: tensor(-90.9542, device='cuda:0')
episode: 125 training return: tensor(-628.9291, device='cuda:0')
episode: 126 training return: tensor(-115.7927, device='cuda:0')
episode: 127 training return: tensor(-684.2647, device='cuda:0')
epoch: 32 test_true_pfm: 3230.591494692894 sim_pfm: -80.13821715395898
episode: 128 training return: tensor(-91.5207, device='cuda:0')
episode: 129 training return: tensor(-78.0202, device='cuda:0')
episode: 130 training return: tensor(-635.2775, device='cuda:0')
episode: 131 training return: tensor(-87.4488, device='cuda:0')
epoch: 33 test_true_pfm: 3216.0879795668225 sim_pfm: -68.71199087138909
episode: 132 training return: tensor(-107.0494, device='cuda:0')
episode: 133 training return: tensor(-556.5527, device='cuda:0')
episode: 134 training return: tensor(-644.3282, device='cuda:0')
episode: 135 training return: tensor(-105.1394, device='cuda:0')
epoch: 34 test_true_pfm: 3209.7471299412887 sim_pfm: -77.06937386960878
episode: 136 training return: tensor(-92.4159, device='cuda:0')
episode: 137 training return: tensor(-87.5631, device='cuda:0')
episode: 138 training return: tensor(-79.2896, device='cuda:0')
episode: 139 training return: tensor(-85.5104, device='cuda:0')
epoch: 35 test_true_pfm: 3210.671629344466 sim_pfm: -84.46987126903453
episode: 140 training return: tensor(-76.8806, device='cuda:0')
episode: 141 training return: tensor(-683.7812, device='cuda:0')
episode: 142 training return: tensor(-96.6403, device='cuda:0')
episode: 143 training return: tensor(-110.1098, device='cuda:0')
epoch: 36 test_true_pfm: 3242.864944185451 sim_pfm: -79.47652705437697
episode: 144 training return: tensor(-629.0834, device='cuda:0')
episode: 145 training return: tensor(-60.0114, device='cuda:0')
episode: 146 training return: tensor(-103.8556, device='cuda:0')
episode: 147 training return: tensor(-111.8247, device='cuda:0')
epoch: 37 test_true_pfm: 3224.192816832794 sim_pfm: -91.49007447646
episode: 148 training return: tensor(-66.6298, device='cuda:0')
episode: 149 training return: tensor(-118.7825, device='cuda:0')
episode: 150 training return: tensor(-98.9996, device='cuda:0')
episode: 151 training return: tensor(-131.3428, device='cuda:0')
epoch: 38 test_true_pfm: 3207.8754119158743 sim_pfm: -87.33304290945914
episode: 152 training return: tensor(-633.7808, device='cuda:0')
episode: 153 training return: tensor(-79.0360, device='cuda:0')
episode: 154 training return: tensor(-603.2355, device='cuda:0')
episode: 155 training return: tensor(-95.9113, device='cuda:0')
epoch: 39 test_true_pfm: 3190.687144942232 sim_pfm: -106.93674260618475
episode: 156 training return: tensor(-660.2260, device='cuda:0')
episode: 157 training return: tensor(-80.3165, device='cuda:0')
episode: 158 training return: tensor(-340.7032, device='cuda:0')
episode: 159 training return: tensor(-623.6307, device='cuda:0')
epoch: 40 test_true_pfm: 3227.118277788179 sim_pfm: -92.10619701118169
episode: 160 training return: tensor(-104.9190, device='cuda:0')
episode: 161 training return: tensor(-159.0371, device='cuda:0')
episode: 162 training return: tensor(-170.5399, device='cuda:0')
episode: 163 training return: tensor(-627.9432, device='cuda:0')
epoch: 41 test_true_pfm: 3231.29859090919 sim_pfm: -98.0032679446352
episode: 164 training return: tensor(-106.1643, device='cuda:0')
episode: 165 training return: tensor(-138.7164, device='cuda:0')
episode: 166 training return: tensor(-92.2077, device='cuda:0')
episode: 167 training return: tensor(-87.0606, device='cuda:0')
epoch: 42 test_true_pfm: 3216.433651470852 sim_pfm: -67.1807561844762
episode: 168 training return: tensor(-87.7473, device='cuda:0')
episode: 169 training return: tensor(-52.2840, device='cuda:0')
episode: 170 training return: tensor(-62.5375, device='cuda:0')
episode: 171 training return: tensor(-122.4944, device='cuda:0')
epoch: 43 test_true_pfm: 3220.8508141701464 sim_pfm: -103.57853825834657
episode: 172 training return: tensor(-635.8302, device='cuda:0')
episode: 173 training return: tensor(-75.2107, device='cuda:0')
episode: 174 training return: tensor(-50.1400, device='cuda:0')
episode: 175 training return: tensor(-380.3238, device='cuda:0')
epoch: 44 test_true_pfm: 3191.315942972058 sim_pfm: -100.1294881196615
episode: 176 training return: tensor(-116.4328, device='cuda:0')
episode: 177 training return: tensor(-708.5307, device='cuda:0')
episode: 178 training return: tensor(-323.0033, device='cuda:0')
episode: 179 training return: tensor(-67.7187, device='cuda:0')
epoch: 45 test_true_pfm: 3240.01435542722 sim_pfm: -71.1271337675668
episode: 180 training return: tensor(-109.9492, device='cuda:0')
episode: 181 training return: tensor(-73.2688, device='cuda:0')
episode: 182 training return: tensor(-607.8428, device='cuda:0')
episode: 183 training return: tensor(-103.3380, device='cuda:0')
epoch: 46 test_true_pfm: 3257.0455316806947 sim_pfm: -68.04516810219502
episode: 184 training return: tensor(-673.1544, device='cuda:0')
episode: 185 training return: tensor(-661.4531, device='cuda:0')
episode: 186 training return: tensor(-89.1869, device='cuda:0')
episode: 187 training return: tensor(-84.4649, device='cuda:0')
epoch: 47 test_true_pfm: 3229.074532135724 sim_pfm: -74.8461179874721
episode: 188 training return: tensor(-319.1023, device='cuda:0')
episode: 189 training return: tensor(-104.2678, device='cuda:0')
episode: 190 training return: tensor(-731.3380, device='cuda:0')
episode: 191 training return: tensor(-699.6080, device='cuda:0')
epoch: 48 test_true_pfm: 3207.082596760345 sim_pfm: -96.11639143174398
episode: 192 training return: tensor(-57.3722, device='cuda:0')
episode: 193 training return: tensor(-97.4474, device='cuda:0')
episode: 194 training return: tensor(-115.0667, device='cuda:0')
episode: 195 training return: tensor(-71.0233, device='cuda:0')
epoch: 49 test_true_pfm: 3237.4853659423334 sim_pfm: -61.353454391501145
episode: 196 training return: tensor(-593.4643, device='cuda:0')
episode: 197 training return: tensor(-554.9293, device='cuda:0')
episode: 198 training return: tensor(-696.9150, device='cuda:0')
episode: 199 training return: tensor(-68.4187, device='cuda:0')
epoch: 50 test_true_pfm: 3235.6356209339015 sim_pfm: -70.83658715430647
episode: 200 training return: tensor(-92.8731, device='cuda:0')
episode: 201 training return: tensor(-75.1465, device='cuda:0')
episode: 202 training return: tensor(-93.2158, device='cuda:0')
episode: 203 training return: tensor(-108.6317, device='cuda:0')
epoch: 51 test_true_pfm: 3235.6038690549835 sim_pfm: -62.33537033998679
episode: 204 training return: tensor(-86.1471, device='cuda:0')
episode: 205 training return: tensor(-93.5896, device='cuda:0')
episode: 206 training return: tensor(-71.6091, device='cuda:0')
episode: 207 training return: tensor(-93.4618, device='cuda:0')
epoch: 52 test_true_pfm: 3230.9795371157975 sim_pfm: -93.43733279007331
episode: 208 training return: tensor(-387.8734, device='cuda:0')
episode: 209 training return: tensor(-59.7390, device='cuda:0')
episode: 210 training return: tensor(-83.6987, device='cuda:0')
episode: 211 training return: tensor(-108.6687, device='cuda:0')
epoch: 53 test_true_pfm: 3227.790964280461 sim_pfm: -71.61331679072464
episode: 212 training return: tensor(-96.8911, device='cuda:0')
episode: 213 training return: tensor(-476.3591, device='cuda:0')
episode: 214 training return: tensor(-442.1518, device='cuda:0')
episode: 215 training return: tensor(-83.3543, device='cuda:0')
epoch: 54 test_true_pfm: 3238.813298100891 sim_pfm: -75.84903135183656
episode: 216 training return: tensor(-103.3797, device='cuda:0')
episode: 217 training return: tensor(-91.4010, device='cuda:0')
episode: 218 training return: tensor(-72.0798, device='cuda:0')
episode: 219 training return: tensor(-101.1841, device='cuda:0')
epoch: 55 test_true_pfm: 3249.54408123234 sim_pfm: -74.64080500708467
episode: 220 training return: tensor(-118.5833, device='cuda:0')
episode: 221 training return: tensor(-347.0632, device='cuda:0')
episode: 222 training return: tensor(-630.7807, device='cuda:0')
episode: 223 training return: tensor(-103.7965, device='cuda:0')
epoch: 56 test_true_pfm: 3221.9402801893316 sim_pfm: -70.30343168576171
episode: 224 training return: tensor(-80.4452, device='cuda:0')
episode: 225 training return: tensor(-92.4575, device='cuda:0')
episode: 226 training return: tensor(-75.1727, device='cuda:0')
episode: 227 training return: tensor(-92.3437, device='cuda:0')
epoch: 57 test_true_pfm: 3207.330180515372 sim_pfm: -72.04796515535175
episode: 228 training return: tensor(-618.7258, device='cuda:0')
episode: 229 training return: tensor(-70.4348, device='cuda:0')
episode: 230 training return: tensor(-102.2187, device='cuda:0')
episode: 231 training return: tensor(-553.1826, device='cuda:0')
epoch: 58 test_true_pfm: 3235.9906318563935 sim_pfm: -78.94438847509446
episode: 232 training return: tensor(-429.0732, device='cuda:0')
episode: 233 training return: tensor(-48.5323, device='cuda:0')
episode: 234 training return: tensor(-68.4474, device='cuda:0')
episode: 235 training return: tensor(-471.2792, device='cuda:0')
epoch: 59 test_true_pfm: 3231.756541903213 sim_pfm: -79.16180775346584
episode: 236 training return: tensor(-38.0331, device='cuda:0')
episode: 237 training return: tensor(-91.3464, device='cuda:0')
episode: 238 training return: tensor(-384.6284, device='cuda:0')
episode: 239 training return: tensor(-348.7829, device='cuda:0')
epoch: 60 test_true_pfm: 3218.489664793588 sim_pfm: -66.93809100073607
episode: 240 training return: tensor(-45.6009, device='cuda:0')
episode: 241 training return: tensor(-120.2960, device='cuda:0')
episode: 242 training return: tensor(-99.3952, device='cuda:0')
episode: 243 training return: tensor(-704.1782, device='cuda:0')
epoch: 61 test_true_pfm: 3215.3164830630726 sim_pfm: -66.00499253967428
episode: 244 training return: tensor(-243.9511, device='cuda:0')
episode: 245 training return: tensor(-363.9839, device='cuda:0')
episode: 246 training return: tensor(-709.9194, device='cuda:0')
episode: 247 training return: tensor(-72.3718, device='cuda:0')
epoch: 62 test_true_pfm: 3224.8598278014474 sim_pfm: -67.2003264745678
episode: 248 training return: tensor(-67.3510, device='cuda:0')
episode: 249 training return: tensor(-89.4998, device='cuda:0')
episode: 250 training return: tensor(-74.9420, device='cuda:0')
episode: 251 training return: tensor(-314.3188, device='cuda:0')
epoch: 63 test_true_pfm: 3228.46335230524 sim_pfm: -77.37479717987783
episode: 252 training return: tensor(-100.6746, device='cuda:0')
episode: 253 training return: tensor(-460.2907, device='cuda:0')
episode: 254 training return: tensor(-705.4955, device='cuda:0')
episode: 255 training return: tensor(-459.2023, device='cuda:0')
epoch: 64 test_true_pfm: 3236.915394441547 sim_pfm: -81.5666559706636
episode: 256 training return: tensor(-67.7926, device='cuda:0')
episode: 257 training return: tensor(-82.7150, device='cuda:0')
episode: 258 training return: tensor(-96.7116, device='cuda:0')
episode: 259 training return: tensor(-702.1889, device='cuda:0')
epoch: 65 test_true_pfm: 3233.222688275286 sim_pfm: -63.75081760529429
episode: 260 training return: tensor(-102.4849, device='cuda:0')
episode: 261 training return: tensor(-97.6860, device='cuda:0')
episode: 262 training return: tensor(-628.3281, device='cuda:0')
episode: 263 training return: tensor(-71.4343, device='cuda:0')
epoch: 66 test_true_pfm: 3217.9228814181693 sim_pfm: -84.49918572119593
episode: 264 training return: tensor(-94.6904, device='cuda:0')
episode: 265 training return: tensor(-319.3002, device='cuda:0')
episode: 266 training return: tensor(-662.9257, device='cuda:0')
episode: 267 training return: tensor(-712.4299, device='cuda:0')
epoch: 67 test_true_pfm: 3130.555920576284 sim_pfm: -84.37511227658251
episode: 268 training return: tensor(-84.3984, device='cuda:0')
episode: 269 training return: tensor(-76.8595, device='cuda:0')
episode: 270 training return: tensor(-164.4143, device='cuda:0')
episode: 271 training return: tensor(-106.5519, device='cuda:0')
epoch: 68 test_true_pfm: 3245.145483207123 sim_pfm: -69.60559945099521
episode: 272 training return: tensor(-59.8308, device='cuda:0')
episode: 273 training return: tensor(-392.9497, device='cuda:0')
episode: 274 training return: tensor(-84.8539, device='cuda:0')
episode: 275 training return: tensor(-54.1025, device='cuda:0')
epoch: 69 test_true_pfm: 3223.1341779456397 sim_pfm: -64.64376336914331
episode: 276 training return: tensor(-90.6767, device='cuda:0')
episode: 277 training return: tensor(-25.2097, device='cuda:0')
episode: 278 training return: tensor(-85.0432, device='cuda:0')
episode: 279 training return: tensor(-86.1343, device='cuda:0')
epoch: 70 test_true_pfm: 3229.68983107681 sim_pfm: -73.85505579938763
episode: 280 training return: tensor(-81.9491, device='cuda:0')
episode: 281 training return: tensor(-88.9057, device='cuda:0')
episode: 282 training return: tensor(-82.3394, device='cuda:0')
episode: 283 training return: tensor(-234.2152, device='cuda:0')
epoch: 71 test_true_pfm: 3236.392570419043 sim_pfm: -61.8289190508173
episode: 284 training return: tensor(-77.1937, device='cuda:0')
episode: 285 training return: tensor(-102.6605, device='cuda:0')
episode: 286 training return: tensor(-87.8203, device='cuda:0')
episode: 287 training return: tensor(-692.0669, device='cuda:0')
epoch: 72 test_true_pfm: 3234.630668457437 sim_pfm: -80.09807381866267
episode: 288 training return: tensor(-80.8015, device='cuda:0')
episode: 289 training return: tensor(-129.4239, device='cuda:0')
episode: 290 training return: tensor(-78.1193, device='cuda:0')
episode: 291 training return: tensor(-92.7190, device='cuda:0')
epoch: 73 test_true_pfm: 3248.2033791943704 sim_pfm: -71.40944418406191
episode: 292 training return: tensor(-625.4965, device='cuda:0')
episode: 293 training return: tensor(-74.2528, device='cuda:0')
episode: 294 training return: tensor(-640.0042, device='cuda:0')
episode: 295 training return: tensor(-89.3558, device='cuda:0')
epoch: 74 test_true_pfm: 3218.5724291288493 sim_pfm: -63.135643181701504
episode: 296 training return: tensor(-83.9289, device='cuda:0')
episode: 297 training return: tensor(-394.3059, device='cuda:0')
episode: 298 training return: tensor(-90.6439, device='cuda:0')
episode: 299 training return: tensor(-75.1304, device='cuda:0')
epoch: 75 test_true_pfm: 3243.750378719886 sim_pfm: -70.34557521631359
episode: 300 training return: tensor(-706.9927, device='cuda:0')
episode: 301 training return: tensor(-375.0567, device='cuda:0')
episode: 302 training return: tensor(-70.5399, device='cuda:0')
episode: 303 training return: tensor(-110.1779, device='cuda:0')
epoch: 76 test_true_pfm: 3239.1088903470772 sim_pfm: -73.04008066563983
episode: 304 training return: tensor(-419.5120, device='cuda:0')
episode: 305 training return: tensor(-75.8710, device='cuda:0')
episode: 306 training return: tensor(-43.9030, device='cuda:0')
episode: 307 training return: tensor(-73.8760, device='cuda:0')
epoch: 77 test_true_pfm: 3231.283169680919 sim_pfm: -82.33561084768735
episode: 308 training return: tensor(-113.2381, device='cuda:0')
episode: 309 training return: tensor(-664.5019, device='cuda:0')
episode: 310 training return: tensor(-78.2138, device='cuda:0')
episode: 311 training return: tensor(-90.4285, device='cuda:0')
epoch: 78 test_true_pfm: 3235.4215171772435 sim_pfm: -63.696078729369525
episode: 312 training return: tensor(-85.3943, device='cuda:0')
episode: 313 training return: tensor(-134.2037, device='cuda:0')
episode: 314 training return: tensor(-91.8090, device='cuda:0')
episode: 315 training return: tensor(-76.3891, device='cuda:0')
epoch: 79 test_true_pfm: 3220.6626360172145 sim_pfm: -79.85771534424082
episode: 316 training return: tensor(-61.6770, device='cuda:0')
episode: 317 training return: tensor(-89.6399, device='cuda:0')
episode: 318 training return: tensor(-61.8537, device='cuda:0')
episode: 319 training return: tensor(-68.8617, device='cuda:0')
epoch: 80 test_true_pfm: 3240.5891901020664 sim_pfm: -63.69130151798405
episode: 320 training return: tensor(-438.6947, device='cuda:0')
episode: 321 training return: tensor(-362.3349, device='cuda:0')
episode: 322 training return: tensor(-573.9083, device='cuda:0')
episode: 323 training return: tensor(-709.9756, device='cuda:0')
epoch: 81 test_true_pfm: 3229.457821304705 sim_pfm: -60.21071994154287
episode: 324 training return: tensor(-59.5370, device='cuda:0')
episode: 325 training return: tensor(-88.1111, device='cuda:0')
episode: 326 training return: tensor(-78.1952, device='cuda:0')
episode: 327 training return: tensor(-81.7333, device='cuda:0')
epoch: 82 test_true_pfm: 3245.3065503151083 sim_pfm: -68.08767931748298
episode: 328 training return: tensor(-51.8724, device='cuda:0')
episode: 329 training return: tensor(-156.2033, device='cuda:0')
episode: 330 training return: tensor(-81.8278, device='cuda:0')
episode: 331 training return: tensor(-47.7067, device='cuda:0')
epoch: 83 test_true_pfm: 3244.428150333142 sim_pfm: -65.6098582004585
episode: 332 training return: tensor(-79.0058, device='cuda:0')
episode: 333 training return: tensor(-111.4548, device='cuda:0')
episode: 334 training return: tensor(-626.8447, device='cuda:0')
episode: 335 training return: tensor(-711.6719, device='cuda:0')
epoch: 84 test_true_pfm: 2826.849880436875 sim_pfm: -74.19668373737174
episode: 336 training return: tensor(-706.6791, device='cuda:0')
episode: 337 training return: tensor(-89.2366, device='cuda:0')
episode: 338 training return: tensor(-90.9589, device='cuda:0')
episode: 339 training return: tensor(-71.0941, device='cuda:0')
epoch: 85 test_true_pfm: 3247.4419242720655 sim_pfm: -64.78895977175368
episode: 340 training return: tensor(-672.7273, device='cuda:0')
episode: 341 training return: tensor(-81.5575, device='cuda:0')
episode: 342 training return: tensor(-708.6271, device='cuda:0')
episode: 343 training return: tensor(-266.6333, device='cuda:0')
epoch: 86 test_true_pfm: 2815.6563306512617 sim_pfm: -70.16998757173617
episode: 344 training return: tensor(-61.3488, device='cuda:0')
episode: 345 training return: tensor(-73.4613, device='cuda:0')
episode: 346 training return: tensor(-119.4901, device='cuda:0')
episode: 347 training return: tensor(-78.8311, device='cuda:0')
epoch: 87 test_true_pfm: 3237.7798782402256 sim_pfm: -78.0556081449613
episode: 348 training return: tensor(-636.0117, device='cuda:0')
episode: 349 training return: tensor(-96.7516, device='cuda:0')
episode: 350 training return: tensor(-64.5903, device='cuda:0')
episode: 351 training return: tensor(-471.8564, device='cuda:0')
epoch: 88 test_true_pfm: 3236.2636365567323 sim_pfm: -77.48173957365604
episode: 352 training return: tensor(-44.5312, device='cuda:0')
episode: 353 training return: tensor(-103.2495, device='cuda:0')
episode: 354 training return: tensor(-96.7388, device='cuda:0')
episode: 355 training return: tensor(-94.3902, device='cuda:0')
epoch: 89 test_true_pfm: 3237.3607131420904 sim_pfm: -68.72594726439759
episode: 356 training return: tensor(-84.8779, device='cuda:0')
episode: 357 training return: tensor(-90.1908, device='cuda:0')
episode: 358 training return: tensor(-125.3642, device='cuda:0')
episode: 359 training return: tensor(-89.5128, device='cuda:0')
epoch: 90 test_true_pfm: 3240.572880009336 sim_pfm: -72.76127946772613
episode: 360 training return: tensor(-64.7127, device='cuda:0')
episode: 361 training return: tensor(-351.1120, device='cuda:0')
episode: 362 training return: tensor(-74.9761, device='cuda:0')
episode: 363 training return: tensor(-99.6005, device='cuda:0')
epoch: 91 test_true_pfm: 3234.79010293696 sim_pfm: -72.48660637949554
episode: 364 training return: tensor(-97.5404, device='cuda:0')
episode: 365 training return: tensor(-700.6857, device='cuda:0')
episode: 366 training return: tensor(-110.5891, device='cuda:0')
episode: 367 training return: tensor(-91.0383, device='cuda:0')
epoch: 92 test_true_pfm: 3231.822609056085 sim_pfm: -69.58337001384159
episode: 368 training return: tensor(-62.4099, device='cuda:0')
episode: 369 training return: tensor(-75.0288, device='cuda:0')
episode: 370 training return: tensor(-216.3052, device='cuda:0')
episode: 371 training return: tensor(-71.4908, device='cuda:0')
epoch: 93 test_true_pfm: 3236.8762692995474 sim_pfm: -60.70863407605793
episode: 372 training return: tensor(-112.8112, device='cuda:0')
episode: 373 training return: tensor(-87.5185, device='cuda:0')
episode: 374 training return: tensor(-473.7350, device='cuda:0')
episode: 375 training return: tensor(-710.6427, device='cuda:0')
epoch: 94 test_true_pfm: 3243.381258888683 sim_pfm: -70.22006422152238
episode: 376 training return: tensor(-77.5013, device='cuda:0')
episode: 377 training return: tensor(-113.3736, device='cuda:0')
episode: 378 training return: tensor(-110.9726, device='cuda:0')
episode: 379 training return: tensor(-83.1373, device='cuda:0')
epoch: 95 test_true_pfm: 3232.139448314481 sim_pfm: -70.88939017310622
episode: 380 training return: tensor(-81.0475, device='cuda:0')
episode: 381 training return: tensor(-80.6353, device='cuda:0')
episode: 382 training return: tensor(-78.7291, device='cuda:0')
episode: 383 training return: tensor(-109.9909, device='cuda:0')
epoch: 96 test_true_pfm: 3215.1811487772247 sim_pfm: -84.37332710216288
episode: 384 training return: tensor(-82.4168, device='cuda:0')
episode: 385 training return: tensor(-710.0948, device='cuda:0')
episode: 386 training return: tensor(-78.1229, device='cuda:0')
episode: 387 training return: tensor(-631.4308, device='cuda:0')
epoch: 97 test_true_pfm: 3239.669475556784 sim_pfm: -73.25881140798447
episode: 388 training return: tensor(-301.7383, device='cuda:0')
episode: 389 training return: tensor(-105.4491, device='cuda:0')
episode: 390 training return: tensor(-75.6500, device='cuda:0')
episode: 391 training return: tensor(-81.5654, device='cuda:0')
epoch: 98 test_true_pfm: 3237.5080211656204 sim_pfm: -70.05495658956352
episode: 392 training return: tensor(-83.9401, device='cuda:0')
episode: 393 training return: tensor(-624.4464, device='cuda:0')
episode: 394 training return: tensor(-625.8113, device='cuda:0')
episode: 395 training return: tensor(-466.3844, device='cuda:0')
epoch: 99 test_true_pfm: 3230.9548498897298 sim_pfm: -79.55153584531702
episode: 396 training return: tensor(-386.3145, device='cuda:0')
episode: 397 training return: tensor(0.6409, device='cuda:0')
episode: 398 training return: tensor(-46.3471, device='cuda:0')
episode: 399 training return: tensor(-536.4922, device='cuda:0')
epoch: 100 test_true_pfm: 3216.3808919722082 sim_pfm: -67.75320105495241
episode: 400 training return: tensor(-118.5021, device='cuda:0')
episode: 401 training return: tensor(-95.9888, device='cuda:0')
episode: 402 training return: tensor(-694.7670, device='cuda:0')
episode: 403 training return: tensor(-594.9320, device='cuda:0')
epoch: 101 test_true_pfm: 3220.8315705448963 sim_pfm: -77.90103400771234
episode: 404 training return: tensor(-74.5325, device='cuda:0')
episode: 405 training return: tensor(-68.2020, device='cuda:0')
episode: 406 training return: tensor(-69.8602, device='cuda:0')
episode: 407 training return: tensor(-106.1445, device='cuda:0')
epoch: 102 test_true_pfm: 3243.451657369396 sim_pfm: -70.94247617391132
episode: 408 training return: tensor(-98.4957, device='cuda:0')
episode: 409 training return: tensor(-83.9753, device='cuda:0')
episode: 410 training return: tensor(-96.3226, device='cuda:0')
episode: 411 training return: tensor(-73.5517, device='cuda:0')
epoch: 103 test_true_pfm: 3237.17803805109 sim_pfm: -60.544602694785375
episode: 412 training return: tensor(-88.5988, device='cuda:0')
episode: 413 training return: tensor(-83.4223, device='cuda:0')
episode: 414 training return: tensor(-71.0095, device='cuda:0')
episode: 415 training return: tensor(-68.8304, device='cuda:0')
epoch: 104 test_true_pfm: 3241.8385952612794 sim_pfm: -73.43699603194061
episode: 416 training return: tensor(-82.8123, device='cuda:0')
episode: 417 training return: tensor(-107.3487, device='cuda:0')
episode: 418 training return: tensor(-90.0869, device='cuda:0')
episode: 419 training return: tensor(-77.2194, device='cuda:0')
epoch: 105 test_true_pfm: 3237.9848146226086 sim_pfm: -63.35318902309518
episode: 420 training return: tensor(-87.9364, device='cuda:0')
episode: 421 training return: tensor(-95.0548, device='cuda:0')
episode: 422 training return: tensor(-83.2279, device='cuda:0')
episode: 423 training return: tensor(-102.6096, device='cuda:0')
epoch: 106 test_true_pfm: 3232.522146475281 sim_pfm: -87.80162362351741
episode: 424 training return: tensor(-700.5126, device='cuda:0')
episode: 425 training return: tensor(-53.9783, device='cuda:0')
episode: 426 training return: tensor(-84.9195, device='cuda:0')
episode: 427 training return: tensor(-544.4821, device='cuda:0')
epoch: 107 test_true_pfm: 3218.231411535127 sim_pfm: -78.29878241250601
episode: 428 training return: tensor(-73.6764, device='cuda:0')
episode: 429 training return: tensor(-90.2336, device='cuda:0')
episode: 430 training return: tensor(-91.4523, device='cuda:0')
episode: 431 training return: tensor(-75.0250, device='cuda:0')
epoch: 108 test_true_pfm: 3235.2953278417795 sim_pfm: -64.56882340960631
episode: 432 training return: tensor(-316.4653, device='cuda:0')
episode: 433 training return: tensor(-87.3715, device='cuda:0')
episode: 434 training return: tensor(-75.1675, device='cuda:0')
episode: 435 training return: tensor(-73.1323, device='cuda:0')
epoch: 109 test_true_pfm: 3220.167487243693 sim_pfm: -71.26184432090183
episode: 436 training return: tensor(-515.2141, device='cuda:0')
episode: 437 training return: tensor(-683.6454, device='cuda:0')
episode: 438 training return: tensor(-69.8380, device='cuda:0')
episode: 439 training return: tensor(-47.9104, device='cuda:0')
epoch: 110 test_true_pfm: 3247.9626909772755 sim_pfm: -65.77577700292265
episode: 440 training return: tensor(-58.1119, device='cuda:0')
episode: 441 training return: tensor(-79.6379, device='cuda:0')
episode: 442 training return: tensor(-74.5585, device='cuda:0')
episode: 443 training return: tensor(-557.4385, device='cuda:0')
epoch: 111 test_true_pfm: 3230.4393658234585 sim_pfm: -70.89548645063769
episode: 444 training return: tensor(-153.8019, device='cuda:0')
episode: 445 training return: tensor(-68.0371, device='cuda:0')
episode: 446 training return: tensor(-700.5925, device='cuda:0')
episode: 447 training return: tensor(-534.7396, device='cuda:0')
epoch: 112 test_true_pfm: 3239.0265072915604 sim_pfm: -67.25004210711147
episode: 448 training return: tensor(-84.2018, device='cuda:0')
episode: 449 training return: tensor(-68.2528, device='cuda:0')
episode: 450 training return: tensor(-90.1218, device='cuda:0')
episode: 451 training return: tensor(-392.0434, device='cuda:0')
epoch: 113 test_true_pfm: 3222.8711183423125 sim_pfm: -86.59723263484193
episode: 452 training return: tensor(-71.2118, device='cuda:0')
episode: 453 training return: tensor(-702.4725, device='cuda:0')
episode: 454 training return: tensor(-133.2481, device='cuda:0')
episode: 455 training return: tensor(-584.5319, device='cuda:0')
epoch: 114 test_true_pfm: 3239.6504222761237 sim_pfm: -60.42273173779055
episode: 456 training return: tensor(-380.1059, device='cuda:0')
episode: 457 training return: tensor(-93.8102, device='cuda:0')
episode: 458 training return: tensor(-81.2455, device='cuda:0')
episode: 459 training return: tensor(-35.9853, device='cuda:0')
epoch: 115 test_true_pfm: 2833.6877241377383 sim_pfm: -63.560949241063405
episode: 460 training return: tensor(-80.9694, device='cuda:0')
episode: 461 training return: tensor(-100.5414, device='cuda:0')
episode: 462 training return: tensor(-83.1185, device='cuda:0')
episode: 463 training return: tensor(-105.9654, device='cuda:0')
epoch: 116 test_true_pfm: 3237.3157915739243 sim_pfm: -66.81944931093797
episode: 464 training return: tensor(-94.6789, device='cuda:0')
episode: 465 training return: tensor(-655.4682, device='cuda:0')
episode: 466 training return: tensor(-126.7466, device='cuda:0')
episode: 467 training return: tensor(-166.1743, device='cuda:0')
epoch: 117 test_true_pfm: 3250.3951317485603 sim_pfm: -57.89288709553269
episode: 468 training return: tensor(-619.4142, device='cuda:0')
episode: 469 training return: tensor(-103.4288, device='cuda:0')
episode: 470 training return: tensor(-597.1015, device='cuda:0')
episode: 471 training return: tensor(-75.6525, device='cuda:0')
epoch: 118 test_true_pfm: 3227.991931227269 sim_pfm: -68.33646094685537
episode: 472 training return: tensor(-327.2169, device='cuda:0')
episode: 473 training return: tensor(-90.8836, device='cuda:0')
episode: 474 training return: tensor(-77.6035, device='cuda:0')
episode: 475 training return: tensor(-4.1240, device='cuda:0')
epoch: 119 test_true_pfm: 3232.052798890283 sim_pfm: -69.781598683078
episode: 476 training return: tensor(-625.3805, device='cuda:0')
episode: 477 training return: tensor(-77.6223, device='cuda:0')
episode: 478 training return: tensor(-705.7366, device='cuda:0')
episode: 479 training return: tensor(-109.6161, device='cuda:0')
epoch: 120 test_true_pfm: 3243.783836951137 sim_pfm: -63.764767002062094
episode: 480 training return: tensor(-82.9045, device='cuda:0')
episode: 481 training return: tensor(-82.9032, device='cuda:0')
episode: 482 training return: tensor(-96.7941, device='cuda:0')
episode: 483 training return: tensor(-100.7607, device='cuda:0')
epoch: 121 test_true_pfm: 3227.577527389812 sim_pfm: -65.57561675437803
episode: 484 training return: tensor(-684.6680, device='cuda:0')
episode: 485 training return: tensor(-62.8164, device='cuda:0')
episode: 486 training return: tensor(-323.6023, device='cuda:0')
episode: 487 training return: tensor(-88.8147, device='cuda:0')
epoch: 122 test_true_pfm: 3241.4885002125834 sim_pfm: -62.095059125150634
episode: 488 training return: tensor(-73.5504, device='cuda:0')
episode: 489 training return: tensor(-97.4605, device='cuda:0')
episode: 490 training return: tensor(-317.8333, device='cuda:0')
episode: 491 training return: tensor(-66.1383, device='cuda:0')
epoch: 123 test_true_pfm: 3244.6351690710403 sim_pfm: -69.72414458345156
episode: 492 training return: tensor(-406.6025, device='cuda:0')
episode: 493 training return: tensor(-480.5949, device='cuda:0')
episode: 494 training return: tensor(-78.1358, device='cuda:0')
episode: 495 training return: tensor(-63.3378, device='cuda:0')
epoch: 124 test_true_pfm: 3221.642998324997 sim_pfm: -71.5911601165232
episode: 496 training return: tensor(-123.2135, device='cuda:0')
episode: 497 training return: tensor(-116.5625, device='cuda:0')
episode: 498 training return: tensor(-62.3495, device='cuda:0')
episode: 499 training return: tensor(-395.6503, device='cuda:0')
epoch: 125 test_true_pfm: 3239.954449587452 sim_pfm: -63.91450121936699
episode: 500 training return: tensor(-112.8807, device='cuda:0')
episode: 501 training return: tensor(-67.1122, device='cuda:0')
episode: 502 training return: tensor(-88.4742, device='cuda:0')
episode: 503 training return: tensor(-104.3619, device='cuda:0')
epoch: 126 test_true_pfm: 3234.482354785158 sim_pfm: -71.02936818436137
episode: 504 training return: tensor(-94.8060, device='cuda:0')
episode: 505 training return: tensor(-708.4688, device='cuda:0')
episode: 506 training return: tensor(-63.5132, device='cuda:0')
episode: 507 training return: tensor(-103.7754, device='cuda:0')
epoch: 127 test_true_pfm: 3202.056837677514 sim_pfm: -67.51705358914721
episode: 508 training return: tensor(-80.3288, device='cuda:0')
episode: 509 training return: tensor(-74.6958, device='cuda:0')
episode: 510 training return: tensor(-83.8693, device='cuda:0')
episode: 511 training return: tensor(-91.2022, device='cuda:0')
epoch: 128 test_true_pfm: 3232.1011551484894 sim_pfm: -78.14248355078355
episode: 512 training return: tensor(-103.3922, device='cuda:0')
episode: 513 training return: tensor(-71.4358, device='cuda:0')
episode: 514 training return: tensor(-50.7567, device='cuda:0')
episode: 515 training return: tensor(-65.7523, device='cuda:0')
epoch: 129 test_true_pfm: 3250.811673139981 sim_pfm: -71.16056236063984
episode: 516 training return: tensor(-97.3863, device='cuda:0')
episode: 517 training return: tensor(-475.3306, device='cuda:0')
episode: 518 training return: tensor(-78.7889, device='cuda:0')
episode: 519 training return: tensor(-0.9533, device='cuda:0')
epoch: 130 test_true_pfm: 3219.459634786021 sim_pfm: -72.48092356618145
episode: 520 training return: tensor(-68.9657, device='cuda:0')
episode: 521 training return: tensor(-535.2272, device='cuda:0')
episode: 522 training return: tensor(-78.8054, device='cuda:0')
episode: 523 training return: tensor(-630.8145, device='cuda:0')
epoch: 131 test_true_pfm: 3232.0525731181183 sim_pfm: -67.98538720042173
episode: 524 training return: tensor(-77.6497, device='cuda:0')
episode: 525 training return: tensor(-581.3983, device='cuda:0')
episode: 526 training return: tensor(-362.9677, device='cuda:0')
episode: 527 training return: tensor(-74.9798, device='cuda:0')
epoch: 132 test_true_pfm: 3239.121987556337 sim_pfm: -69.56973204742341
episode: 528 training return: tensor(-76.6680, device='cuda:0')
episode: 529 training return: tensor(-306.0872, device='cuda:0')
episode: 530 training return: tensor(-76.7192, device='cuda:0')
episode: 531 training return: tensor(-72.3558, device='cuda:0')
epoch: 133 test_true_pfm: 3226.8585221218505 sim_pfm: -66.22777412401047
episode: 532 training return: tensor(-38.4321, device='cuda:0')
episode: 533 training return: tensor(-413.4205, device='cuda:0')
episode: 534 training return: tensor(-75.3022, device='cuda:0')
episode: 535 training return: tensor(-711.2278, device='cuda:0')
epoch: 134 test_true_pfm: 3241.123557832658 sim_pfm: -59.21099282676975
episode: 536 training return: tensor(-584.8568, device='cuda:0')
episode: 537 training return: tensor(-707.4485, device='cuda:0')
episode: 538 training return: tensor(-35.0245, device='cuda:0')
episode: 539 training return: tensor(-85.2882, device='cuda:0')
epoch: 135 test_true_pfm: 3237.2837252914965 sim_pfm: -74.86901228578063
episode: 540 training return: tensor(-116.4695, device='cuda:0')
episode: 541 training return: tensor(-617.5859, device='cuda:0')
episode: 542 training return: tensor(-68.1522, device='cuda:0')
episode: 543 training return: tensor(-95.0755, device='cuda:0')
epoch: 136 test_true_pfm: 3237.171210668159 sim_pfm: -164.2388967300552
episode: 544 training return: tensor(-52.4239, device='cuda:0')
episode: 545 training return: tensor(-73.7113, device='cuda:0')
episode: 546 training return: tensor(-111.7810, device='cuda:0')
episode: 547 training return: tensor(-79.1772, device='cuda:0')
epoch: 137 test_true_pfm: 3215.504419579897 sim_pfm: -76.21060360234696
episode: 548 training return: tensor(-110.1851, device='cuda:0')
episode: 549 training return: tensor(-545.3058, device='cuda:0')
episode: 550 training return: tensor(-81.5885, device='cuda:0')
episode: 551 training return: tensor(-88.1696, device='cuda:0')
epoch: 138 test_true_pfm: 3235.379943433481 sim_pfm: -124.03546191128164
episode: 552 training return: tensor(-711.1940, device='cuda:0')
episode: 553 training return: tensor(-87.6389, device='cuda:0')
episode: 554 training return: tensor(4.2149, device='cuda:0')
episode: 555 training return: tensor(-70.1072, device='cuda:0')
epoch: 139 test_true_pfm: 3233.1942178921768 sim_pfm: -75.66014749512154
episode: 556 training return: tensor(-73.7991, device='cuda:0')
episode: 557 training return: tensor(-485.8954, device='cuda:0')
episode: 558 training return: tensor(-103.8069, device='cuda:0')
episode: 559 training return: tensor(-102.4413, device='cuda:0')
epoch: 140 test_true_pfm: 3214.9649301421173 sim_pfm: -60.18607760895005
episode: 560 training return: tensor(-642.8446, device='cuda:0')
episode: 561 training return: tensor(-106.8183, device='cuda:0')
episode: 562 training return: tensor(-608.1792, device='cuda:0')
episode: 563 training return: tensor(-310.5213, device='cuda:0')
epoch: 141 test_true_pfm: 3232.8178965041916 sim_pfm: -61.24863102345262
episode: 564 training return: tensor(-130.5093, device='cuda:0')
episode: 565 training return: tensor(-58.4138, device='cuda:0')
episode: 566 training return: tensor(-395.2224, device='cuda:0')
episode: 567 training return: tensor(-98.4400, device='cuda:0')
epoch: 142 test_true_pfm: 3246.438443973832 sim_pfm: -72.27039279048525
episode: 568 training return: tensor(-113.5181, device='cuda:0')
episode: 569 training return: tensor(-67.8162, device='cuda:0')
episode: 570 training return: tensor(-388.0905, device='cuda:0')
episode: 571 training return: tensor(-79.5508, device='cuda:0')
epoch: 143 test_true_pfm: 3240.865497142005 sim_pfm: -76.00685340787943
episode: 572 training return: tensor(-624.7314, device='cuda:0')
episode: 573 training return: tensor(-64.4610, device='cuda:0')
episode: 574 training return: tensor(-84.2090, device='cuda:0')
episode: 575 training return: tensor(-90.9119, device='cuda:0')
epoch: 144 test_true_pfm: 3247.461940064782 sim_pfm: -76.37737021958067
episode: 576 training return: tensor(-569.8665, device='cuda:0')
episode: 577 training return: tensor(-566.7781, device='cuda:0')
episode: 578 training return: tensor(-95.8732, device='cuda:0')
episode: 579 training return: tensor(-104.0113, device='cuda:0')
epoch: 145 test_true_pfm: 3256.981673500877 sim_pfm: -61.103883406797344
episode: 580 training return: tensor(-86.6562, device='cuda:0')
episode: 581 training return: tensor(-626.8625, device='cuda:0')
episode: 582 training return: tensor(-79.5658, device='cuda:0')
episode: 583 training return: tensor(-74.3250, device='cuda:0')
epoch: 146 test_true_pfm: 3231.119418119232 sim_pfm: -58.75371327517011
episode: 584 training return: tensor(-395.5296, device='cuda:0')
episode: 585 training return: tensor(-77.2329, device='cuda:0')
episode: 586 training return: tensor(-86.2520, device='cuda:0')
episode: 587 training return: tensor(-80.5532, device='cuda:0')
epoch: 147 test_true_pfm: 3231.712527145932 sim_pfm: -74.84046119371972
episode: 588 training return: tensor(-110.9513, device='cuda:0')
episode: 589 training return: tensor(-462.6833, device='cuda:0')
episode: 590 training return: tensor(-71.1401, device='cuda:0')
episode: 591 training return: tensor(-455.3598, device='cuda:0')
epoch: 148 test_true_pfm: 3215.4790280063294 sim_pfm: -78.49436464870814
episode: 592 training return: tensor(-82.7245, device='cuda:0')
episode: 593 training return: tensor(-544.8782, device='cuda:0')
episode: 594 training return: tensor(-71.5564, device='cuda:0')
episode: 595 training return: tensor(-88.9482, device='cuda:0')
epoch: 149 test_true_pfm: 3219.88430709277 sim_pfm: -60.84349441293549
episode: 596 training return: tensor(-554.0004, device='cuda:0')
episode: 597 training return: tensor(-88.8970, device='cuda:0')
episode: 598 training return: tensor(-61.5664, device='cuda:0')
episode: 599 training return: tensor(-658.1214, device='cuda:0')
epoch: 150 test_true_pfm: 2914.1307543967414 sim_pfm: -87.59019651422084
