['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '2']
epoch: 0 training_loss 0.2965970242768526 test_loss: 0.1899438977241516
epoch: 1 training_loss 0.20582520484924316 test_loss: 0.17103416919708253
epoch: 2 training_loss 0.17376006931066512 test_loss: 0.1885640025138855
epoch: 3 training_loss 0.17918377287685872 test_loss: 0.1557065486907959
epoch: 4 training_loss 0.15709040068089963 test_loss: 0.2283797025680542
epoch: 5 training_loss 0.16252306170761585 test_loss: 0.19386942386627198
epoch: 6 training_loss 0.15758531898260117 test_loss: 0.15577185153961182
epoch: 7 training_loss 0.15176152423024178 test_loss: 0.15448439121246338
epoch: 8 training_loss 0.1582564401626587 test_loss: 0.12493412494659424
epoch: 9 training_loss 0.1449604357779026 test_loss: 0.13064994812011718
epoch: 10 training_loss 0.1484720486961305 test_loss: 0.16117322444915771
epoch: 11 training_loss 0.144109418168664 test_loss: 0.16206573247909545
epoch: 12 training_loss 0.15401032730937003 test_loss: 0.12870349884033203
epoch: 13 training_loss 0.1456311023235321 test_loss: 0.15255838632583618
epoch: 14 training_loss 0.1519076406210661 test_loss: 0.14338762760162355
epoch: 15 training_loss 0.15048281084746123 test_loss: 0.13496123552322387
epoch: 16 training_loss 0.14350081872195006 test_loss: 0.15126125812530516
epoch: 17 training_loss 0.1489239839836955 test_loss: 0.13383454084396362
epoch: 18 training_loss 0.12950814910233022 test_loss: 0.14882789850234984
epoch: 19 training_loss 0.14975540537387133 test_loss: 0.13464112281799318
epoch: 20 training_loss 0.15455419953912497 test_loss: 0.15418813228607178
epoch: 21 training_loss 0.14293442096561193 test_loss: 0.13034017086029054
epoch: 22 training_loss 0.13913722041994334 test_loss: 0.12806320190429688
epoch: 23 training_loss 0.13995044775307178 test_loss: 0.1244226336479187
epoch: 24 training_loss 0.1529626253247261 test_loss: 0.14887217283248902
epoch: 25 training_loss 0.13923306852579118 test_loss: 0.1296652674674988
epoch: 26 training_loss 0.1534353543445468 test_loss: 0.1286574125289917
epoch: 27 training_loss 0.1438459948450327 test_loss: 0.12293792963027954
epoch: 28 training_loss 0.13611867055296897 test_loss: 0.13089253902435302
epoch: 29 training_loss 0.1492666144669056 test_loss: 0.1219475269317627
epoch: 30 training_loss 0.13712362036108972 test_loss: 0.1353705644607544
epoch: 31 training_loss 0.1374654071033001 test_loss: 0.14683531522750853
epoch: 32 training_loss 0.13779884457588196 test_loss: 0.14565309286117553
epoch: 33 training_loss 0.13727104131132364 test_loss: 0.13623284101486205
epoch: 34 training_loss 0.13863481558859347 test_loss: 0.12900063991546631
epoch: 35 training_loss 0.14976566724479198 test_loss: 0.1351301074028015
epoch: 36 training_loss 0.14339626125991345 test_loss: 0.1349421501159668
epoch: 37 training_loss 0.1313025014102459 test_loss: 0.12983845472335814
epoch: 38 training_loss 0.13607092015445232 test_loss: 0.12981175184249877
epoch: 39 training_loss 0.14114518295973538 test_loss: 0.1368364453315735
epoch: 40 training_loss 0.14390808314085007 test_loss: 0.1332497000694275
epoch: 41 training_loss 0.14090648092329502 test_loss: 0.15378450155258178
epoch: 42 training_loss 0.1414240826666355 test_loss: 0.11920031309127807
epoch: 43 training_loss 0.13859949976205826 test_loss: 0.14167721271514894
epoch: 44 training_loss 0.14535357870161533 test_loss: 0.13924254179000856
epoch: 45 training_loss 0.14239422980695962 test_loss: 0.12755860090255738
epoch: 46 training_loss 0.13544319860637188 test_loss: 0.1432052493095398
epoch: 47 training_loss 0.1422741001099348 test_loss: 0.14401309490203856
epoch: 48 training_loss 0.1392764722555876 test_loss: 0.13345321416854858
epoch: 49 training_loss 0.13889509785920381 test_loss: 0.1408410906791687
epoch: 50 training_loss 0.133918435908854 test_loss: 0.13402270078659057
epoch: 51 training_loss 0.14592027358710766 test_loss: 0.14296326637268067
epoch: 52 training_loss 0.13750333327800035 test_loss: 0.1427693247795105
epoch: 53 training_loss 0.13271749239414932 test_loss: 0.1276955485343933
epoch: 54 training_loss 0.13894539166241884 test_loss: 0.13283742666244508
epoch: 55 training_loss 0.13928963787853718 test_loss: 0.14106459617614747
epoch: 56 training_loss 0.14891294011846185 test_loss: 0.14291211366653442
epoch: 57 training_loss 0.1359095260873437 test_loss: 0.13592867851257323
epoch: 58 training_loss 0.13661032408475876 test_loss: 0.13224570751190184
epoch: 59 training_loss 0.1410663702711463 test_loss: 0.13345531225204468
epoch: 60 training_loss 0.13880258809775115 test_loss: 0.13823131322860718
epoch: 61 training_loss 0.13911412969231607 test_loss: 0.13963435888290404
epoch: 62 training_loss 0.12841031178832055 test_loss: 0.13427311182022095
epoch: 63 training_loss 0.14307807493954897 test_loss: 0.1244322419166565
epoch: 64 training_loss 0.1419876591861248 test_loss: 0.14375588893890381
epoch: 65 training_loss 0.1424798746407032 test_loss: 0.14426932334899903
epoch: 66 training_loss 0.13475478205829858 test_loss: 0.13187519311904908
epoch: 67 training_loss 0.14128713898360729 test_loss: 0.15098586082458496
epoch: 68 training_loss 0.14122247781604527 test_loss: 0.12968000173568725
epoch: 69 training_loss 0.127516632899642 test_loss: 0.12596817016601564
epoch: 70 training_loss 0.13614210691303014 test_loss: 0.12903181314468384
epoch: 71 training_loss 0.1388740747794509 test_loss: 0.13553003072738648
epoch: 72 training_loss 0.1364123711362481 test_loss: 0.13371187448501587
epoch: 73 training_loss 0.13949848454445601 test_loss: 0.14811431169509887
epoch: 74 training_loss 0.1473266077414155 test_loss: 0.13573886156082154
epoch: 75 training_loss 0.13343412782996894 test_loss: 0.12674009799957275
epoch: 76 training_loss 0.14223717082291842 test_loss: 0.12333022356033325
epoch: 77 training_loss 0.14099458683282137 test_loss: 0.1349969983100891
epoch: 78 training_loss 0.14002471681684256 test_loss: 0.14478343725204468
epoch: 79 training_loss 0.13638794850558042 test_loss: 0.153630793094635
epoch: 80 training_loss 0.13876542855054141 test_loss: 0.12806202173233033
epoch: 81 training_loss 0.13886507328599693 test_loss: 0.1312715530395508
epoch: 82 training_loss 0.1323224701359868 test_loss: 0.13804434537887572
epoch: 83 training_loss 0.14054244183003903 test_loss: 0.15819252729415895
epoch: 84 training_loss 0.1396448418498039 test_loss: 0.13906493186950683
epoch: 85 training_loss 0.1366275403648615 test_loss: 0.11689996719360352
epoch: 86 training_loss 0.1415296777151525 test_loss: 0.13288451433181764
epoch: 87 training_loss 0.13121126141399145 test_loss: 0.13586621284484862
epoch: 88 training_loss 0.14337496638298033 test_loss: 0.14172875881195068
epoch: 89 training_loss 0.13231129072606562 test_loss: 0.1426466941833496
epoch: 90 training_loss 0.13329493436962367 test_loss: 0.13730417490005492
epoch: 91 training_loss 0.13753559557721018 test_loss: 0.13987756967544557
epoch: 92 training_loss 0.13281003829091786 test_loss: 0.12626363039016725
epoch: 93 training_loss 0.13599867943674326 test_loss: 0.12820591926574706
epoch: 94 training_loss 0.13432969411835075 test_loss: 0.12858394384384156
epoch: 95 training_loss 0.1302296520769596 test_loss: 0.13886853456497192
epoch: 96 training_loss 0.13255011033266784 test_loss: 0.1277194857597351
epoch: 97 training_loss 0.13838181793689727 test_loss: 0.12074942588806152
epoch: 98 training_loss 0.13203467465937138 test_loss: 0.1316640257835388
epoch: 99 training_loss 0.1347904608771205 test_loss: 0.12365562915802002
epoch: 100 training_loss 0.13788553822785615 test_loss: 0.13261998891830445
epoch: 101 training_loss 0.13602904826402665 test_loss: 0.11569938659667969
epoch: 102 training_loss 0.14745032820850612 test_loss: 0.13850687742233275
epoch: 103 training_loss 0.12403345249593258 test_loss: 0.12696728706359864
epoch: 104 training_loss 0.13183286156505347 test_loss: 0.1287722706794739
epoch: 105 training_loss 0.12438603848218918 test_loss: 0.11235905885696411
epoch: 106 training_loss 0.13924474216997623 test_loss: 0.13383102416992188
epoch: 107 training_loss 0.13666877280920744 test_loss: 0.14090508222579956
epoch: 108 training_loss 0.1334839065000415 test_loss: 0.12285023927688599
epoch: 109 training_loss 0.141496401168406 test_loss: 0.11712876558303834
epoch: 110 training_loss 0.13199853118509053 test_loss: 0.1392109751701355
epoch: 111 training_loss 0.13625170566141606 test_loss: 0.13436077833175658
epoch: 112 training_loss 0.12980919186025858 test_loss: 0.13301725387573243
epoch: 113 training_loss 0.1303544459864497 test_loss: 0.1377907633781433
epoch: 114 training_loss 0.126746914871037 test_loss: 0.11631239652633667
epoch: 115 training_loss 0.14073254484683276 test_loss: 0.12351295948028565
epoch: 116 training_loss 0.1234881617128849 test_loss: 0.11825038194656372
epoch: 117 training_loss 0.13103619672358036 test_loss: 0.14532481431961058
epoch: 118 training_loss 0.13276511516422032 test_loss: 0.13914670944213867
epoch: 119 training_loss 0.14015841400250792 test_loss: 0.11206727027893067
epoch: 120 training_loss 0.13767260912805795 test_loss: 0.12418211698532104
epoch: 121 training_loss 0.13987040799111128 test_loss: 0.12381502389907836
epoch: 122 training_loss 0.13700756508857012 test_loss: 0.13303231000900267
epoch: 123 training_loss 0.1363043288886547 test_loss: 0.12963727712631226
epoch: 124 training_loss 0.14144477885216475 test_loss: 0.11886419057846069
epoch: 125 training_loss 0.14287269573658704 test_loss: 0.11937495470046997
epoch: 126 training_loss 0.13527457624673844 test_loss: 0.15756385326385497
epoch: 127 training_loss 0.12498362328857184 test_loss: 0.13069101572036743
epoch: 128 training_loss 0.1398990624025464 test_loss: 0.12226098775863647
epoch: 129 training_loss 0.13435845583677292 test_loss: 0.12663607597351073
epoch: 130 training_loss 0.13349215604364872 test_loss: 0.14055390357971193
epoch: 131 training_loss 0.13671176597476006 test_loss: 0.13025782108306885
epoch: 132 training_loss 0.1255126165598631 test_loss: 0.11928673982620239
epoch: 133 training_loss 0.13748541742563247 test_loss: 0.12369109392166137
epoch: 134 training_loss 0.13567440859973431 test_loss: 0.13510531187057495
epoch: 135 training_loss 0.13159097854048013 test_loss: 0.11799601316452027
epoch: 136 training_loss 0.1334600268676877 test_loss: 0.12300885915756225
epoch: 137 training_loss 0.1266084473952651 test_loss: 0.1347716212272644
epoch: 138 training_loss 0.13851177582517266 test_loss: 0.11302471160888672
epoch: 139 training_loss 0.1436609158292413 test_loss: 0.13747222423553468
epoch: 140 training_loss 0.13447289887815714 test_loss: 0.1224820852279663
epoch: 141 training_loss 0.13749840911477804 test_loss: 0.11158963441848754
epoch: 142 training_loss 0.1254599930346012 test_loss: 0.13110123872756957
epoch: 143 training_loss 0.13096790928393603 test_loss: 0.12902168035507203
epoch: 144 training_loss 0.13147542428225278 test_loss: 0.11634974479675293
epoch: 145 training_loss 0.13274874333292247 test_loss: 0.13776562213897706
epoch: 146 training_loss 0.13016665756702422 test_loss: 0.13057053089141846
epoch: 147 training_loss 0.13105883285403253 test_loss: 0.1253311038017273
epoch: 148 training_loss 0.1348565238714218 test_loss: 0.15357356071472167
epoch: 149 training_loss 0.13332684587687255 test_loss: 0.1280912160873413
epoch: 0 training_loss 7.844382100105285 test_loss: 4.628225326538086
epoch: 1 training_loss 3.5572025275230406 test_loss: 2.757601737976074
epoch: 2 training_loss 2.3969656300544737 test_loss: 2.1084341049194335
epoch: 3 training_loss 1.8620472598075866 test_loss: 1.6857864379882812
epoch: 4 training_loss 1.5931945466995239 test_loss: 1.5197269439697265
epoch: 5 training_loss 1.4082333099842073 test_loss: 1.3386643409729004
epoch: 6 training_loss 1.2678775155544282 test_loss: 1.2343730926513672
epoch: 7 training_loss 1.162527050971985 test_loss: 1.1026155471801757
epoch: 8 training_loss 1.0846062058210373 test_loss: 1.0466100692749023
epoch: 9 training_loss 1.0343749010562897 test_loss: 1.0560256958007812
epoch: 10 training_loss 0.9822172266244888 test_loss: 0.9556317329406738
epoch: 11 training_loss 0.9499659460783004 test_loss: 0.9118962287902832
epoch: 12 training_loss 0.8930881041288375 test_loss: 0.9007400512695313
epoch: 13 training_loss 0.8716529077291488 test_loss: 0.9139270782470703
epoch: 14 training_loss 0.8601817274093628 test_loss: 0.8750204086303711
epoch: 15 training_loss 0.840848942399025 test_loss: 0.8244967460632324
epoch: 16 training_loss 0.8304482179880143 test_loss: 0.8063454627990723
epoch: 17 training_loss 0.7986410105228424 test_loss: 0.7629438877105713
epoch: 18 training_loss 0.7912423872947693 test_loss: 0.807579231262207
epoch: 19 training_loss 0.773783546090126 test_loss: 0.8002183914184571
epoch: 20 training_loss 0.7565283292531967 test_loss: 0.7931861400604248
epoch: 21 training_loss 0.7636748152971268 test_loss: 0.7662501811981202
epoch: 22 training_loss 0.7392971128225326 test_loss: 0.7177373886108398
epoch: 23 training_loss 0.735765432715416 test_loss: 0.708620262145996
epoch: 24 training_loss 0.721427663564682 test_loss: 0.7047188758850098
epoch: 25 training_loss 0.7226033437252045 test_loss: 0.6988662719726563
epoch: 26 training_loss 0.7059769695997238 test_loss: 0.7071805953979492
epoch: 27 training_loss 0.697882519364357 test_loss: 0.7398967266082763
epoch: 28 training_loss 0.6876389974355698 test_loss: 0.7021195411682128
epoch: 29 training_loss 0.6823533576726913 test_loss: 0.7218827724456787
epoch: 30 training_loss 0.6672505474090576 test_loss: 0.6848509311676025
epoch: 31 training_loss 0.6593042200803757 test_loss: 0.6850934028625488
epoch: 32 training_loss 0.6831968402862549 test_loss: 0.6606688022613525
epoch: 33 training_loss 0.6565992176532746 test_loss: 0.6774441242218018
epoch: 34 training_loss 0.6408954387903214 test_loss: 0.6351155757904052
epoch: 35 training_loss 0.6511003315448761 test_loss: 0.6353918552398682
epoch: 36 training_loss 0.6430648028850555 test_loss: 0.6581925392150879
epoch: 37 training_loss 0.654278101325035 test_loss: 0.6490397453308105
epoch: 38 training_loss 0.636538850069046 test_loss: 0.6286512851715088
epoch: 39 training_loss 0.6164770513772965 test_loss: 0.6240877151489258
epoch: 40 training_loss 0.6168040651082992 test_loss: 0.637291669845581
epoch: 41 training_loss 0.6203917121887207 test_loss: 0.6220560073852539
epoch: 42 training_loss 0.6253385102748871 test_loss: 0.6426753997802734
epoch: 43 training_loss 0.6103547710180283 test_loss: 0.5926394462585449
epoch: 44 training_loss 0.6146355766057968 test_loss: 0.6083197593688965
epoch: 45 training_loss 0.5901789170503616 test_loss: 0.5988701820373535
epoch: 46 training_loss 0.5997677671909333 test_loss: 0.590087604522705
epoch: 47 training_loss 0.5971452426910401 test_loss: 0.5770633220672607
epoch: 48 training_loss 0.5950012719631195 test_loss: 0.5978903770446777
epoch: 49 training_loss 0.599594755768776 test_loss: 0.5838729858398437
epoch: 50 training_loss 0.5870027774572373 test_loss: 0.5985281467437744
epoch: 51 training_loss 0.5905859577655792 test_loss: 0.5731601715087891
epoch: 52 training_loss 0.5767827486991882 test_loss: 0.5621806621551514
epoch: 53 training_loss 0.5723681253194809 test_loss: 0.587520170211792
epoch: 54 training_loss 0.5779257822036743 test_loss: 0.563958215713501
epoch: 55 training_loss 0.5701149752736092 test_loss: 0.5809339046478271
epoch: 56 training_loss 0.5794004261493683 test_loss: 0.5664607048034668
epoch: 57 training_loss 0.5747914904356003 test_loss: 0.5663835525512695
epoch: 58 training_loss 0.5734783837199211 test_loss: 0.571378517150879
epoch: 59 training_loss 0.5640676498413086 test_loss: 0.5845517158508301
epoch: 60 training_loss 0.5527068957686424 test_loss: 0.5746453762054443
epoch: 61 training_loss 0.5624126517772674 test_loss: 0.5553578853607177
epoch: 62 training_loss 0.5605699065327644 test_loss: 0.5579102516174317
epoch: 63 training_loss 0.5513850343227387 test_loss: 0.5839325428009033
epoch: 64 training_loss 0.5510065090656281 test_loss: 0.5245141506195068
epoch: 65 training_loss 0.5595367908477783 test_loss: 0.5495659351348877
epoch: 66 training_loss 0.5450753715634346 test_loss: 0.5572718620300293
epoch: 67 training_loss 0.5410396042466163 test_loss: 0.5483548164367675
epoch: 68 training_loss 0.5436096420884132 test_loss: 0.5364394187927246
epoch: 69 training_loss 0.5500803244113922 test_loss: 0.5786067008972168
epoch: 70 training_loss 0.5474732020497322 test_loss: 0.5222235679626465
epoch: 71 training_loss 0.5335641229152679 test_loss: 0.53437819480896
epoch: 72 training_loss 0.5377448523044586 test_loss: 0.5411707401275635
epoch: 73 training_loss 0.5346211135387421 test_loss: 0.524717903137207
epoch: 74 training_loss 0.5422776144742966 test_loss: 0.5281261444091797
epoch: 75 training_loss 0.5342859789729119 test_loss: 0.5263063430786132
epoch: 76 training_loss 0.5293869483470917 test_loss: 0.5382593631744385
epoch: 77 training_loss 0.5332734385132789 test_loss: 0.552882194519043
epoch: 78 training_loss 0.5301740476489067 test_loss: 0.5247074604034424
epoch: 79 training_loss 0.5268595802783966 test_loss: 0.5579675674438477
epoch: 80 training_loss 0.5221364831924439 test_loss: 0.5496438026428223
epoch: 81 training_loss 0.5250846540927887 test_loss: 0.529032850265503
epoch: 82 training_loss 0.510464744567871 test_loss: 0.5219570159912109
epoch: 83 training_loss 0.5274742501974106 test_loss: 0.509282922744751
epoch: 84 training_loss 0.5179908889532089 test_loss: 0.5195799827575683
epoch: 85 training_loss 0.5241262087225914 test_loss: 0.5000783920288085
epoch: 86 training_loss 0.513841335773468 test_loss: 0.5043636322021484
epoch: 87 training_loss 0.5141452327370644 test_loss: 0.5091956615447998
epoch: 88 training_loss 0.5096151593327523 test_loss: 0.5098010063171386
epoch: 89 training_loss 0.5094573816657066 test_loss: 0.5058176040649414
epoch: 90 training_loss 0.5096975687146187 test_loss: 0.5250714778900146
epoch: 91 training_loss 0.5120285594463349 test_loss: 0.5244577884674072
epoch: 92 training_loss 0.5184097445011139 test_loss: 0.5064816951751709
epoch: 93 training_loss 0.508415952026844 test_loss: 0.500722074508667
epoch: 94 training_loss 0.5112376695871353 test_loss: 0.5091810703277588
epoch: 95 training_loss 0.5055624955892563 test_loss: 0.49845046997070314
epoch: 96 training_loss 0.5018227779865265 test_loss: 0.5072877407073975
epoch: 97 training_loss 0.5007940709590912 test_loss: 0.4933222770690918
epoch: 98 training_loss 0.5039364421367645 test_loss: 0.5186635017395019
epoch: 99 training_loss 0.5005703777074814 test_loss: 0.5189088344573974
epoch: 100 training_loss 0.49789789378643035 test_loss: 0.4954326629638672
epoch: 101 training_loss 0.4971965834498405 test_loss: 0.4993425369262695
epoch: 102 training_loss 0.5028260532021522 test_loss: 0.48836750984191896
epoch: 103 training_loss 0.4868242600560188 test_loss: 0.503305196762085
epoch: 104 training_loss 0.4970613420009613 test_loss: 0.5041076183319092
epoch: 105 training_loss 0.5010701137781143 test_loss: 0.4751698970794678
epoch: 106 training_loss 0.4995991691946983 test_loss: 0.4815969467163086
epoch: 107 training_loss 0.5008314779400825 test_loss: 0.48255181312561035
epoch: 108 training_loss 0.5007623106241226 test_loss: 0.4902585506439209
epoch: 109 training_loss 0.49263117492198943 test_loss: 0.4971616744995117
epoch: 110 training_loss 0.48928540527820585 test_loss: 0.491947603225708
epoch: 111 training_loss 0.48944827914237976 test_loss: 0.47724037170410155
epoch: 112 training_loss 0.49049592316150664 test_loss: 0.4925105094909668
epoch: 113 training_loss 0.4967122262716293 test_loss: 0.4827791690826416
epoch: 114 training_loss 0.49387515962123874 test_loss: 0.5266970634460449
epoch: 115 training_loss 0.4972807303071022 test_loss: 0.5313124656677246
epoch: 116 training_loss 0.48000872522592547 test_loss: 0.4894546031951904
epoch: 117 training_loss 0.4876387044787407 test_loss: 0.5118476390838623
epoch: 118 training_loss 0.4761079150438309 test_loss: 0.497403621673584
epoch: 119 training_loss 0.48369173318147657 test_loss: 0.47011828422546387
epoch: 120 training_loss 0.4810093042254448 test_loss: 0.5021817207336425
epoch: 121 training_loss 0.4743814641237259 test_loss: 0.47041640281677244
epoch: 122 training_loss 0.4759728381037712 test_loss: 0.49585771560668945
epoch: 123 training_loss 0.4883707350492477 test_loss: 0.4954821586608887
epoch: 124 training_loss 0.4803612494468689 test_loss: 0.4856404781341553
epoch: 125 training_loss 0.4876264962553978 test_loss: 0.48808135986328127
epoch: 126 training_loss 0.48018872648477556 test_loss: 0.4761632442474365
epoch: 127 training_loss 0.4778755694627762 test_loss: 0.46890764236450194
epoch: 128 training_loss 0.48517227709293365 test_loss: 0.4794162273406982
epoch: 129 training_loss 0.4808866617083549 test_loss: 0.48171119689941405
epoch: 130 training_loss 0.4829309093952179 test_loss: 0.47654109001159667
epoch: 131 training_loss 0.4809602412581444 test_loss: 0.46767435073852537
epoch: 132 training_loss 0.477258480489254 test_loss: 0.4768501281738281
epoch: 133 training_loss 0.469772469997406 test_loss: 0.4708676815032959
epoch: 134 training_loss 0.4752160033583641 test_loss: 0.4667762279510498
epoch: 135 training_loss 0.48170222878456115 test_loss: 0.467549991607666
epoch: 136 training_loss 0.4765989625453949 test_loss: 0.4720810890197754
epoch: 137 training_loss 0.47625856459140775 test_loss: 0.47783627510070803
epoch: 138 training_loss 0.47004564553499223 test_loss: 0.4627424716949463
epoch: 139 training_loss 0.46980502545833586 test_loss: 0.4636882781982422
epoch: 140 training_loss 0.4744585508108139 test_loss: 0.4704772472381592
epoch: 141 training_loss 0.46935803204774856 test_loss: 0.46713624000549314
epoch: 142 training_loss 0.47481044501066205 test_loss: 0.46656270027160646
epoch: 143 training_loss 0.4718739062547684 test_loss: 0.47516827583312987
epoch: 144 training_loss 0.4725082418322563 test_loss: 0.46403279304504397
epoch: 145 training_loss 0.45829916805028914 test_loss: 0.4516876220703125
epoch: 146 training_loss 0.46864200353622437 test_loss: 0.480256462097168
epoch: 147 training_loss 0.46553292870521545 test_loss: 0.46903533935546876
epoch: 148 training_loss 0.4705353885889053 test_loss: 0.4728808879852295
epoch: 149 training_loss 0.47253866523504257 test_loss: 0.4880019187927246
3155.733836756862
episode: 0 training return: tensor(-117.6730, device='cuda:0')
episode: 1 training return: tensor(-300.6508, device='cuda:0')
episode: 2 training return: tensor(-109.7813, device='cuda:0')
episode: 3 training return: tensor(-104.2061, device='cuda:0')
epoch: 1 test_true_pfm: 3151.289406903104 sim_pfm: -126.7380017179433
episode: 4 training return: tensor(-114.4657, device='cuda:0')
episode: 5 training return: tensor(-107.2702, device='cuda:0')
episode: 6 training return: tensor(-126.0268, device='cuda:0')
episode: 7 training return: tensor(-105.3223, device='cuda:0')
epoch: 2 test_true_pfm: 2633.912110350726 sim_pfm: -98.34324240652495
episode: 8 training return: tensor(-128.2096, device='cuda:0')
episode: 9 training return: tensor(-114.7514, device='cuda:0')
episode: 10 training return: tensor(-92.2799, device='cuda:0')
episode: 11 training return: tensor(-133.8043, device='cuda:0')
epoch: 3 test_true_pfm: 2997.3979654930404 sim_pfm: -95.93913347931812
episode: 12 training return: tensor(-104.9377, device='cuda:0')
episode: 13 training return: tensor(-104.4216, device='cuda:0')
episode: 14 training return: tensor(-133.2224, device='cuda:0')
episode: 15 training return: tensor(-101.8817, device='cuda:0')
epoch: 4 test_true_pfm: 2989.7952973291044 sim_pfm: -98.99466327746632
episode: 16 training return: tensor(-99.5089, device='cuda:0')
episode: 17 training return: tensor(-639.7964, device='cuda:0')
episode: 18 training return: tensor(-97.8931, device='cuda:0')
episode: 19 training return: tensor(-102.6757, device='cuda:0')
epoch: 5 test_true_pfm: 3182.46033487827 sim_pfm: -76.6935382274193
episode: 20 training return: tensor(-485.6580, device='cuda:0')
episode: 21 training return: tensor(-109.7440, device='cuda:0')
episode: 22 training return: tensor(-695.5698, device='cuda:0')
episode: 23 training return: tensor(-95.0217, device='cuda:0')
epoch: 6 test_true_pfm: 3151.0008290837036 sim_pfm: -86.90255108825902
episode: 24 training return: tensor(-90.4479, device='cuda:0')
episode: 25 training return: tensor(-92.8847, device='cuda:0')
episode: 26 training return: tensor(-92.4468, device='cuda:0')
episode: 27 training return: tensor(-103.3203, device='cuda:0')
epoch: 7 test_true_pfm: 3203.2177940885463 sim_pfm: -93.06642481511032
episode: 28 training return: tensor(-227.0394, device='cuda:0')
episode: 29 training return: tensor(-119.3347, device='cuda:0')
episode: 30 training return: tensor(-95.4293, device='cuda:0')
episode: 31 training return: tensor(-63.0182, device='cuda:0')
epoch: 8 test_true_pfm: 3203.7605831818764 sim_pfm: -48.67762747785309
episode: 32 training return: tensor(-384.8893, device='cuda:0')
episode: 33 training return: tensor(-112.7584, device='cuda:0')
episode: 34 training return: tensor(-376.8384, device='cuda:0')
episode: 35 training return: tensor(-72.6749, device='cuda:0')
epoch: 9 test_true_pfm: 2731.054089252843 sim_pfm: -147.0120609178072
episode: 36 training return: tensor(-106.8209, device='cuda:0')
episode: 37 training return: tensor(-89.5699, device='cuda:0')
episode: 38 training return: tensor(-106.7242, device='cuda:0')
episode: 39 training return: tensor(-91.6349, device='cuda:0')
epoch: 10 test_true_pfm: 2655.8441234678426 sim_pfm: -82.61148121516453
episode: 40 training return: tensor(-338.3615, device='cuda:0')
episode: 41 training return: tensor(-85.5578, device='cuda:0')
episode: 42 training return: tensor(-95.2459, device='cuda:0')
episode: 43 training return: tensor(-105.2013, device='cuda:0')
epoch: 11 test_true_pfm: 2645.259286044506 sim_pfm: -336.57908989219385
episode: 44 training return: tensor(-107.5048, device='cuda:0')
episode: 45 training return: tensor(-85.5534, device='cuda:0')
episode: 46 training return: tensor(-60.9069, device='cuda:0')
episode: 47 training return: tensor(-72.8768, device='cuda:0')
epoch: 12 test_true_pfm: 3059.956589906777 sim_pfm: -91.05767626435652
episode: 48 training return: tensor(-535.7590, device='cuda:0')
episode: 49 training return: tensor(-100.4214, device='cuda:0')
episode: 50 training return: tensor(-77.2443, device='cuda:0')
episode: 51 training return: tensor(-106.8215, device='cuda:0')
epoch: 13 test_true_pfm: 3034.177252355055 sim_pfm: -84.83714605452649
episode: 52 training return: tensor(-108.4846, device='cuda:0')
episode: 53 training return: tensor(-104.4954, device='cuda:0')
episode: 54 training return: tensor(-92.1097, device='cuda:0')
episode: 55 training return: tensor(-84.2048, device='cuda:0')
epoch: 14 test_true_pfm: 3175.5826298619318 sim_pfm: -151.74430763197597
episode: 56 training return: tensor(-96.7444, device='cuda:0')
episode: 57 training return: tensor(-90.2722, device='cuda:0')
episode: 58 training return: tensor(-87.7056, device='cuda:0')
episode: 59 training return: tensor(-65.8671, device='cuda:0')
epoch: 15 test_true_pfm: 2837.031462338833 sim_pfm: -139.45795444163377
episode: 60 training return: tensor(-110.1685, device='cuda:0')
episode: 61 training return: tensor(-100.4484, device='cuda:0')
episode: 62 training return: tensor(-102.4145, device='cuda:0')
episode: 63 training return: tensor(-113.2904, device='cuda:0')
epoch: 16 test_true_pfm: 2802.3449844523116 sim_pfm: -92.64664616662776
episode: 64 training return: tensor(-69.3425, device='cuda:0')
episode: 65 training return: tensor(-67.6266, device='cuda:0')
episode: 66 training return: tensor(-90.4303, device='cuda:0')
episode: 67 training return: tensor(-64.1914, device='cuda:0')
epoch: 17 test_true_pfm: 3190.7677552900736 sim_pfm: -107.54723245752393
episode: 68 training return: tensor(-672.3367, device='cuda:0')
episode: 69 training return: tensor(-112.2715, device='cuda:0')
episode: 70 training return: tensor(-144.4698, device='cuda:0')
episode: 71 training return: tensor(-85.0472, device='cuda:0')
epoch: 18 test_true_pfm: 3183.3742914392915 sim_pfm: -180.8199103839385
episode: 72 training return: tensor(-540.8897, device='cuda:0')
episode: 73 training return: tensor(-12.0662, device='cuda:0')
episode: 74 training return: tensor(-58.4669, device='cuda:0')
episode: 75 training return: tensor(-94.2782, device='cuda:0')
epoch: 19 test_true_pfm: 3099.54074541121 sim_pfm: -84.17436978781673
episode: 76 training return: tensor(-63.7266, device='cuda:0')
episode: 77 training return: tensor(-94.5796, device='cuda:0')
episode: 78 training return: tensor(-87.9953, device='cuda:0')
episode: 79 training return: tensor(-62.8134, device='cuda:0')
epoch: 20 test_true_pfm: 3189.8689600076546 sim_pfm: -195.0014336152041
episode: 80 training return: tensor(-85.0817, device='cuda:0')
episode: 81 training return: tensor(-89.9771, device='cuda:0')
episode: 82 training return: tensor(-127.5521, device='cuda:0')
episode: 83 training return: tensor(-124.3030, device='cuda:0')
epoch: 21 test_true_pfm: 2867.28747659707 sim_pfm: -81.02239209222414
episode: 84 training return: tensor(-110.2198, device='cuda:0')
episode: 85 training return: tensor(-88.9167, device='cuda:0')
episode: 86 training return: tensor(-100.4957, device='cuda:0')
episode: 87 training return: tensor(-381.1804, device='cuda:0')
epoch: 22 test_true_pfm: 3176.836140357845 sim_pfm: -103.77968608677232
episode: 88 training return: tensor(-100.5091, device='cuda:0')
episode: 89 training return: tensor(-684.1563, device='cuda:0')
episode: 90 training return: tensor(-56.9188, device='cuda:0')
episode: 91 training return: tensor(-76.2037, device='cuda:0')
epoch: 23 test_true_pfm: 2901.9524416980817 sim_pfm: -78.18550282275343
episode: 92 training return: tensor(-88.7163, device='cuda:0')
episode: 93 training return: tensor(-97.5182, device='cuda:0')
episode: 94 training return: tensor(-95.6013, device='cuda:0')
episode: 95 training return: tensor(-102.5873, device='cuda:0')
epoch: 24 test_true_pfm: 3185.267815000714 sim_pfm: -100.2955980450303
episode: 96 training return: tensor(-324.9709, device='cuda:0')
episode: 97 training return: tensor(-78.5096, device='cuda:0')
episode: 98 training return: tensor(-88.8468, device='cuda:0')
episode: 99 training return: tensor(-105.8161, device='cuda:0')
epoch: 25 test_true_pfm: 3161.3403273452864 sim_pfm: -82.3747940552227
episode: 100 training return: tensor(-101.7382, device='cuda:0')
episode: 101 training return: tensor(-72.0354, device='cuda:0')
episode: 102 training return: tensor(-93.0137, device='cuda:0')
episode: 103 training return: tensor(-64.6089, device='cuda:0')
epoch: 26 test_true_pfm: 3174.1745731166297 sim_pfm: -161.81789354262096
episode: 104 training return: tensor(-95.5407, device='cuda:0')
episode: 105 training return: tensor(-90.5856, device='cuda:0')
episode: 106 training return: tensor(-114.3117, device='cuda:0')
episode: 107 training return: tensor(-70.5004, device='cuda:0')
epoch: 27 test_true_pfm: 2925.1955409225793 sim_pfm: -90.6179722751646
episode: 108 training return: tensor(-82.3862, device='cuda:0')
episode: 109 training return: tensor(-92.2823, device='cuda:0')
episode: 110 training return: tensor(-677.8364, device='cuda:0')
episode: 111 training return: tensor(-102.8463, device='cuda:0')
epoch: 28 test_true_pfm: 3208.159699918225 sim_pfm: -72.4316441672078
episode: 112 training return: tensor(-524.7792, device='cuda:0')
episode: 113 training return: tensor(-69.3612, device='cuda:0')
episode: 114 training return: tensor(-92.3612, device='cuda:0')
episode: 115 training return: tensor(-55.6785, device='cuda:0')
epoch: 29 test_true_pfm: 3197.2444367067687 sim_pfm: -94.58022676771118
episode: 116 training return: tensor(-210.6838, device='cuda:0')
episode: 117 training return: tensor(-92.2125, device='cuda:0')
episode: 118 training return: tensor(-96.6440, device='cuda:0')
episode: 119 training return: tensor(-121.9077, device='cuda:0')
epoch: 30 test_true_pfm: 2848.1758810901083 sim_pfm: -239.1174089622024
episode: 120 training return: tensor(-70.1610, device='cuda:0')
episode: 121 training return: tensor(-107.1903, device='cuda:0')
episode: 122 training return: tensor(-77.4886, device='cuda:0')
episode: 123 training return: tensor(-78.7754, device='cuda:0')
epoch: 31 test_true_pfm: 3198.860819269448 sim_pfm: -105.91806648147758
episode: 124 training return: tensor(-58.7142, device='cuda:0')
episode: 125 training return: tensor(-101.0329, device='cuda:0')
episode: 126 training return: tensor(-80.6874, device='cuda:0')
episode: 127 training return: tensor(-99.6669, device='cuda:0')
epoch: 32 test_true_pfm: 3174.559261680368 sim_pfm: -92.26558611814592
episode: 128 training return: tensor(-85.9784, device='cuda:0')
episode: 129 training return: tensor(-127.2798, device='cuda:0')
episode: 130 training return: tensor(-60.9960, device='cuda:0')
episode: 131 training return: tensor(-78.3009, device='cuda:0')
epoch: 33 test_true_pfm: 3184.2547189231022 sim_pfm: -236.31198637613366
episode: 132 training return: tensor(-85.4473, device='cuda:0')
episode: 133 training return: tensor(-110.4948, device='cuda:0')
episode: 134 training return: tensor(-65.1371, device='cuda:0')
episode: 135 training return: tensor(-82.7781, device='cuda:0')
epoch: 34 test_true_pfm: 3211.0299699812967 sim_pfm: -67.00769429989548
episode: 136 training return: tensor(-93.7701, device='cuda:0')
episode: 137 training return: tensor(-85.8206, device='cuda:0')
episode: 138 training return: tensor(-86.9651, device='cuda:0')
episode: 139 training return: tensor(-87.3141, device='cuda:0')
epoch: 35 test_true_pfm: 3204.2706288444165 sim_pfm: -104.02956773330031
episode: 140 training return: tensor(-67.9382, device='cuda:0')
episode: 141 training return: tensor(-98.6310, device='cuda:0')
episode: 142 training return: tensor(-98.6548, device='cuda:0')
episode: 143 training return: tensor(-89.0939, device='cuda:0')
epoch: 36 test_true_pfm: 3200.5620789053487 sim_pfm: -56.56828364558169
episode: 144 training return: tensor(-281.1745, device='cuda:0')
episode: 145 training return: tensor(-107.5566, device='cuda:0')
episode: 146 training return: tensor(-92.0547, device='cuda:0')
episode: 147 training return: tensor(-87.3749, device='cuda:0')
epoch: 37 test_true_pfm: 3181.4748039883157 sim_pfm: -93.23642923177492
episode: 148 training return: tensor(-103.2659, device='cuda:0')
episode: 149 training return: tensor(-42.5842, device='cuda:0')
episode: 150 training return: tensor(-62.3122, device='cuda:0')
episode: 151 training return: tensor(-72.7630, device='cuda:0')
epoch: 38 test_true_pfm: 3215.5984577826493 sim_pfm: -81.49602804331032
episode: 152 training return: tensor(-94.9243, device='cuda:0')
episode: 153 training return: tensor(-105.7993, device='cuda:0')
episode: 154 training return: tensor(-85.9625, device='cuda:0')
episode: 155 training return: tensor(-67.6276, device='cuda:0')
epoch: 39 test_true_pfm: 3052.6768604502245 sim_pfm: -172.28137716810065
episode: 156 training return: tensor(-120.5008, device='cuda:0')
episode: 157 training return: tensor(-80.0387, device='cuda:0')
episode: 158 training return: tensor(-86.8171, device='cuda:0')
episode: 159 training return: tensor(-99.8432, device='cuda:0')
epoch: 40 test_true_pfm: 3210.555646405957 sim_pfm: -71.66882615094073
episode: 160 training return: tensor(-92.1908, device='cuda:0')
episode: 161 training return: tensor(-76.4617, device='cuda:0')
episode: 162 training return: tensor(-89.6737, device='cuda:0')
episode: 163 training return: tensor(-611.8901, device='cuda:0')
epoch: 41 test_true_pfm: 3197.9607831385515 sim_pfm: -85.60092792466942
episode: 164 training return: tensor(-110.9748, device='cuda:0')
episode: 165 training return: tensor(-54.5141, device='cuda:0')
episode: 166 training return: tensor(-94.8764, device='cuda:0')
episode: 167 training return: tensor(-107.1096, device='cuda:0')
epoch: 42 test_true_pfm: 3193.737487104839 sim_pfm: -82.84588734726033
episode: 168 training return: tensor(-100.1903, device='cuda:0')
episode: 169 training return: tensor(-83.2108, device='cuda:0')
episode: 170 training return: tensor(50.3327, device='cuda:0')
episode: 171 training return: tensor(-80.2663, device='cuda:0')
epoch: 43 test_true_pfm: 3198.0257856640164 sim_pfm: -84.56580215141487
episode: 172 training return: tensor(-69.6736, device='cuda:0')
episode: 173 training return: tensor(-80.5482, device='cuda:0')
episode: 174 training return: tensor(-87.6067, device='cuda:0')
episode: 175 training return: tensor(-57.5417, device='cuda:0')
epoch: 44 test_true_pfm: 3180.816198109636 sim_pfm: -95.25289962016784
episode: 176 training return: tensor(-113.2479, device='cuda:0')
episode: 177 training return: tensor(-56.0081, device='cuda:0')
episode: 178 training return: tensor(-72.4846, device='cuda:0')
episode: 179 training return: tensor(-61.0223, device='cuda:0')
epoch: 45 test_true_pfm: 3200.946721919254 sim_pfm: -70.74540148765664
episode: 180 training return: tensor(-94.8866, device='cuda:0')
episode: 181 training return: tensor(-86.7187, device='cuda:0')
episode: 182 training return: tensor(-73.7296, device='cuda:0')
episode: 183 training return: tensor(-91.9378, device='cuda:0')
epoch: 46 test_true_pfm: 3221.6472149661054 sim_pfm: -62.40019322534014
episode: 184 training return: tensor(-68.5704, device='cuda:0')
episode: 185 training return: tensor(-61.3581, device='cuda:0')
episode: 186 training return: tensor(-94.4269, device='cuda:0')
episode: 187 training return: tensor(-71.9787, device='cuda:0')
epoch: 47 test_true_pfm: 3200.709081345621 sim_pfm: -54.792938355075115
episode: 188 training return: tensor(-84.4898, device='cuda:0')
episode: 189 training return: tensor(-609.6952, device='cuda:0')
episode: 190 training return: tensor(-57.8639, device='cuda:0')
episode: 191 training return: tensor(-76.0573, device='cuda:0')
epoch: 48 test_true_pfm: 3167.0479140293005 sim_pfm: -99.58716879874312
episode: 192 training return: tensor(-76.3160, device='cuda:0')
episode: 193 training return: tensor(-89.4216, device='cuda:0')
episode: 194 training return: tensor(-90.9652, device='cuda:0')
episode: 195 training return: tensor(-96.6199, device='cuda:0')
epoch: 49 test_true_pfm: 3197.1344256106954 sim_pfm: -74.45739610586315
episode: 196 training return: tensor(-92.9162, device='cuda:0')
episode: 197 training return: tensor(-84.3605, device='cuda:0')
episode: 198 training return: tensor(-63.3250, device='cuda:0')
episode: 199 training return: tensor(-67.1092, device='cuda:0')
epoch: 50 test_true_pfm: 3202.4175145479435 sim_pfm: -60.566616699642815
episode: 200 training return: tensor(-95.5447, device='cuda:0')
episode: 201 training return: tensor(-73.3974, device='cuda:0')
episode: 202 training return: tensor(-104.4482, device='cuda:0')
episode: 203 training return: tensor(-109.6826, device='cuda:0')
epoch: 51 test_true_pfm: 3201.2609322062567 sim_pfm: -89.49584454211679
episode: 204 training return: tensor(-69.0102, device='cuda:0')
episode: 205 training return: tensor(-60.2858, device='cuda:0')
episode: 206 training return: tensor(-105.5091, device='cuda:0')
episode: 207 training return: tensor(-84.3283, device='cuda:0')
epoch: 52 test_true_pfm: 3180.7261176088773 sim_pfm: -99.90044428079273
episode: 208 training return: tensor(-85.3529, device='cuda:0')
episode: 209 training return: tensor(-110.3118, device='cuda:0')
episode: 210 training return: tensor(-77.0519, device='cuda:0')
episode: 211 training return: tensor(-45.3248, device='cuda:0')
epoch: 53 test_true_pfm: 3179.619329341284 sim_pfm: -101.04086359825062
episode: 212 training return: tensor(-93.6644, device='cuda:0')
episode: 213 training return: tensor(-74.2290, device='cuda:0')
episode: 214 training return: tensor(-69.0120, device='cuda:0')
episode: 215 training return: tensor(-70.5567, device='cuda:0')
epoch: 54 test_true_pfm: 3204.4834209786063 sim_pfm: -91.74700482596138
episode: 216 training return: tensor(-69.0329, device='cuda:0')
episode: 217 training return: tensor(-96.5183, device='cuda:0')
episode: 218 training return: tensor(-99.9121, device='cuda:0')
episode: 219 training return: tensor(-94.5438, device='cuda:0')
epoch: 55 test_true_pfm: 3184.4423969764807 sim_pfm: -75.39780572489447
episode: 220 training return: tensor(-77.7085, device='cuda:0')
episode: 221 training return: tensor(-77.9525, device='cuda:0')
episode: 222 training return: tensor(-73.2011, device='cuda:0')
episode: 223 training return: tensor(-105.7692, device='cuda:0')
epoch: 56 test_true_pfm: 3188.756786752539 sim_pfm: -83.36468519235495
episode: 224 training return: tensor(-64.2243, device='cuda:0')
episode: 225 training return: tensor(-77.7994, device='cuda:0')
episode: 226 training return: tensor(-79.5629, device='cuda:0')
episode: 227 training return: tensor(-85.1840, device='cuda:0')
epoch: 57 test_true_pfm: 3203.771887780919 sim_pfm: -69.84583350917092
episode: 228 training return: tensor(-56.4571, device='cuda:0')
episode: 229 training return: tensor(-91.8463, device='cuda:0')
episode: 230 training return: tensor(-355.0714, device='cuda:0')
episode: 231 training return: tensor(-81.8933, device='cuda:0')
epoch: 58 test_true_pfm: 3208.0767694283495 sim_pfm: -65.00544867755768
episode: 232 training return: tensor(-61.3397, device='cuda:0')
episode: 233 training return: tensor(-79.9835, device='cuda:0')
episode: 234 training return: tensor(-72.7780, device='cuda:0')
episode: 235 training return: tensor(-33.8793, device='cuda:0')
epoch: 59 test_true_pfm: 3213.0531160542578 sim_pfm: -61.93285713455407
episode: 236 training return: tensor(-86.6234, device='cuda:0')
episode: 237 training return: tensor(-92.3941, device='cuda:0')
episode: 238 training return: tensor(-69.2579, device='cuda:0')
episode: 239 training return: tensor(-97.4215, device='cuda:0')
epoch: 60 test_true_pfm: 3186.1861351116045 sim_pfm: -71.20210234697636
episode: 240 training return: tensor(-73.1546, device='cuda:0')
episode: 241 training return: tensor(-39.9483, device='cuda:0')
episode: 242 training return: tensor(-114.4134, device='cuda:0')
episode: 243 training return: tensor(-68.8444, device='cuda:0')
epoch: 61 test_true_pfm: 3207.598085940261 sim_pfm: -73.35582661134929
episode: 244 training return: tensor(-90.8410, device='cuda:0')
episode: 245 training return: tensor(-93.9665, device='cuda:0')
episode: 246 training return: tensor(-111.7785, device='cuda:0')
episode: 247 training return: tensor(-43.2035, device='cuda:0')
epoch: 62 test_true_pfm: 3192.549820101883 sim_pfm: -64.54060050220384
episode: 248 training return: tensor(-36.7025, device='cuda:0')
episode: 249 training return: tensor(-53.5150, device='cuda:0')
episode: 250 training return: tensor(-104.5996, device='cuda:0')
episode: 251 training return: tensor(-76.7049, device='cuda:0')
epoch: 63 test_true_pfm: 3186.1768015109024 sim_pfm: -89.64293029593925
episode: 252 training return: tensor(-55.6444, device='cuda:0')
episode: 253 training return: tensor(-79.9701, device='cuda:0')
episode: 254 training return: tensor(-65.6104, device='cuda:0')
episode: 255 training return: tensor(-59.4642, device='cuda:0')
epoch: 64 test_true_pfm: 3192.3480502256266 sim_pfm: -83.65779990696076
episode: 256 training return: tensor(-69.0613, device='cuda:0')
episode: 257 training return: tensor(-71.0064, device='cuda:0')
episode: 258 training return: tensor(-606.1176, device='cuda:0')
episode: 259 training return: tensor(-98.0540, device='cuda:0')
epoch: 65 test_true_pfm: 3209.316963461555 sim_pfm: -78.65162149649889
episode: 260 training return: tensor(-108.2704, device='cuda:0')
episode: 261 training return: tensor(-52.4264, device='cuda:0')
episode: 262 training return: tensor(-75.9351, device='cuda:0')
episode: 263 training return: tensor(-53.2041, device='cuda:0')
epoch: 66 test_true_pfm: 3207.8627218181414 sim_pfm: -79.27364879109275
episode: 264 training return: tensor(-76.5093, device='cuda:0')
episode: 265 training return: tensor(-71.6842, device='cuda:0')
episode: 266 training return: tensor(-71.7014, device='cuda:0')
episode: 267 training return: tensor(-58.6943, device='cuda:0')
epoch: 67 test_true_pfm: 3192.321219571055 sim_pfm: -74.88057441625278
episode: 268 training return: tensor(-106.8606, device='cuda:0')
episode: 269 training return: tensor(-76.9336, device='cuda:0')
episode: 270 training return: tensor(-89.7516, device='cuda:0')
episode: 271 training return: tensor(-106.1570, device='cuda:0')
epoch: 68 test_true_pfm: 3205.520954391859 sim_pfm: -76.10929138959425
episode: 272 training return: tensor(-76.9407, device='cuda:0')
episode: 273 training return: tensor(-90.9119, device='cuda:0')
episode: 274 training return: tensor(-66.2479, device='cuda:0')
episode: 275 training return: tensor(-98.1636, device='cuda:0')
epoch: 69 test_true_pfm: 3182.858188875143 sim_pfm: -85.16340456817609
episode: 276 training return: tensor(-95.7924, device='cuda:0')
episode: 277 training return: tensor(-80.4398, device='cuda:0')
episode: 278 training return: tensor(-98.9277, device='cuda:0')
episode: 279 training return: tensor(-47.5391, device='cuda:0')
epoch: 70 test_true_pfm: 3203.584609442331 sim_pfm: -72.01342682521015
episode: 280 training return: tensor(-65.7550, device='cuda:0')
episode: 281 training return: tensor(-684.5385, device='cuda:0')
episode: 282 training return: tensor(-58.2457, device='cuda:0')
episode: 283 training return: tensor(-65.3445, device='cuda:0')
epoch: 71 test_true_pfm: 3202.7861411777026 sim_pfm: -61.58023056524689
episode: 284 training return: tensor(-54.1637, device='cuda:0')
episode: 285 training return: tensor(-107.1063, device='cuda:0')
episode: 286 training return: tensor(-40.7749, device='cuda:0')
episode: 287 training return: tensor(-74.1042, device='cuda:0')
epoch: 72 test_true_pfm: 3209.837960422439 sim_pfm: -61.7037061759911
episode: 288 training return: tensor(-63.5143, device='cuda:0')
episode: 289 training return: tensor(-72.8399, device='cuda:0')
episode: 290 training return: tensor(-510.9090, device='cuda:0')
episode: 291 training return: tensor(-101.1168, device='cuda:0')
epoch: 73 test_true_pfm: 3182.653473042781 sim_pfm: -97.26294009311823
episode: 292 training return: tensor(-64.7775, device='cuda:0')
episode: 293 training return: tensor(-82.1754, device='cuda:0')
episode: 294 training return: tensor(-50.5606, device='cuda:0')
episode: 295 training return: tensor(-67.7505, device='cuda:0')
epoch: 74 test_true_pfm: 3183.069883690849 sim_pfm: -68.72299074036225
episode: 296 training return: tensor(-95.8962, device='cuda:0')
episode: 297 training return: tensor(-84.3900, device='cuda:0')
episode: 298 training return: tensor(-95.7035, device='cuda:0')
episode: 299 training return: tensor(-109.1119, device='cuda:0')
epoch: 75 test_true_pfm: 3168.341083217523 sim_pfm: -78.33427566643029
episode: 300 training return: tensor(-68.5489, device='cuda:0')
episode: 301 training return: tensor(-47.4558, device='cuda:0')
episode: 302 training return: tensor(-92.1154, device='cuda:0')
episode: 303 training return: tensor(-72.3360, device='cuda:0')
epoch: 76 test_true_pfm: 3185.2379984282993 sim_pfm: -63.763694110394376
episode: 304 training return: tensor(-86.9590, device='cuda:0')
episode: 305 training return: tensor(-79.6889, device='cuda:0')
episode: 306 training return: tensor(-77.8883, device='cuda:0')
episode: 307 training return: tensor(-72.8957, device='cuda:0')
epoch: 77 test_true_pfm: 3199.8027739091854 sim_pfm: -73.06655509537086
episode: 308 training return: tensor(-63.8805, device='cuda:0')
episode: 309 training return: tensor(-109.0008, device='cuda:0')
episode: 310 training return: tensor(-84.5000, device='cuda:0')
episode: 311 training return: tensor(-91.7986, device='cuda:0')
epoch: 78 test_true_pfm: 3196.903560993491 sim_pfm: -105.21602488857268
episode: 312 training return: tensor(-70.9597, device='cuda:0')
episode: 313 training return: tensor(-72.8567, device='cuda:0')
episode: 314 training return: tensor(-67.5300, device='cuda:0')
episode: 315 training return: tensor(-60.4678, device='cuda:0')
epoch: 79 test_true_pfm: 3204.0964407185857 sim_pfm: -92.32043237813438
episode: 316 training return: tensor(-116.3661, device='cuda:0')
episode: 317 training return: tensor(0.2452, device='cuda:0')
episode: 318 training return: tensor(-96.3629, device='cuda:0')
episode: 319 training return: tensor(-54.1968, device='cuda:0')
epoch: 80 test_true_pfm: 3212.652221387181 sim_pfm: -70.26316922121138
episode: 320 training return: tensor(-91.6428, device='cuda:0')
episode: 321 training return: tensor(-76.7174, device='cuda:0')
episode: 322 training return: tensor(-105.9811, device='cuda:0')
episode: 323 training return: tensor(-79.3032, device='cuda:0')
epoch: 81 test_true_pfm: 3203.917844746296 sim_pfm: -68.02864758944877
episode: 324 training return: tensor(-96.2599, device='cuda:0')
episode: 325 training return: tensor(-46.6812, device='cuda:0')
episode: 326 training return: tensor(-44.3268, device='cuda:0')
episode: 327 training return: tensor(-79.7187, device='cuda:0')
epoch: 82 test_true_pfm: 3201.938784369387 sim_pfm: -83.16315539613909
episode: 328 training return: tensor(-51.8242, device='cuda:0')
episode: 329 training return: tensor(-82.0651, device='cuda:0')
episode: 330 training return: tensor(-404.7945, device='cuda:0')
episode: 331 training return: tensor(-71.6728, device='cuda:0')
epoch: 83 test_true_pfm: 3220.275835806567 sim_pfm: -64.09842866255592
episode: 332 training return: tensor(-64.8906, device='cuda:0')
episode: 333 training return: tensor(-237.4103, device='cuda:0')
episode: 334 training return: tensor(-108.7874, device='cuda:0')
episode: 335 training return: tensor(-60.0676, device='cuda:0')
epoch: 84 test_true_pfm: 3198.8329169917524 sim_pfm: -82.72288313974666
episode: 336 training return: tensor(-76.0807, device='cuda:0')
episode: 337 training return: tensor(-84.2269, device='cuda:0')
episode: 338 training return: tensor(-85.4388, device='cuda:0')
episode: 339 training return: tensor(-84.4324, device='cuda:0')
epoch: 85 test_true_pfm: 3205.9469047628395 sim_pfm: -80.90138452272124
episode: 340 training return: tensor(-68.1738, device='cuda:0')
episode: 341 training return: tensor(-87.4142, device='cuda:0')
episode: 342 training return: tensor(-28.1910, device='cuda:0')
episode: 343 training return: tensor(-695.2527, device='cuda:0')
epoch: 86 test_true_pfm: 3205.508419625474 sim_pfm: -62.52068925347218
episode: 344 training return: tensor(-100.7205, device='cuda:0')
episode: 345 training return: tensor(-99.8606, device='cuda:0')
episode: 346 training return: tensor(-76.3091, device='cuda:0')
episode: 347 training return: tensor(-104.3135, device='cuda:0')
epoch: 87 test_true_pfm: 3215.011247461498 sim_pfm: -86.71717597174575
episode: 348 training return: tensor(-47.5608, device='cuda:0')
episode: 349 training return: tensor(-81.2697, device='cuda:0')
episode: 350 training return: tensor(-203.7549, device='cuda:0')
episode: 351 training return: tensor(-55.9447, device='cuda:0')
epoch: 88 test_true_pfm: 3203.66208713224 sim_pfm: -61.64180886013006
episode: 352 training return: tensor(-68.8785, device='cuda:0')
episode: 353 training return: tensor(-100.6097, device='cuda:0')
episode: 354 training return: tensor(-82.3391, device='cuda:0')
episode: 355 training return: tensor(-65.0767, device='cuda:0')
epoch: 89 test_true_pfm: 3200.8691889379593 sim_pfm: -82.60321589371112
episode: 356 training return: tensor(-64.8493, device='cuda:0')
episode: 357 training return: tensor(-58.3324, device='cuda:0')
episode: 358 training return: tensor(-42.5920, device='cuda:0')
episode: 359 training return: tensor(-63.6371, device='cuda:0')
epoch: 90 test_true_pfm: 3188.1447159513314 sim_pfm: -84.41542166430736
episode: 360 training return: tensor(-73.9815, device='cuda:0')
episode: 361 training return: tensor(-89.1214, device='cuda:0')
episode: 362 training return: tensor(-55.4634, device='cuda:0')
episode: 363 training return: tensor(-74.1537, device='cuda:0')
epoch: 91 test_true_pfm: 3188.67410236787 sim_pfm: -80.94606811590104
episode: 364 training return: tensor(-108.5496, device='cuda:0')
episode: 365 training return: tensor(-54.3211, device='cuda:0')
episode: 366 training return: tensor(-73.0871, device='cuda:0')
episode: 367 training return: tensor(-109.2134, device='cuda:0')
epoch: 92 test_true_pfm: 3207.292616572122 sim_pfm: -74.19784647711397
episode: 368 training return: tensor(-58.9488, device='cuda:0')
episode: 369 training return: tensor(-77.7811, device='cuda:0')
episode: 370 training return: tensor(-86.5038, device='cuda:0')
episode: 371 training return: tensor(-72.1373, device='cuda:0')
epoch: 93 test_true_pfm: 3187.7006222826208 sim_pfm: -73.32705447856763
episode: 372 training return: tensor(-60.8947, device='cuda:0')
episode: 373 training return: tensor(-58.6119, device='cuda:0')
episode: 374 training return: tensor(-85.1897, device='cuda:0')
episode: 375 training return: tensor(-58.2447, device='cuda:0')
epoch: 94 test_true_pfm: 3189.366460387186 sim_pfm: -82.27162112500325
episode: 376 training return: tensor(-65.9284, device='cuda:0')
episode: 377 training return: tensor(-74.5127, device='cuda:0')
episode: 378 training return: tensor(-61.7119, device='cuda:0')
episode: 379 training return: tensor(-53.8747, device='cuda:0')
epoch: 95 test_true_pfm: 3197.392791864829 sim_pfm: -77.99991842772579
episode: 380 training return: tensor(-76.0323, device='cuda:0')
episode: 381 training return: tensor(-82.0139, device='cuda:0')
episode: 382 training return: tensor(-48.7877, device='cuda:0')
episode: 383 training return: tensor(-93.8519, device='cuda:0')
epoch: 96 test_true_pfm: 3219.0677819117846 sim_pfm: -63.244090186140966
episode: 384 training return: tensor(-87.3983, device='cuda:0')
episode: 385 training return: tensor(-75.8787, device='cuda:0')
episode: 386 training return: tensor(-69.4032, device='cuda:0')
episode: 387 training return: tensor(-70.5941, device='cuda:0')
epoch: 97 test_true_pfm: 3188.5599568325774 sim_pfm: -83.26209289328351
episode: 388 training return: tensor(-87.3938, device='cuda:0')
episode: 389 training return: tensor(-55.2474, device='cuda:0')
episode: 390 training return: tensor(-46.0338, device='cuda:0')
episode: 391 training return: tensor(-544.4653, device='cuda:0')
epoch: 98 test_true_pfm: 3200.75568772688 sim_pfm: -74.44873341772472
episode: 392 training return: tensor(-634.3431, device='cuda:0')
episode: 393 training return: tensor(-104.6633, device='cuda:0')
episode: 394 training return: tensor(-611.3577, device='cuda:0')
episode: 395 training return: tensor(-51.8472, device='cuda:0')
epoch: 99 test_true_pfm: 3210.5382874549286 sim_pfm: -70.50702781567816
episode: 396 training return: tensor(-432.6145, device='cuda:0')
episode: 397 training return: tensor(-53.6222, device='cuda:0')
episode: 398 training return: tensor(-78.3484, device='cuda:0')
episode: 399 training return: tensor(-79.1385, device='cuda:0')
epoch: 100 test_true_pfm: 3212.838257254521 sim_pfm: -80.72345823879975
episode: 400 training return: tensor(-81.5606, device='cuda:0')
episode: 401 training return: tensor(-179.4682, device='cuda:0')
episode: 402 training return: tensor(-91.2720, device='cuda:0')
episode: 403 training return: tensor(-70.6990, device='cuda:0')
epoch: 101 test_true_pfm: 3202.7280094549274 sim_pfm: -64.22166599490447
episode: 404 training return: tensor(-85.2948, device='cuda:0')
episode: 405 training return: tensor(-66.8682, device='cuda:0')
episode: 406 training return: tensor(-77.1498, device='cuda:0')
episode: 407 training return: tensor(-662.4485, device='cuda:0')
epoch: 102 test_true_pfm: 3205.5402819928872 sim_pfm: -89.56270434385321
episode: 408 training return: tensor(-76.3492, device='cuda:0')
episode: 409 training return: tensor(-697.7728, device='cuda:0')
episode: 410 training return: tensor(-58.8874, device='cuda:0')
episode: 411 training return: tensor(-67.7124, device='cuda:0')
epoch: 103 test_true_pfm: 3180.1882149049306 sim_pfm: -69.70874799830683
episode: 412 training return: tensor(-54.4105, device='cuda:0')
episode: 413 training return: tensor(-93.4435, device='cuda:0')
episode: 414 training return: tensor(-97.0106, device='cuda:0')
episode: 415 training return: tensor(-83.3388, device='cuda:0')
epoch: 104 test_true_pfm: 3214.8827127726945 sim_pfm: -67.67108787863981
episode: 416 training return: tensor(-60.9467, device='cuda:0')
episode: 417 training return: tensor(-628.1924, device='cuda:0')
episode: 418 training return: tensor(-84.7976, device='cuda:0')
episode: 419 training return: tensor(-524.3992, device='cuda:0')
epoch: 105 test_true_pfm: 3189.043107797144 sim_pfm: -68.59243172692368
episode: 420 training return: tensor(-69.3750, device='cuda:0')
episode: 421 training return: tensor(-92.0674, device='cuda:0')
episode: 422 training return: tensor(-108.7000, device='cuda:0')
episode: 423 training return: tensor(-49.6450, device='cuda:0')
epoch: 106 test_true_pfm: 3202.4693153975586 sim_pfm: -77.10681314291044
episode: 424 training return: tensor(-69.3117, device='cuda:0')
episode: 425 training return: tensor(-570.1912, device='cuda:0')
episode: 426 training return: tensor(-72.0725, device='cuda:0')
episode: 427 training return: tensor(-27.2108, device='cuda:0')
epoch: 107 test_true_pfm: 3191.1121990340584 sim_pfm: -66.65706902003149
episode: 428 training return: tensor(-91.4801, device='cuda:0')
episode: 429 training return: tensor(-36.0345, device='cuda:0')
episode: 430 training return: tensor(-50.2741, device='cuda:0')
episode: 431 training return: tensor(-53.1590, device='cuda:0')
epoch: 108 test_true_pfm: 3214.985435849974 sim_pfm: -64.97228160043596
episode: 432 training return: tensor(-55.6420, device='cuda:0')
episode: 433 training return: tensor(-84.8046, device='cuda:0')
episode: 434 training return: tensor(-67.8822, device='cuda:0')
episode: 435 training return: tensor(-67.5633, device='cuda:0')
epoch: 109 test_true_pfm: 3203.0581421565985 sim_pfm: -72.22344844134447
episode: 436 training return: tensor(-83.2614, device='cuda:0')
episode: 437 training return: tensor(-410.4249, device='cuda:0')
episode: 438 training return: tensor(-600.4929, device='cuda:0')
episode: 439 training return: tensor(-74.0658, device='cuda:0')
epoch: 110 test_true_pfm: 3213.8130780466895 sim_pfm: -45.809183540404774
episode: 440 training return: tensor(-85.0448, device='cuda:0')
episode: 441 training return: tensor(-76.4582, device='cuda:0')
episode: 442 training return: tensor(-78.8981, device='cuda:0')
episode: 443 training return: tensor(-196.9129, device='cuda:0')
epoch: 111 test_true_pfm: 3188.720745105073 sim_pfm: -76.46350691225962
episode: 444 training return: tensor(-76.8408, device='cuda:0')
episode: 445 training return: tensor(-70.1975, device='cuda:0')
episode: 446 training return: tensor(-59.0830, device='cuda:0')
episode: 447 training return: tensor(-108.4919, device='cuda:0')
epoch: 112 test_true_pfm: 3220.527120711755 sim_pfm: -83.82289317726584
episode: 448 training return: tensor(-99.2482, device='cuda:0')
episode: 449 training return: tensor(-88.2850, device='cuda:0')
episode: 450 training return: tensor(-61.1905, device='cuda:0')
episode: 451 training return: tensor(-62.3438, device='cuda:0')
epoch: 113 test_true_pfm: 3213.9447760613707 sim_pfm: -62.70179600957393
episode: 452 training return: tensor(-100.2946, device='cuda:0')
episode: 453 training return: tensor(-57.2309, device='cuda:0')
episode: 454 training return: tensor(-72.5828, device='cuda:0')
episode: 455 training return: tensor(-115.6142, device='cuda:0')
epoch: 114 test_true_pfm: 3201.4462480027146 sim_pfm: -81.85815963843682
episode: 456 training return: tensor(-99.9888, device='cuda:0')
episode: 457 training return: tensor(-72.8479, device='cuda:0')
episode: 458 training return: tensor(-85.7989, device='cuda:0')
episode: 459 training return: tensor(-48.7831, device='cuda:0')
epoch: 115 test_true_pfm: 3202.131072189602 sim_pfm: -70.12290834983772
episode: 460 training return: tensor(-68.8529, device='cuda:0')
episode: 461 training return: tensor(-72.1818, device='cuda:0')
episode: 462 training return: tensor(-101.3943, device='cuda:0')
episode: 463 training return: tensor(-67.1397, device='cuda:0')
epoch: 116 test_true_pfm: 3205.179544592569 sim_pfm: -77.70721093937755
episode: 464 training return: tensor(-83.2074, device='cuda:0')
episode: 465 training return: tensor(-64.8451, device='cuda:0')
episode: 466 training return: tensor(-61.3869, device='cuda:0')
episode: 467 training return: tensor(-80.4974, device='cuda:0')
epoch: 117 test_true_pfm: 3209.5458618992366 sim_pfm: -73.13758728974305
episode: 468 training return: tensor(-101.4457, device='cuda:0')
episode: 469 training return: tensor(-92.9447, device='cuda:0')
episode: 470 training return: tensor(-56.5629, device='cuda:0')
episode: 471 training return: tensor(-76.5099, device='cuda:0')
epoch: 118 test_true_pfm: 3208.6954158748817 sim_pfm: -70.96035623784216
episode: 472 training return: tensor(-114.9332, device='cuda:0')
episode: 473 training return: tensor(-83.1240, device='cuda:0')
episode: 474 training return: tensor(-118.1665, device='cuda:0')
episode: 475 training return: tensor(-48.6563, device='cuda:0')
epoch: 119 test_true_pfm: 3190.08111280247 sim_pfm: -87.93302142364944
episode: 476 training return: tensor(-64.0743, device='cuda:0')
episode: 477 training return: tensor(-75.4482, device='cuda:0')
episode: 478 training return: tensor(-71.4958, device='cuda:0')
episode: 479 training return: tensor(-94.1242, device='cuda:0')
epoch: 120 test_true_pfm: 3206.457002683806 sim_pfm: -70.37080576243655
episode: 480 training return: tensor(31.4083, device='cuda:0')
episode: 481 training return: tensor(-68.0298, device='cuda:0')
episode: 482 training return: tensor(-65.5902, device='cuda:0')
episode: 483 training return: tensor(-80.8783, device='cuda:0')
epoch: 121 test_true_pfm: 3213.3391953459545 sim_pfm: -73.07153556980968
episode: 484 training return: tensor(-44.6421, device='cuda:0')
episode: 485 training return: tensor(-70.1960, device='cuda:0')
episode: 486 training return: tensor(-689.4728, device='cuda:0')
episode: 487 training return: tensor(-46.1688, device='cuda:0')
epoch: 122 test_true_pfm: 3186.5590582591253 sim_pfm: -72.58713347539499
episode: 488 training return: tensor(-96.8379, device='cuda:0')
episode: 489 training return: tensor(-73.1625, device='cuda:0')
episode: 490 training return: tensor(-99.9837, device='cuda:0')
episode: 491 training return: tensor(-361.4568, device='cuda:0')
epoch: 123 test_true_pfm: 3212.082866549621 sim_pfm: -61.89734668306968
episode: 492 training return: tensor(-57.1207, device='cuda:0')
episode: 493 training return: tensor(-70.0468, device='cuda:0')
episode: 494 training return: tensor(-64.2601, device='cuda:0')
episode: 495 training return: tensor(-44.8847, device='cuda:0')
epoch: 124 test_true_pfm: 3198.9529842548236 sim_pfm: -68.36592840354812
episode: 496 training return: tensor(-76.2044, device='cuda:0')
episode: 497 training return: tensor(-56.8490, device='cuda:0')
episode: 498 training return: tensor(-705.3947, device='cuda:0')
episode: 499 training return: tensor(-287.5667, device='cuda:0')
epoch: 125 test_true_pfm: 3211.4342677085983 sim_pfm: -62.495956427893056
episode: 500 training return: tensor(-93.9031, device='cuda:0')
episode: 501 training return: tensor(-91.8155, device='cuda:0')
episode: 502 training return: tensor(-129.4181, device='cuda:0')
episode: 503 training return: tensor(-73.9488, device='cuda:0')
epoch: 126 test_true_pfm: 3231.5009848039676 sim_pfm: -63.23709837588831
episode: 504 training return: tensor(-83.1544, device='cuda:0')
episode: 505 training return: tensor(-81.0007, device='cuda:0')
episode: 506 training return: tensor(-80.1431, device='cuda:0')
episode: 507 training return: tensor(-46.0689, device='cuda:0')
epoch: 127 test_true_pfm: 3190.7420465626324 sim_pfm: -82.80354751721218
episode: 508 training return: tensor(-51.6198, device='cuda:0')
episode: 509 training return: tensor(-81.2520, device='cuda:0')
episode: 510 training return: tensor(-54.7746, device='cuda:0')
episode: 511 training return: tensor(-66.8147, device='cuda:0')
epoch: 128 test_true_pfm: 3209.9480428039374 sim_pfm: -75.08127567401
episode: 512 training return: tensor(-76.5288, device='cuda:0')
episode: 513 training return: tensor(41.1448, device='cuda:0')
episode: 514 training return: tensor(-81.4435, device='cuda:0')
episode: 515 training return: tensor(-86.3199, device='cuda:0')
epoch: 129 test_true_pfm: 3198.697667768321 sim_pfm: -53.17796836330672
episode: 516 training return: tensor(47.6167, device='cuda:0')
episode: 517 training return: tensor(-54.9198, device='cuda:0')
episode: 518 training return: tensor(-86.2826, device='cuda:0')
episode: 519 training return: tensor(-74.9140, device='cuda:0')
epoch: 130 test_true_pfm: 3226.2572679965792 sim_pfm: -74.89200530496116
episode: 520 training return: tensor(-86.1499, device='cuda:0')
episode: 521 training return: tensor(-45.5410, device='cuda:0')
episode: 522 training return: tensor(-49.6029, device='cuda:0')
episode: 523 training return: tensor(-74.2149, device='cuda:0')
epoch: 131 test_true_pfm: 3209.5153499088724 sim_pfm: -63.357294187687025
episode: 524 training return: tensor(-83.8193, device='cuda:0')
episode: 525 training return: tensor(-55.5479, device='cuda:0')
episode: 526 training return: tensor(-74.9417, device='cuda:0')
episode: 527 training return: tensor(-54.5551, device='cuda:0')
epoch: 132 test_true_pfm: 3200.4845516734695 sim_pfm: -87.07925060154714
episode: 528 training return: tensor(-87.7164, device='cuda:0')
episode: 529 training return: tensor(-72.6600, device='cuda:0')
episode: 530 training return: tensor(-697.9396, device='cuda:0')
episode: 531 training return: tensor(-114.9646, device='cuda:0')
epoch: 133 test_true_pfm: 3189.954198206375 sim_pfm: -59.582911886643465
episode: 532 training return: tensor(-75.7541, device='cuda:0')
episode: 533 training return: tensor(-88.7434, device='cuda:0')
episode: 534 training return: tensor(-62.2595, device='cuda:0')
episode: 535 training return: tensor(-46.6676, device='cuda:0')
epoch: 134 test_true_pfm: 3193.9717620542856 sim_pfm: -83.84100417106917
episode: 536 training return: tensor(-67.7450, device='cuda:0')
episode: 537 training return: tensor(-73.3576, device='cuda:0')
episode: 538 training return: tensor(-73.6695, device='cuda:0')
episode: 539 training return: tensor(-60.3618, device='cuda:0')
epoch: 135 test_true_pfm: 3225.1876837122295 sim_pfm: -39.597490139004854
episode: 540 training return: tensor(-87.1003, device='cuda:0')
episode: 541 training return: tensor(-585.1849, device='cuda:0')
episode: 542 training return: tensor(-52.8330, device='cuda:0')
episode: 543 training return: tensor(-78.8521, device='cuda:0')
epoch: 136 test_true_pfm: 3205.976385529481 sim_pfm: -79.65058620063549
episode: 544 training return: tensor(-59.2925, device='cuda:0')
episode: 545 training return: tensor(-97.1117, device='cuda:0')
episode: 546 training return: tensor(-56.2510, device='cuda:0')
episode: 547 training return: tensor(-69.1915, device='cuda:0')
epoch: 137 test_true_pfm: 3207.977370923203 sim_pfm: -79.4785612677515
episode: 548 training return: tensor(-84.6717, device='cuda:0')
episode: 549 training return: tensor(-105.5510, device='cuda:0')
episode: 550 training return: tensor(-44.0134, device='cuda:0')
episode: 551 training return: tensor(-126.8248, device='cuda:0')
epoch: 138 test_true_pfm: 3239.4829408093847 sim_pfm: -31.459276277610723
episode: 552 training return: tensor(-83.0784, device='cuda:0')
episode: 553 training return: tensor(-48.6685, device='cuda:0')
episode: 554 training return: tensor(-45.5480, device='cuda:0')
episode: 555 training return: tensor(-77.8024, device='cuda:0')
epoch: 139 test_true_pfm: 3218.146566077719 sim_pfm: -51.84023294983975
episode: 556 training return: tensor(-126.4334, device='cuda:0')
episode: 557 training return: tensor(-49.0511, device='cuda:0')
episode: 558 training return: tensor(-663.9238, device='cuda:0')
episode: 559 training return: tensor(-77.5601, device='cuda:0')
epoch: 140 test_true_pfm: 3196.857230080177 sim_pfm: -54.279595043713925
episode: 560 training return: tensor(-84.6759, device='cuda:0')
episode: 561 training return: tensor(-71.2672, device='cuda:0')
episode: 562 training return: tensor(-34.7581, device='cuda:0')
episode: 563 training return: tensor(-31.1108, device='cuda:0')
epoch: 141 test_true_pfm: 3201.57056857192 sim_pfm: -44.953642374768
episode: 564 training return: tensor(-83.5352, device='cuda:0')
episode: 565 training return: tensor(-90.2545, device='cuda:0')
episode: 566 training return: tensor(-98.2664, device='cuda:0')
episode: 567 training return: tensor(-87.0259, device='cuda:0')
epoch: 142 test_true_pfm: 3219.672860570034 sim_pfm: -82.68660031361894
episode: 568 training return: tensor(-72.0227, device='cuda:0')
episode: 569 training return: tensor(-104.4715, device='cuda:0')
episode: 570 training return: tensor(-57.3541, device='cuda:0')
episode: 571 training return: tensor(-73.6478, device='cuda:0')
epoch: 143 test_true_pfm: 3217.0516643551455 sim_pfm: -74.070943164872
episode: 572 training return: tensor(-122.0658, device='cuda:0')
episode: 573 training return: tensor(-45.8921, device='cuda:0')
episode: 574 training return: tensor(-99.9443, device='cuda:0')
episode: 575 training return: tensor(-79.1035, device='cuda:0')
epoch: 144 test_true_pfm: 3197.3944721110324 sim_pfm: -71.7586379692463
episode: 576 training return: tensor(-77.7493, device='cuda:0')
episode: 577 training return: tensor(-63.8469, device='cuda:0')
episode: 578 training return: tensor(-53.1031, device='cuda:0')
episode: 579 training return: tensor(-62.3295, device='cuda:0')
epoch: 145 test_true_pfm: 3217.461748936718 sim_pfm: -52.003280075043826
episode: 580 training return: tensor(-113.5070, device='cuda:0')
episode: 581 training return: tensor(-70.5913, device='cuda:0')
episode: 582 training return: tensor(-63.6342, device='cuda:0')
episode: 583 training return: tensor(-49.8850, device='cuda:0')
epoch: 146 test_true_pfm: 3229.208121907342 sim_pfm: -71.00641733741698
episode: 584 training return: tensor(-610.6074, device='cuda:0')
episode: 585 training return: tensor(-58.8387, device='cuda:0')
episode: 586 training return: tensor(-662.4323, device='cuda:0')
episode: 587 training return: tensor(-68.7886, device='cuda:0')
epoch: 147 test_true_pfm: 3219.610158347016 sim_pfm: -59.21426998514411
episode: 588 training return: tensor(-77.0351, device='cuda:0')
episode: 589 training return: tensor(-63.5308, device='cuda:0')
episode: 590 training return: tensor(-44.9409, device='cuda:0')
episode: 591 training return: tensor(-45.0108, device='cuda:0')
epoch: 148 test_true_pfm: 3201.0472229657075 sim_pfm: -88.141170622713
episode: 592 training return: tensor(-53.8911, device='cuda:0')
episode: 593 training return: tensor(-79.3270, device='cuda:0')
episode: 594 training return: tensor(-72.3229, device='cuda:0')
episode: 595 training return: tensor(-513.8322, device='cuda:0')
epoch: 149 test_true_pfm: 3208.1287684315835 sim_pfm: -59.05183139258103
episode: 596 training return: tensor(-62.0419, device='cuda:0')
episode: 597 training return: tensor(-277.5189, device='cuda:0')
episode: 598 training return: tensor(-75.4896, device='cuda:0')
episode: 599 training return: tensor(-27.0178, device='cuda:0')
epoch: 150 test_true_pfm: 3210.589119016895 sim_pfm: -66.74931669996779
