['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '3', '--data', '10000']
epoch: 0 training_loss 0.30904988557100294 test_loss: 0.21287238597869873
epoch: 1 training_loss 0.1674498676508665 test_loss: 0.1539306163787842
epoch: 2 training_loss 0.1354867048189044 test_loss: 0.1543775200843811
epoch: 3 training_loss 0.11920555233955384 test_loss: 0.14678245782852173
epoch: 4 training_loss 0.11724795568734407 test_loss: 0.14475828409194946
epoch: 5 training_loss 0.11731864016503096 test_loss: 0.12095906734466552
epoch: 6 training_loss 0.10441043194383383 test_loss: 0.127938973903656
epoch: 7 training_loss 0.10649602301418781 test_loss: 0.1541641116142273
epoch: 8 training_loss 0.09519980043172836 test_loss: 0.11148370504379272
epoch: 9 training_loss 0.09624672973528504 test_loss: 0.11527160406112671
epoch: 10 training_loss 0.0984503916464746 test_loss: 0.1187620997428894
epoch: 11 training_loss 0.09951963026076555 test_loss: 0.12397689819335937
epoch: 12 training_loss 0.09569246971979738 test_loss: 0.1325255513191223
epoch: 13 training_loss 0.09595353972166777 test_loss: 0.12660181522369385
epoch: 14 training_loss 0.09792128801345826 test_loss: 0.12424944639205933
epoch: 15 training_loss 0.09385496377944946 test_loss: 0.1216390609741211
epoch: 16 training_loss 0.08630351861938834 test_loss: 0.1319605827331543
epoch: 17 training_loss 0.09422991311177611 test_loss: 0.13517520427703858
epoch: 18 training_loss 0.09090600276365876 test_loss: 0.13237100839614868
epoch: 19 training_loss 0.09158038716763257 test_loss: 0.12785724401474
epoch: 20 training_loss 0.09330105505883694 test_loss: 0.12012896537780762
epoch: 21 training_loss 0.08949059668928384 test_loss: 0.1167590856552124
epoch: 22 training_loss 0.08608622351661324 test_loss: 0.1326249361038208
epoch: 23 training_loss 0.09319987446069718 test_loss: 0.12942905426025392
epoch: 24 training_loss 0.08400320781394839 test_loss: 0.12255162000656128
epoch: 25 training_loss 0.08818935308605433 test_loss: 0.1178058385848999
epoch: 26 training_loss 0.08459323398768902 test_loss: 0.1291848301887512
epoch: 27 training_loss 0.09263720026239752 test_loss: 0.1414690375328064
epoch: 28 training_loss 0.07760648917406797 test_loss: 0.12167470455169678
epoch: 29 training_loss 0.09009545739740134 test_loss: 0.1308245539665222
epoch: 30 training_loss 0.08439673840999604 test_loss: 0.12315707206726074
epoch: 31 training_loss 0.0823956523835659 test_loss: 0.12243138551712036
epoch: 32 training_loss 0.08003968983888626 test_loss: 0.1243399739265442
epoch: 33 training_loss 0.07449455605819821 test_loss: 0.11930574178695678
epoch: 34 training_loss 0.07964089514687657 test_loss: 0.12184849977493287
epoch: 35 training_loss 0.0782884458079934 test_loss: 0.12703746557235718
epoch: 36 training_loss 0.08055869437754154 test_loss: 0.13988431692123413
epoch: 37 training_loss 0.08657365340739488 test_loss: 0.12370038032531738
epoch: 38 training_loss 0.07823229387402535 test_loss: 0.12765879631042482
epoch: 39 training_loss 0.08623405009508132 test_loss: 0.1406320571899414
epoch: 40 training_loss 0.07759394353255629 test_loss: 0.11754318475723266
epoch: 41 training_loss 0.07971386436372996 test_loss: 0.14290069341659545
epoch: 42 training_loss 0.07903123513795435 test_loss: 0.13484929800033568
epoch: 43 training_loss 0.07942788422107697 test_loss: 0.12055150270462037
epoch: 44 training_loss 0.07597649982199073 test_loss: 0.14623225927352906
epoch: 45 training_loss 0.07251234445720911 test_loss: 0.139370858669281
epoch: 46 training_loss 0.0762532937899232 test_loss: 0.13970096111297609
epoch: 47 training_loss 0.07413311191368849 test_loss: 0.13952504396438598
epoch: 48 training_loss 0.0733334478829056 test_loss: 0.15567809343338013
epoch: 49 training_loss 0.07120376985520124 test_loss: 0.1275317907333374
epoch: 50 training_loss 0.07178564133122563 test_loss: 0.13505291938781738
epoch: 51 training_loss 0.06739524574950337 test_loss: 0.11702682971954345
epoch: 52 training_loss 0.07502581722103059 test_loss: 0.14647949934005738
epoch: 53 training_loss 0.07090937428176403 test_loss: 0.14951705932617188
epoch: 54 training_loss 0.07133985623717308 test_loss: 0.14563854932785034
epoch: 55 training_loss 0.06949386963620782 test_loss: 0.11598286628723145
epoch: 56 training_loss 0.070591328330338 test_loss: 0.12468931674957276
epoch: 57 training_loss 0.07286093283444643 test_loss: 0.14661682844161988
epoch: 58 training_loss 0.06576091415248811 test_loss: 0.1263229250907898
epoch: 59 training_loss 0.06900367641821503 test_loss: 0.1267739415168762
epoch: 60 training_loss 0.0685326154716313 test_loss: 0.13440983295440673
epoch: 61 training_loss 0.07066353226080536 test_loss: 0.1461065888404846
epoch: 62 training_loss 0.06302349705249072 test_loss: 0.14006991386413575
epoch: 63 training_loss 0.07323220763355494 test_loss: 0.12985124588012695
epoch: 64 training_loss 0.0627144393324852 test_loss: 0.135140860080719
epoch: 65 training_loss 0.06102659585885704 test_loss: 0.12900395393371583
epoch: 66 training_loss 0.06727939879521727 test_loss: 0.13194489479064941
epoch: 67 training_loss 0.06281603224109858 test_loss: 0.14003341197967528
epoch: 68 training_loss 0.06540817969478667 test_loss: 0.1512471079826355
epoch: 69 training_loss 0.061794484481215474 test_loss: 0.14614044427871703
epoch: 70 training_loss 0.06187614219263196 test_loss: 0.13439222574234008
epoch: 71 training_loss 0.06592164303176105 test_loss: 0.14511648416519166
epoch: 72 training_loss 0.05424516139551997 test_loss: 0.14325370788574218
epoch: 73 training_loss 0.06207895180210471 test_loss: 0.11962450742721557
epoch: 74 training_loss 0.05959595002233982 test_loss: 0.13135305643081666
epoch: 75 training_loss 0.06242793772369623 test_loss: 0.12171992063522338
epoch: 76 training_loss 0.056651089703664184 test_loss: 0.15324461460113525
epoch: 77 training_loss 0.05543542066588998 test_loss: 0.15895248651504518
epoch: 78 training_loss 0.05892380937002599 test_loss: 0.14463728666305542
epoch: 79 training_loss 0.05845030079595745 test_loss: 0.1579815626144409
epoch: 80 training_loss 0.05723749989643693 test_loss: 0.1380699872970581
epoch: 81 training_loss 0.05548257199116051 test_loss: 0.14274495840072632
epoch: 82 training_loss 0.0566241966933012 test_loss: 0.1558924913406372
epoch: 83 training_loss 0.05645531447604299 test_loss: 0.14561883211135865
epoch: 84 training_loss 0.05393835525028408 test_loss: 0.1615312933921814
epoch: 85 training_loss 0.05593053140677512 test_loss: 0.1404753088951111
epoch: 86 training_loss 0.051346845561638474 test_loss: 0.14609136581420898
epoch: 87 training_loss 0.05102498029358685 test_loss: 0.1562103509902954
epoch: 88 training_loss 0.060490180337801576 test_loss: 0.15799205303192138
epoch: 89 training_loss 0.048574785687960684 test_loss: 0.14694613218307495
epoch: 90 training_loss 0.06055334763601422 test_loss: 0.16724234819412231
epoch: 91 training_loss 0.05546144436579198 test_loss: 0.1736675500869751
epoch: 92 training_loss 0.04803056087810546 test_loss: 0.15921907424926757
epoch: 93 training_loss 0.04942302412353456 test_loss: 0.17114392518997193
epoch: 94 training_loss 0.05336976110003889 test_loss: 0.15061036348342896
epoch: 95 training_loss 0.0524527504760772 test_loss: 0.16873319149017335
epoch: 96 training_loss 0.051151883117854596 test_loss: 0.16585806608200074
epoch: 97 training_loss 0.05227343987673521 test_loss: 0.15938596725463866
epoch: 98 training_loss 0.05079293033108115 test_loss: 0.15862184762954712
epoch: 99 training_loss 0.04819863406009972 test_loss: 0.17133984565734864
epoch: 100 training_loss 0.048025285033509134 test_loss: 0.1659882664680481
epoch: 101 training_loss 0.0461506243981421 test_loss: 0.16066678762435913
epoch: 102 training_loss 0.04631375988014042 test_loss: 0.1565546751022339
epoch: 103 training_loss 0.05104692054912448 test_loss: 0.18740060329437255
epoch: 104 training_loss 0.044883414302021266 test_loss: 0.16269841194152831
epoch: 105 training_loss 0.05089703619480133 test_loss: 0.17570188045501708
epoch: 106 training_loss 0.04559496273286641 test_loss: 0.16657581329345703
epoch: 107 training_loss 0.04149441089946777 test_loss: 0.16762089729309082
epoch: 108 training_loss 0.046133244791999456 test_loss: 0.15956662893295287
epoch: 109 training_loss 0.04116154147312045 test_loss: 0.1573297381401062
epoch: 110 training_loss 0.04174707948230207 test_loss: 0.1401221752166748
epoch: 111 training_loss 0.048382834242656825 test_loss: 0.16103957891464232
epoch: 112 training_loss 0.048335582735016945 test_loss: 0.1784578323364258
epoch: 113 training_loss 0.04879672079347074 test_loss: 0.1518266201019287
epoch: 114 training_loss 0.04078226398676634 test_loss: 0.18957737684249878
epoch: 115 training_loss 0.03834781096316874 test_loss: 0.16123564243316652
epoch: 116 training_loss 0.040752188246697185 test_loss: 0.18030592203140258
epoch: 117 training_loss 0.036524389232508836 test_loss: 0.15917856693267823
epoch: 118 training_loss 0.03858678609598428 test_loss: 0.18668203353881835
epoch: 119 training_loss 0.034328452637419106 test_loss: 0.16567190885543823
epoch: 120 training_loss 0.04051123820245266 test_loss: 0.15278115272521972
epoch: 121 training_loss 0.03904540285468101 test_loss: 0.1712701439857483
epoch: 122 training_loss 0.03879116596654057 test_loss: 0.1598692774772644
epoch: 123 training_loss 0.037559347469359634 test_loss: 0.16791293621063233
epoch: 124 training_loss 0.038665574118494986 test_loss: 0.18259963989257813
epoch: 125 training_loss 0.04312322120182216 test_loss: 0.17052422761917113
epoch: 126 training_loss 0.03853533800691366 test_loss: 0.1975294828414917
epoch: 127 training_loss 0.037215450592339036 test_loss: 0.1714141011238098
epoch: 128 training_loss 0.03540446692146361 test_loss: 0.18620100021362304
epoch: 129 training_loss 0.04156229799380526 test_loss: 0.1867188572883606
epoch: 130 training_loss 0.0439998781401664 test_loss: 0.1606402039527893
epoch: 131 training_loss 0.03421235897578299 test_loss: 0.1783638119697571
epoch: 132 training_loss 0.03205578313209116 test_loss: 0.17206032276153566
epoch: 133 training_loss 0.028174123007338493 test_loss: 0.1769416332244873
epoch: 134 training_loss 0.03259846663102508 test_loss: 0.16897283792495726
epoch: 135 training_loss 0.036417522691190245 test_loss: 0.1684712529182434
epoch: 136 training_loss 0.03726933185942471 test_loss: 0.16894397735595704
epoch: 137 training_loss 0.03881258477456868 test_loss: 0.16583844423294067
epoch: 138 training_loss 0.031179245607927442 test_loss: 0.181197190284729
epoch: 139 training_loss 0.033455837815999985 test_loss: 0.16971499919891359
epoch: 140 training_loss 0.03414255471900105 test_loss: 0.17319860458374023
epoch: 141 training_loss 0.029804968046955763 test_loss: 0.18111507892608641
epoch: 142 training_loss 0.031203132206574082 test_loss: 0.14307929277420045
epoch: 143 training_loss 0.03140191160608083 test_loss: 0.17434308528900147
epoch: 144 training_loss 0.02678711574524641 test_loss: 0.20604329109191893
epoch: 145 training_loss 0.032181156999431554 test_loss: 0.17944787740707396
epoch: 146 training_loss 0.034441114524379375 test_loss: 0.18040070533752442
epoch: 147 training_loss 0.028898025155067442 test_loss: 0.19028069972991943
epoch: 148 training_loss 0.033628129763528705 test_loss: 0.18424320220947266
epoch: 149 training_loss 0.033649090277031064 test_loss: 0.18631871938705444
epoch: 0 training_loss 48.59130163192749 test_loss: 23.301844787597656
epoch: 1 training_loss 18.046635704040526 test_loss: 15.124130249023438
epoch: 2 training_loss 12.793991594314575 test_loss: 11.15286865234375
epoch: 3 training_loss 9.832027583122253 test_loss: 9.289258575439453
epoch: 4 training_loss 8.397574877738952 test_loss: 8.008247375488281
epoch: 5 training_loss 7.354852514266968 test_loss: 7.091168212890625
epoch: 6 training_loss 6.544775581359863 test_loss: 6.501865386962891
epoch: 7 training_loss 6.032575426101684 test_loss: 5.853353500366211
epoch: 8 training_loss 5.479717121124268 test_loss: 5.2803905487060545
epoch: 9 training_loss 5.093639116287232 test_loss: 5.047024917602539
epoch: 10 training_loss 4.807993648052215 test_loss: 4.748735809326172
epoch: 11 training_loss 4.56380111694336 test_loss: 4.38715705871582
epoch: 12 training_loss 4.248866374492645 test_loss: 4.39965934753418
epoch: 13 training_loss 4.082646763324737 test_loss: 4.1100822448730465
epoch: 14 training_loss 3.826479036808014 test_loss: 3.8543094635009765
epoch: 15 training_loss 3.67475923538208 test_loss: 3.756128692626953
epoch: 16 training_loss 3.5722171783447267 test_loss: 3.6550552368164064
epoch: 17 training_loss 3.4068679428100586 test_loss: 3.488422393798828
epoch: 18 training_loss 3.312420015335083 test_loss: 3.328282928466797
epoch: 19 training_loss 3.1686580395698547 test_loss: 3.28763427734375
epoch: 20 training_loss 3.2316949653625486 test_loss: 3.2384761810302733
epoch: 21 training_loss 3.0377801942825315 test_loss: 3.204367446899414
epoch: 22 training_loss 2.994407558441162 test_loss: 3.0638006210327147
epoch: 23 training_loss 2.914657168388367 test_loss: 3.02474365234375
epoch: 24 training_loss 2.883670492172241 test_loss: 2.9526824951171875
epoch: 25 training_loss 2.810677399635315 test_loss: 2.8581207275390623
epoch: 26 training_loss 2.694938623905182 test_loss: 2.781150245666504
epoch: 27 training_loss 2.630934724807739 test_loss: 2.7319496154785154
epoch: 28 training_loss 2.6229558539390565 test_loss: 2.6918399810791014
epoch: 29 training_loss 2.526381740570068 test_loss: 2.623476028442383
epoch: 30 training_loss 2.5858392214775083 test_loss: 2.5615074157714846
epoch: 31 training_loss 2.5471379160881042 test_loss: 2.62984676361084
epoch: 32 training_loss 2.4675103664398192 test_loss: 2.568368148803711
epoch: 33 training_loss 2.4130648517608644 test_loss: 2.4854103088378907
epoch: 34 training_loss 2.425574119091034 test_loss: 2.489015579223633
epoch: 35 training_loss 2.378148468732834 test_loss: 2.4683319091796876
epoch: 36 training_loss 2.3431878876686096 test_loss: 2.4443984985351563
epoch: 37 training_loss 2.276849652528763 test_loss: 2.443122673034668
epoch: 38 training_loss 2.2890643203258514 test_loss: 2.369070625305176
epoch: 39 training_loss 2.2313320314884186 test_loss: 2.3697879791259764
epoch: 40 training_loss 2.248003191947937 test_loss: 2.292413520812988
epoch: 41 training_loss 2.1729811358451845 test_loss: 2.2998300552368165
epoch: 42 training_loss 2.1885372579097746 test_loss: 2.3129804611206053
epoch: 43 training_loss 2.18655868768692 test_loss: 2.2639528274536134
epoch: 44 training_loss 2.1321752762794492 test_loss: 2.2116977691650392
epoch: 45 training_loss 2.111678830385208 test_loss: 2.1956655502319338
epoch: 46 training_loss 2.075453839302063 test_loss: 2.2210660934448243
epoch: 47 training_loss 2.075703146457672 test_loss: 2.144577407836914
epoch: 48 training_loss 2.096557809114456 test_loss: 2.1737594604492188
epoch: 49 training_loss 2.0621459555625914 test_loss: 2.1509353637695314
epoch: 50 training_loss 2.050182240009308 test_loss: 2.115670394897461
epoch: 51 training_loss 2.0088610422611235 test_loss: 2.0477108001708983
epoch: 52 training_loss 2.0196192252635954 test_loss: 2.0823286056518553
epoch: 53 training_loss 1.9596787798404693 test_loss: 2.070084571838379
epoch: 54 training_loss 1.9958239686489105 test_loss: 2.0638168334960936
epoch: 55 training_loss 1.9825361251831055 test_loss: 2.03027458190918
epoch: 56 training_loss 1.9731823682785035 test_loss: 1.9973430633544922
epoch: 57 training_loss 1.9803927206993104 test_loss: 1.9943992614746093
epoch: 58 training_loss 1.9359845912456513 test_loss: 2.030460166931152
epoch: 59 training_loss 1.8884026098251343 test_loss: 1.9740329742431642
epoch: 60 training_loss 1.9289514052867889 test_loss: 2.001555633544922
epoch: 61 training_loss 1.8672240281105041 test_loss: 1.9839778900146485
epoch: 62 training_loss 1.9036898577213288 test_loss: 1.953714370727539
epoch: 63 training_loss 1.877309285402298 test_loss: 1.947892189025879
epoch: 64 training_loss 1.8690822958946227 test_loss: 1.952671432495117
epoch: 65 training_loss 1.8491829967498778 test_loss: 1.9204166412353516
epoch: 66 training_loss 1.867361115217209 test_loss: 1.9502771377563477
epoch: 67 training_loss 1.845041093826294 test_loss: 1.9189744949340821
epoch: 68 training_loss 1.85200621843338 test_loss: 1.8750398635864258
epoch: 69 training_loss 1.857320818901062 test_loss: 1.9755746841430664
epoch: 70 training_loss 1.820264788866043 test_loss: 1.901316261291504
epoch: 71 training_loss 1.8096987235546111 test_loss: 1.8695615768432616
epoch: 72 training_loss 1.7715759706497192 test_loss: 1.8790260314941407
epoch: 73 training_loss 1.7883869314193725 test_loss: 1.83863525390625
epoch: 74 training_loss 1.7921844804286957 test_loss: 1.8465173721313477
epoch: 75 training_loss 1.753939793109894 test_loss: 1.8530134201049804
epoch: 76 training_loss 1.80478102684021 test_loss: 1.790480613708496
epoch: 77 training_loss 1.7473336970806121 test_loss: 1.8285186767578125
epoch: 78 training_loss 1.7591077363491059 test_loss: 1.818043327331543
epoch: 79 training_loss 1.7372062373161317 test_loss: 1.8125455856323243
epoch: 80 training_loss 1.7643200767040252 test_loss: 1.7812765121459961
epoch: 81 training_loss 1.7427005612850188 test_loss: 1.827413558959961
epoch: 82 training_loss 1.729570941925049 test_loss: 1.7879579544067383
epoch: 83 training_loss 1.7269696974754334 test_loss: 1.7765403747558595
epoch: 84 training_loss 1.7249999010562898 test_loss: 1.8098789215087892
epoch: 85 training_loss 1.7384188771247864 test_loss: 1.8285980224609375
epoch: 86 training_loss 1.7366078877449036 test_loss: 1.7699926376342774
epoch: 87 training_loss 1.672297133207321 test_loss: 1.7913400650024414
epoch: 88 training_loss 1.6844520330429078 test_loss: 1.7774946212768554
epoch: 89 training_loss 1.751778209209442 test_loss: 1.7690353393554688
epoch: 90 training_loss 1.6730056405067444 test_loss: 1.7638906478881835
epoch: 91 training_loss 1.6721648180484772 test_loss: 1.7659004211425782
epoch: 92 training_loss 1.6690859341621398 test_loss: 1.7089199066162108
epoch: 93 training_loss 1.681810644865036 test_loss: 1.755894088745117
epoch: 94 training_loss 1.6755866169929505 test_loss: 1.730723762512207
epoch: 95 training_loss 1.6695818436145782 test_loss: 1.7300148010253906
epoch: 96 training_loss 1.6296302163600922 test_loss: 1.7598621368408203
epoch: 97 training_loss 1.655618290901184 test_loss: 1.7154033660888672
epoch: 98 training_loss 1.6452915513515471 test_loss: 1.7229087829589844
epoch: 99 training_loss 1.6481746542453766 test_loss: 1.7118356704711915
epoch: 100 training_loss 1.6287173295021058 test_loss: 1.6787527084350586
epoch: 101 training_loss 1.645605217218399 test_loss: 1.6783885955810547
epoch: 102 training_loss 1.5951184689998628 test_loss: 1.7484508514404298
epoch: 103 training_loss 1.6482533347606658 test_loss: 1.6706878662109375
epoch: 104 training_loss 1.632194709777832 test_loss: 1.738077163696289
epoch: 105 training_loss 1.6016721034049988 test_loss: 1.6609012603759765
epoch: 106 training_loss 1.5873503541946412 test_loss: 1.6804994583129882
epoch: 107 training_loss 1.6310074472427367 test_loss: 1.6726974487304687
epoch: 108 training_loss 1.6023152327537538 test_loss: 1.6444791793823241
epoch: 109 training_loss 1.6015388786792755 test_loss: 1.6660953521728517
epoch: 110 training_loss 1.5790206325054168 test_loss: 1.612571907043457
epoch: 111 training_loss 1.5743560445308686 test_loss: 1.6699460983276366
epoch: 112 training_loss 1.605352759361267 test_loss: 1.6640098571777344
epoch: 113 training_loss 1.5671375644207002 test_loss: 1.6762699127197265
epoch: 114 training_loss 1.5690293049812316 test_loss: 1.6080593109130858
epoch: 115 training_loss 1.5825067818164826 test_loss: 1.6109230041503906
epoch: 116 training_loss 1.5771626472473144 test_loss: 1.6366386413574219
epoch: 117 training_loss 1.5487032115459443 test_loss: 1.6495336532592773
epoch: 118 training_loss 1.5393410170078277 test_loss: 1.6084199905395509
epoch: 119 training_loss 1.5640908110141754 test_loss: 1.6542957305908204
epoch: 120 training_loss 1.5594719707965852 test_loss: 1.6382644653320313
epoch: 121 training_loss 1.567408790588379 test_loss: 1.5919414520263673
epoch: 122 training_loss 1.5474111437797546 test_loss: 1.590816593170166
epoch: 123 training_loss 1.5091417813301087 test_loss: 1.6271163940429687
epoch: 124 training_loss 1.5452781414985657 test_loss: 1.5877040863037108
epoch: 125 training_loss 1.5261914670467376 test_loss: 1.620804786682129
epoch: 126 training_loss 1.548392150402069 test_loss: 1.6285724639892578
epoch: 127 training_loss 1.5477073192596436 test_loss: 1.6009958267211915
epoch: 128 training_loss 1.5258403635025024 test_loss: 1.6065118789672852
epoch: 129 training_loss 1.5239218068122864 test_loss: 1.6553218841552735
epoch: 130 training_loss 1.5352711379528046 test_loss: 1.5932899475097657
epoch: 131 training_loss 1.497014183998108 test_loss: 1.593042755126953
epoch: 132 training_loss 1.5363578188419342 test_loss: 1.5975668907165528
epoch: 133 training_loss 1.5120964276790618 test_loss: 1.6091043472290039
epoch: 134 training_loss 1.5222903096675873 test_loss: 1.5904120445251464
epoch: 135 training_loss 1.5097138786315918 test_loss: 1.5959245681762695
epoch: 136 training_loss 1.5146899950504302 test_loss: 1.5958014488220216
epoch: 137 training_loss 1.5207970285415648 test_loss: 1.566019058227539
epoch: 138 training_loss 1.4763138675689698 test_loss: 1.5442442893981934
epoch: 139 training_loss 1.510266683101654 test_loss: 1.588277244567871
epoch: 140 training_loss 1.4832419514656068 test_loss: 1.5907764434814453
epoch: 141 training_loss 1.512111122608185 test_loss: 1.561690330505371
epoch: 142 training_loss 1.510957202911377 test_loss: 1.5386496543884278
epoch: 143 training_loss 1.47395028591156 test_loss: 1.603624153137207
epoch: 144 training_loss 1.4825955617427826 test_loss: 1.5786750793457032
epoch: 145 training_loss 1.4560819220542909 test_loss: 1.5274264335632324
epoch: 146 training_loss 1.4911189115047454 test_loss: 1.5784494400024414
epoch: 147 training_loss 1.4893801975250245 test_loss: 1.563788890838623
epoch: 148 training_loss 1.4875087940692902 test_loss: 1.5664019584655762
epoch: 149 training_loss 1.4858686661720275 test_loss: 1.55670166015625
5048.651196683487
episode: 0 training return: tensor(-201.4814, device='cuda:0')
episode: 1 training return: tensor(-175.0028, device='cuda:0')
episode: 2 training return: tensor(-145.1420, device='cuda:0')
episode: 3 training return: tensor(15.6913, device='cuda:0')
epoch: 1 test_true_pfm: 5101.889773986091 sim_pfm: 33.785332941799425
episode: 4 training return: tensor(-159.0404, device='cuda:0')
episode: 5 training return: tensor(-177.6407, device='cuda:0')
episode: 6 training return: tensor(-224.6322, device='cuda:0')
episode: 7 training return: tensor(-40.8345, device='cuda:0')
epoch: 2 test_true_pfm: 5013.582995313991 sim_pfm: -7.99209708479854
episode: 8 training return: tensor(-20.0053, device='cuda:0')
episode: 9 training return: tensor(-218.3684, device='cuda:0')
episode: 10 training return: tensor(-75.4684, device='cuda:0')
episode: 11 training return: tensor(-94.4905, device='cuda:0')
epoch: 3 test_true_pfm: 5169.010630570321 sim_pfm: -23.835102282619726
episode: 12 training return: tensor(13.0795, device='cuda:0')
episode: 13 training return: tensor(-153.7135, device='cuda:0')
episode: 14 training return: tensor(-44.6913, device='cuda:0')
episode: 15 training return: tensor(-13.8291, device='cuda:0')
epoch: 4 test_true_pfm: 5132.605497842451 sim_pfm: -48.907671249461906
episode: 16 training return: tensor(13.9687, device='cuda:0')
episode: 17 training return: tensor(-34.1755, device='cuda:0')
episode: 18 training return: tensor(60.4175, device='cuda:0')
episode: 19 training return: tensor(-80.1638, device='cuda:0')
epoch: 5 test_true_pfm: 5124.432080809807 sim_pfm: 23.94406105178253
episode: 20 training return: tensor(-176.8017, device='cuda:0')
episode: 21 training return: tensor(-189.2657, device='cuda:0')
episode: 22 training return: tensor(-234.5687, device='cuda:0')
episode: 23 training return: tensor(-113.0309, device='cuda:0')
epoch: 6 test_true_pfm: 5101.75382992657 sim_pfm: -36.18657653470291
episode: 24 training return: tensor(24.3460, device='cuda:0')
episode: 25 training return: tensor(-220.8632, device='cuda:0')
episode: 26 training return: tensor(51.1858, device='cuda:0')
episode: 27 training return: tensor(-59.2327, device='cuda:0')
epoch: 7 test_true_pfm: 5137.186681553118 sim_pfm: -3.2869533679331653
episode: 28 training return: tensor(-86.6623, device='cuda:0')
episode: 29 training return: tensor(83.0015, device='cuda:0')
episode: 30 training return: tensor(-8.7683, device='cuda:0')
episode: 31 training return: tensor(-162.0043, device='cuda:0')
epoch: 8 test_true_pfm: 5148.72384556353 sim_pfm: -17.19504806007414
episode: 32 training return: tensor(-53.4803, device='cuda:0')
episode: 33 training return: tensor(-172.0601, device='cuda:0')
episode: 34 training return: tensor(-318.1574, device='cuda:0')
episode: 35 training return: tensor(115.5954, device='cuda:0')
epoch: 9 test_true_pfm: 5207.455167862339 sim_pfm: 87.63534905090152
episode: 36 training return: tensor(27.1365, device='cuda:0')
episode: 37 training return: tensor(89.9636, device='cuda:0')
episode: 38 training return: tensor(-144.3017, device='cuda:0')
episode: 39 training return: tensor(58.5940, device='cuda:0')
epoch: 10 test_true_pfm: 5091.583474658327 sim_pfm: -21.857046071536995
episode: 40 training return: tensor(-128.2657, device='cuda:0')
episode: 41 training return: tensor(140.9323, device='cuda:0')
episode: 42 training return: tensor(-65.7838, device='cuda:0')
episode: 43 training return: tensor(45.2963, device='cuda:0')
epoch: 11 test_true_pfm: 5305.005480893644 sim_pfm: 113.5338782526087
episode: 44 training return: tensor(14.3259, device='cuda:0')
episode: 45 training return: tensor(-112.3196, device='cuda:0')
episode: 46 training return: tensor(108.1539, device='cuda:0')
episode: 47 training return: tensor(64.4618, device='cuda:0')
epoch: 12 test_true_pfm: 5101.643375535471 sim_pfm: 116.63591882966769
episode: 48 training return: tensor(-60.8206, device='cuda:0')
episode: 49 training return: tensor(15.0883, device='cuda:0')
episode: 50 training return: tensor(-5.6930, device='cuda:0')
episode: 51 training return: tensor(15.1437, device='cuda:0')
epoch: 13 test_true_pfm: 5190.455591283834 sim_pfm: 84.41161880206589
episode: 52 training return: tensor(-46.3125, device='cuda:0')
episode: 53 training return: tensor(-88.1329, device='cuda:0')
episode: 54 training return: tensor(22.3758, device='cuda:0')
episode: 55 training return: tensor(100.2298, device='cuda:0')
epoch: 14 test_true_pfm: 5175.696322631492 sim_pfm: 133.54360729498634
episode: 56 training return: tensor(-41.7884, device='cuda:0')
episode: 57 training return: tensor(27.5779, device='cuda:0')
episode: 58 training return: tensor(-8.6083, device='cuda:0')
episode: 59 training return: tensor(-130.7226, device='cuda:0')
epoch: 15 test_true_pfm: 5136.987048580064 sim_pfm: 101.70246917370241
episode: 60 training return: tensor(-142.5447, device='cuda:0')
episode: 61 training return: tensor(-45.1012, device='cuda:0')
episode: 62 training return: tensor(-25.9459, device='cuda:0')
episode: 63 training return: tensor(32.6025, device='cuda:0')
epoch: 16 test_true_pfm: 5221.660175542048 sim_pfm: 110.79621192567477
episode: 64 training return: tensor(-273.2230, device='cuda:0')
episode: 65 training return: tensor(-98.7850, device='cuda:0')
episode: 66 training return: tensor(109.5407, device='cuda:0')
episode: 67 training return: tensor(-2.3875, device='cuda:0')
epoch: 17 test_true_pfm: 5212.115161711679 sim_pfm: 92.09016591724746
episode: 68 training return: tensor(-75.9551, device='cuda:0')
episode: 69 training return: tensor(-181.5320, device='cuda:0')
episode: 70 training return: tensor(-11.1467, device='cuda:0')
episode: 71 training return: tensor(-142.3205, device='cuda:0')
epoch: 18 test_true_pfm: 5180.414141478569 sim_pfm: 217.498373478457
episode: 72 training return: tensor(-67.3024, device='cuda:0')
episode: 73 training return: tensor(-191.1879, device='cuda:0')
episode: 74 training return: tensor(24.9005, device='cuda:0')
episode: 75 training return: tensor(20.6504, device='cuda:0')
epoch: 19 test_true_pfm: 5337.586305704611 sim_pfm: 27.969907724841807
episode: 76 training return: tensor(-80.7902, device='cuda:0')
episode: 77 training return: tensor(-135.6923, device='cuda:0')
episode: 78 training return: tensor(0.7095, device='cuda:0')
episode: 79 training return: tensor(-30.9320, device='cuda:0')
epoch: 20 test_true_pfm: 5358.424025019337 sim_pfm: 183.58418193171383
episode: 80 training return: tensor(-110.7882, device='cuda:0')
episode: 81 training return: tensor(68.2390, device='cuda:0')
episode: 82 training return: tensor(-141.5346, device='cuda:0')
episode: 83 training return: tensor(-58.3980, device='cuda:0')
epoch: 21 test_true_pfm: 5381.952447008898 sim_pfm: 178.80986301600933
episode: 84 training return: tensor(-28.2790, device='cuda:0')
episode: 85 training return: tensor(34.6367, device='cuda:0')
episode: 86 training return: tensor(-71.4796, device='cuda:0')
episode: 87 training return: tensor(182.0091, device='cuda:0')
epoch: 22 test_true_pfm: 5234.054370070066 sim_pfm: 159.20487147783084
episode: 88 training return: tensor(123.4039, device='cuda:0')
episode: 89 training return: tensor(182.9449, device='cuda:0')
episode: 90 training return: tensor(206.8790, device='cuda:0')
episode: 91 training return: tensor(-69.3795, device='cuda:0')
epoch: 23 test_true_pfm: 5302.6651141010525 sim_pfm: 143.80527630413417
episode: 92 training return: tensor(19.6727, device='cuda:0')
episode: 93 training return: tensor(-63.5881, device='cuda:0')
episode: 94 training return: tensor(79.1475, device='cuda:0')
episode: 95 training return: tensor(181.6031, device='cuda:0')
epoch: 24 test_true_pfm: 5283.139480646089 sim_pfm: 250.03908578834185
episode: 96 training return: tensor(192.9635, device='cuda:0')
episode: 97 training return: tensor(-6.5695, device='cuda:0')
episode: 98 training return: tensor(-29.7110, device='cuda:0')
episode: 99 training return: tensor(74.8982, device='cuda:0')
epoch: 25 test_true_pfm: 5297.975351926006 sim_pfm: 168.63298680406297
episode: 100 training return: tensor(45.0338, device='cuda:0')
episode: 101 training return: tensor(80.2641, device='cuda:0')
episode: 102 training return: tensor(-18.1974, device='cuda:0')
episode: 103 training return: tensor(168.7167, device='cuda:0')
epoch: 26 test_true_pfm: 5444.960337446691 sim_pfm: 190.93973107884327
episode: 104 training return: tensor(-86.5932, device='cuda:0')
episode: 105 training return: tensor(50.3326, device='cuda:0')
episode: 106 training return: tensor(192.2333, device='cuda:0')
episode: 107 training return: tensor(46.4580, device='cuda:0')
epoch: 27 test_true_pfm: 5346.038390457371 sim_pfm: 138.91677516380636
episode: 108 training return: tensor(1.5683, device='cuda:0')
episode: 109 training return: tensor(165.3959, device='cuda:0')
episode: 110 training return: tensor(158.8273, device='cuda:0')
episode: 111 training return: tensor(339.4579, device='cuda:0')
epoch: 28 test_true_pfm: 5342.954544862398 sim_pfm: 208.7804680344901
episode: 112 training return: tensor(168.4229, device='cuda:0')
episode: 113 training return: tensor(285.9249, device='cuda:0')
episode: 114 training return: tensor(209.3433, device='cuda:0')
episode: 115 training return: tensor(245.9017, device='cuda:0')
epoch: 29 test_true_pfm: 5476.55065273803 sim_pfm: 160.76572963239354
episode: 116 training return: tensor(105.2605, device='cuda:0')
episode: 117 training return: tensor(247.5275, device='cuda:0')
episode: 118 training return: tensor(235.9491, device='cuda:0')
episode: 119 training return: tensor(281.8825, device='cuda:0')
epoch: 30 test_true_pfm: 5369.972347481969 sim_pfm: 195.73927826435343
episode: 120 training return: tensor(67.3698, device='cuda:0')
episode: 121 training return: tensor(210.3297, device='cuda:0')
episode: 122 training return: tensor(294.4551, device='cuda:0')
episode: 123 training return: tensor(327.5523, device='cuda:0')
epoch: 31 test_true_pfm: 5366.953682443214 sim_pfm: 278.6889040255143
episode: 124 training return: tensor(-8.0203, device='cuda:0')
episode: 125 training return: tensor(173.9516, device='cuda:0')
episode: 126 training return: tensor(206.2269, device='cuda:0')
episode: 127 training return: tensor(-15.9609, device='cuda:0')
epoch: 32 test_true_pfm: 5488.83724470048 sim_pfm: 420.16962613140157
episode: 128 training return: tensor(187.7509, device='cuda:0')
episode: 129 training return: tensor(-101.3916, device='cuda:0')
episode: 130 training return: tensor(188.4642, device='cuda:0')
episode: 131 training return: tensor(18.7023, device='cuda:0')
epoch: 33 test_true_pfm: 5463.856822212551 sim_pfm: 196.0258592767641
episode: 132 training return: tensor(169.5858, device='cuda:0')
episode: 133 training return: tensor(86.5725, device='cuda:0')
episode: 134 training return: tensor(234.4069, device='cuda:0')
episode: 135 training return: tensor(-9.7575, device='cuda:0')
epoch: 34 test_true_pfm: 5422.240937355276 sim_pfm: 356.71095183549915
episode: 136 training return: tensor(174.3145, device='cuda:0')
episode: 137 training return: tensor(140.2444, device='cuda:0')
episode: 138 training return: tensor(66.4643, device='cuda:0')
episode: 139 training return: tensor(156.6505, device='cuda:0')
epoch: 35 test_true_pfm: 5545.2799951348325 sim_pfm: 305.259827107812
episode: 140 training return: tensor(201.9909, device='cuda:0')
episode: 141 training return: tensor(162.1461, device='cuda:0')
episode: 142 training return: tensor(169.5833, device='cuda:0')
episode: 143 training return: tensor(175.9554, device='cuda:0')
epoch: 36 test_true_pfm: 5436.662871587441 sim_pfm: 260.8148311914022
episode: 144 training return: tensor(197.9120, device='cuda:0')
episode: 145 training return: tensor(48.3019, device='cuda:0')
episode: 146 training return: tensor(30.8913, device='cuda:0')
episode: 147 training return: tensor(133.0381, device='cuda:0')
epoch: 37 test_true_pfm: 5484.534819793702 sim_pfm: 301.4242730004286
episode: 148 training return: tensor(52.8551, device='cuda:0')
episode: 149 training return: tensor(219.7635, device='cuda:0')
episode: 150 training return: tensor(268.7662, device='cuda:0')
episode: 151 training return: tensor(124.1886, device='cuda:0')
epoch: 38 test_true_pfm: 5390.529307221783 sim_pfm: 304.3671890425806
episode: 152 training return: tensor(215.9772, device='cuda:0')
episode: 153 training return: tensor(160.8048, device='cuda:0')
episode: 154 training return: tensor(215.5813, device='cuda:0')
episode: 155 training return: tensor(118.8598, device='cuda:0')
epoch: 39 test_true_pfm: 5451.2075122462165 sim_pfm: 288.5674191967507
episode: 156 training return: tensor(284.8296, device='cuda:0')
episode: 157 training return: tensor(193.4197, device='cuda:0')
episode: 158 training return: tensor(160.9904, device='cuda:0')
episode: 159 training return: tensor(180.1523, device='cuda:0')
epoch: 40 test_true_pfm: 5509.458509197194 sim_pfm: 348.4623378038911
episode: 160 training return: tensor(332.5878, device='cuda:0')
episode: 161 training return: tensor(317.9420, device='cuda:0')
episode: 162 training return: tensor(195.2762, device='cuda:0')
episode: 163 training return: tensor(211.1169, device='cuda:0')
epoch: 41 test_true_pfm: 5460.6895577539535 sim_pfm: 312.3376316370753
episode: 164 training return: tensor(223.2336, device='cuda:0')
episode: 165 training return: tensor(112.7269, device='cuda:0')
episode: 166 training return: tensor(187.6577, device='cuda:0')
episode: 167 training return: tensor(-14.9519, device='cuda:0')
epoch: 42 test_true_pfm: 5427.6815294063135 sim_pfm: 269.7405404216067
episode: 168 training return: tensor(155.0757, device='cuda:0')
episode: 169 training return: tensor(137.3624, device='cuda:0')
episode: 170 training return: tensor(245.9348, device='cuda:0')
episode: 171 training return: tensor(283.4474, device='cuda:0')
epoch: 43 test_true_pfm: 5497.824526170162 sim_pfm: 254.24652011750732
episode: 172 training return: tensor(153.4342, device='cuda:0')
episode: 173 training return: tensor(224.0898, device='cuda:0')
episode: 174 training return: tensor(238.9532, device='cuda:0')
episode: 175 training return: tensor(223.0848, device='cuda:0')
epoch: 44 test_true_pfm: 5549.1385835024075 sim_pfm: 277.03444125072565
episode: 176 training return: tensor(220.7079, device='cuda:0')
episode: 177 training return: tensor(281.5189, device='cuda:0')
episode: 178 training return: tensor(179.7847, device='cuda:0')
episode: 179 training return: tensor(189.0115, device='cuda:0')
epoch: 45 test_true_pfm: 5531.774569187277 sim_pfm: 397.8198191005504
episode: 180 training return: tensor(136.4135, device='cuda:0')
episode: 181 training return: tensor(388.1552, device='cuda:0')
episode: 182 training return: tensor(208.5958, device='cuda:0')
episode: 183 training return: tensor(229.8568, device='cuda:0')
epoch: 46 test_true_pfm: 5580.773032729805 sim_pfm: 342.7731471983134
episode: 184 training return: tensor(93.4085, device='cuda:0')
episode: 185 training return: tensor(161.6564, device='cuda:0')
episode: 186 training return: tensor(278.3517, device='cuda:0')
episode: 187 training return: tensor(196.8475, device='cuda:0')
epoch: 47 test_true_pfm: 5460.013220232825 sim_pfm: 334.99810466505005
episode: 188 training return: tensor(143.4978, device='cuda:0')
episode: 189 training return: tensor(201.4233, device='cuda:0')
episode: 190 training return: tensor(298.3151, device='cuda:0')
episode: 191 training return: tensor(66.5104, device='cuda:0')
epoch: 48 test_true_pfm: 5487.499529610116 sim_pfm: 326.3652225527524
episode: 192 training return: tensor(319.0586, device='cuda:0')
episode: 193 training return: tensor(233.3181, device='cuda:0')
episode: 194 training return: tensor(204.2242, device='cuda:0')
episode: 195 training return: tensor(254.7522, device='cuda:0')
epoch: 49 test_true_pfm: 5496.144660619345 sim_pfm: 360.6883019065717
episode: 196 training return: tensor(299.1170, device='cuda:0')
episode: 197 training return: tensor(190.9456, device='cuda:0')
episode: 198 training return: tensor(59.8937, device='cuda:0')
episode: 199 training return: tensor(275.7089, device='cuda:0')
epoch: 50 test_true_pfm: 5475.866593334214 sim_pfm: 325.82034372730413
episode: 200 training return: tensor(226.4307, device='cuda:0')
episode: 201 training return: tensor(304.5281, device='cuda:0')
episode: 202 training return: tensor(221.9385, device='cuda:0')
episode: 203 training return: tensor(346.6679, device='cuda:0')
epoch: 51 test_true_pfm: 5602.520013575879 sim_pfm: 300.95977070897544
episode: 204 training return: tensor(155.3561, device='cuda:0')
episode: 205 training return: tensor(158.1056, device='cuda:0')
episode: 206 training return: tensor(345.1244, device='cuda:0')
episode: 207 training return: tensor(224.3653, device='cuda:0')
epoch: 52 test_true_pfm: 5507.455818465253 sim_pfm: 317.99360535974847
episode: 208 training return: tensor(302.5336, device='cuda:0')
episode: 209 training return: tensor(286.0782, device='cuda:0')
episode: 210 training return: tensor(323.5521, device='cuda:0')
episode: 211 training return: tensor(357.3278, device='cuda:0')
epoch: 53 test_true_pfm: 5536.723821474181 sim_pfm: 339.6440948059317
episode: 212 training return: tensor(220.0787, device='cuda:0')
episode: 213 training return: tensor(222.0654, device='cuda:0')
episode: 214 training return: tensor(225.4606, device='cuda:0')
episode: 215 training return: tensor(262.5275, device='cuda:0')
epoch: 54 test_true_pfm: 5555.453224293779 sim_pfm: 324.3541730187523
episode: 216 training return: tensor(250.9498, device='cuda:0')
episode: 217 training return: tensor(247.6471, device='cuda:0')
episode: 218 training return: tensor(281.0645, device='cuda:0')
episode: 219 training return: tensor(186.4408, device='cuda:0')
epoch: 55 test_true_pfm: 5573.372276582063 sim_pfm: 365.8992942175828
episode: 220 training return: tensor(229.1389, device='cuda:0')
episode: 221 training return: tensor(268.7321, device='cuda:0')
episode: 222 training return: tensor(388.3371, device='cuda:0')
episode: 223 training return: tensor(238.2189, device='cuda:0')
epoch: 56 test_true_pfm: 5464.135860859235 sim_pfm: 317.07600784503546
episode: 224 training return: tensor(265.5951, device='cuda:0')
episode: 225 training return: tensor(256.2149, device='cuda:0')
episode: 226 training return: tensor(205.8345, device='cuda:0')
episode: 227 training return: tensor(365.4311, device='cuda:0')
epoch: 57 test_true_pfm: 5564.436781889417 sim_pfm: 385.4741407225956
episode: 228 training return: tensor(275.1312, device='cuda:0')
episode: 229 training return: tensor(238.5848, device='cuda:0')
episode: 230 training return: tensor(244.3819, device='cuda:0')
episode: 231 training return: tensor(370.8972, device='cuda:0')
epoch: 58 test_true_pfm: 5565.219042137484 sim_pfm: 413.5232406712603
episode: 232 training return: tensor(309.8226, device='cuda:0')
episode: 233 training return: tensor(217.0061, device='cuda:0')
episode: 234 training return: tensor(320.4798, device='cuda:0')
episode: 235 training return: tensor(219.1190, device='cuda:0')
epoch: 59 test_true_pfm: 5582.764238838619 sim_pfm: 374.48357328475686
episode: 236 training return: tensor(236.3866, device='cuda:0')
episode: 237 training return: tensor(59.0255, device='cuda:0')
episode: 238 training return: tensor(284.9798, device='cuda:0')
episode: 239 training return: tensor(212.3687, device='cuda:0')
epoch: 60 test_true_pfm: 5600.794061256864 sim_pfm: 426.4573523593717
episode: 240 training return: tensor(276.8852, device='cuda:0')
episode: 241 training return: tensor(250.7192, device='cuda:0')
episode: 242 training return: tensor(285.4779, device='cuda:0')
episode: 243 training return: tensor(309.1397, device='cuda:0')
epoch: 61 test_true_pfm: 5569.739841880703 sim_pfm: 521.3721044004196
episode: 244 training return: tensor(231.7003, device='cuda:0')
episode: 245 training return: tensor(356.6094, device='cuda:0')
episode: 246 training return: tensor(129.4865, device='cuda:0')
episode: 247 training return: tensor(225.7635, device='cuda:0')
epoch: 62 test_true_pfm: 5578.020050231576 sim_pfm: 439.4248485278028
episode: 248 training return: tensor(405.7416, device='cuda:0')
episode: 249 training return: tensor(307.5541, device='cuda:0')
episode: 250 training return: tensor(394.2409, device='cuda:0')
episode: 251 training return: tensor(239.8443, device='cuda:0')
epoch: 63 test_true_pfm: 5570.254963439933 sim_pfm: 406.3168590149532
episode: 252 training return: tensor(423.6734, device='cuda:0')
episode: 253 training return: tensor(406.0745, device='cuda:0')
episode: 254 training return: tensor(184.3577, device='cuda:0')
episode: 255 training return: tensor(329.2290, device='cuda:0')
epoch: 64 test_true_pfm: 5604.439141915357 sim_pfm: 448.16490634635556
episode: 256 training return: tensor(233.3243, device='cuda:0')
episode: 257 training return: tensor(421.4684, device='cuda:0')
episode: 258 training return: tensor(158.9774, device='cuda:0')
episode: 259 training return: tensor(297.8701, device='cuda:0')
epoch: 65 test_true_pfm: 5658.5856197105995 sim_pfm: 482.8901862585335
episode: 260 training return: tensor(318.7144, device='cuda:0')
episode: 261 training return: tensor(261.9334, device='cuda:0')
episode: 262 training return: tensor(359.9695, device='cuda:0')
episode: 263 training return: tensor(243.5703, device='cuda:0')
epoch: 66 test_true_pfm: 5676.760114353224 sim_pfm: 490.9299105433747
episode: 264 training return: tensor(413.1304, device='cuda:0')
episode: 265 training return: tensor(251.9796, device='cuda:0')
episode: 266 training return: tensor(185.4112, device='cuda:0')
episode: 267 training return: tensor(119.4101, device='cuda:0')
epoch: 67 test_true_pfm: 5638.492519633898 sim_pfm: 412.69144452078035
episode: 268 training return: tensor(423.2715, device='cuda:0')
episode: 269 training return: tensor(164.3904, device='cuda:0')
episode: 270 training return: tensor(344.7136, device='cuda:0')
episode: 271 training return: tensor(263.6691, device='cuda:0')
epoch: 68 test_true_pfm: 5614.381160984022 sim_pfm: 460.9067691178061
episode: 272 training return: tensor(349.0245, device='cuda:0')
episode: 273 training return: tensor(307.7562, device='cuda:0')
episode: 274 training return: tensor(390.6151, device='cuda:0')
episode: 275 training return: tensor(358.1473, device='cuda:0')
epoch: 69 test_true_pfm: 5622.288197173584 sim_pfm: 450.9771364370051
episode: 276 training return: tensor(342.9682, device='cuda:0')
episode: 277 training return: tensor(427.7930, device='cuda:0')
episode: 278 training return: tensor(295.0252, device='cuda:0')
episode: 279 training return: tensor(383.6013, device='cuda:0')
epoch: 70 test_true_pfm: 5589.11840601077 sim_pfm: 446.85006745984236
episode: 280 training return: tensor(64.6066, device='cuda:0')
episode: 281 training return: tensor(332.3337, device='cuda:0')
episode: 282 training return: tensor(322.3846, device='cuda:0')
episode: 283 training return: tensor(415.5412, device='cuda:0')
epoch: 71 test_true_pfm: 5715.053719879949 sim_pfm: 380.71695970073534
episode: 284 training return: tensor(368.2596, device='cuda:0')
episode: 285 training return: tensor(303.9056, device='cuda:0')
episode: 286 training return: tensor(366.1656, device='cuda:0')
episode: 287 training return: tensor(320.5580, device='cuda:0')
epoch: 72 test_true_pfm: 5579.851121979936 sim_pfm: 436.40612975766027
episode: 288 training return: tensor(393.2224, device='cuda:0')
episode: 289 training return: tensor(439.0215, device='cuda:0')
episode: 290 training return: tensor(428.5911, device='cuda:0')
episode: 291 training return: tensor(435.9126, device='cuda:0')
epoch: 73 test_true_pfm: 5630.113789593114 sim_pfm: 371.1670683352665
episode: 292 training return: tensor(236.2631, device='cuda:0')
episode: 293 training return: tensor(60.6625, device='cuda:0')
episode: 294 training return: tensor(191.3768, device='cuda:0')
episode: 295 training return: tensor(222.9069, device='cuda:0')
epoch: 74 test_true_pfm: 5650.159278873266 sim_pfm: 394.91021885521087
episode: 296 training return: tensor(383.0706, device='cuda:0')
episode: 297 training return: tensor(252.6030, device='cuda:0')
episode: 298 training return: tensor(318.2613, device='cuda:0')
episode: 299 training return: tensor(270.3189, device='cuda:0')
epoch: 75 test_true_pfm: 5697.116358332091 sim_pfm: 434.9714900650918
episode: 300 training return: tensor(422.9713, device='cuda:0')
episode: 301 training return: tensor(260.4619, device='cuda:0')
episode: 302 training return: tensor(522.6897, device='cuda:0')
episode: 303 training return: tensor(346.4549, device='cuda:0')
epoch: 76 test_true_pfm: 5622.9657749196895 sim_pfm: 458.0281392824448
episode: 304 training return: tensor(168.2174, device='cuda:0')
episode: 305 training return: tensor(350.7503, device='cuda:0')
episode: 306 training return: tensor(330.9413, device='cuda:0')
episode: 307 training return: tensor(457.9485, device='cuda:0')
epoch: 77 test_true_pfm: 5587.551271158033 sim_pfm: 465.50648241622065
episode: 308 training return: tensor(103.7641, device='cuda:0')
episode: 309 training return: tensor(409.5154, device='cuda:0')
episode: 310 training return: tensor(217.7250, device='cuda:0')
episode: 311 training return: tensor(341.9544, device='cuda:0')
epoch: 78 test_true_pfm: 5525.805804845545 sim_pfm: 467.1659242196086
episode: 312 training return: tensor(357.1049, device='cuda:0')
episode: 313 training return: tensor(263.4982, device='cuda:0')
episode: 314 training return: tensor(319.3641, device='cuda:0')
episode: 315 training return: tensor(267.4031, device='cuda:0')
epoch: 79 test_true_pfm: 5543.894143748471 sim_pfm: 451.120922577645
episode: 316 training return: tensor(447.2777, device='cuda:0')
episode: 317 training return: tensor(328.6745, device='cuda:0')
episode: 318 training return: tensor(354.0042, device='cuda:0')
episode: 319 training return: tensor(371.7468, device='cuda:0')
epoch: 80 test_true_pfm: 5673.914058556712 sim_pfm: 379.4613332200291
episode: 320 training return: tensor(304.4502, device='cuda:0')
episode: 321 training return: tensor(264.3315, device='cuda:0')
episode: 322 training return: tensor(296.2828, device='cuda:0')
episode: 323 training return: tensor(155.4694, device='cuda:0')
epoch: 81 test_true_pfm: 5618.860214671885 sim_pfm: 441.91887875677395
episode: 324 training return: tensor(320.1273, device='cuda:0')
episode: 325 training return: tensor(337.9145, device='cuda:0')
episode: 326 training return: tensor(365.9420, device='cuda:0')
episode: 327 training return: tensor(385.3617, device='cuda:0')
epoch: 82 test_true_pfm: 5682.720979175283 sim_pfm: 524.5901178029017
episode: 328 training return: tensor(290.3533, device='cuda:0')
episode: 329 training return: tensor(137.5323, device='cuda:0')
episode: 330 training return: tensor(265.5515, device='cuda:0')
episode: 331 training return: tensor(341.3663, device='cuda:0')
epoch: 83 test_true_pfm: 5629.9523974311305 sim_pfm: 414.17927579502185
episode: 332 training return: tensor(411.2675, device='cuda:0')
episode: 333 training return: tensor(371.9459, device='cuda:0')
episode: 334 training return: tensor(449.8452, device='cuda:0')
episode: 335 training return: tensor(223.0815, device='cuda:0')
epoch: 84 test_true_pfm: 5618.833223935996 sim_pfm: 434.798774292256
episode: 336 training return: tensor(337.6728, device='cuda:0')
episode: 337 training return: tensor(260.0587, device='cuda:0')
episode: 338 training return: tensor(305.8911, device='cuda:0')
episode: 339 training return: tensor(322.0161, device='cuda:0')
epoch: 85 test_true_pfm: 5643.8045325724315 sim_pfm: 389.71359926748363
episode: 340 training return: tensor(228.6301, device='cuda:0')
episode: 341 training return: tensor(333.6387, device='cuda:0')
episode: 342 training return: tensor(449.1710, device='cuda:0')
episode: 343 training return: tensor(336.9849, device='cuda:0')
epoch: 86 test_true_pfm: 5632.042889371092 sim_pfm: 421.29077688554145
episode: 344 training return: tensor(138.5552, device='cuda:0')
episode: 345 training return: tensor(308.0284, device='cuda:0')
episode: 346 training return: tensor(234.5971, device='cuda:0')
episode: 347 training return: tensor(375.6211, device='cuda:0')
epoch: 87 test_true_pfm: 5692.779323217351 sim_pfm: 477.973265619056
episode: 348 training return: tensor(360.9552, device='cuda:0')
episode: 349 training return: tensor(236.8695, device='cuda:0')
episode: 350 training return: tensor(255.7767, device='cuda:0')
episode: 351 training return: tensor(253.9522, device='cuda:0')
epoch: 88 test_true_pfm: 5656.260012977663 sim_pfm: 416.65670092052704
episode: 352 training return: tensor(335.8041, device='cuda:0')
episode: 353 training return: tensor(312.4189, device='cuda:0')
episode: 354 training return: tensor(452.6676, device='cuda:0')
episode: 355 training return: tensor(328.5242, device='cuda:0')
epoch: 89 test_true_pfm: 5626.804292404522 sim_pfm: 467.8757521097238
episode: 356 training return: tensor(353.3359, device='cuda:0')
episode: 357 training return: tensor(331.8423, device='cuda:0')
episode: 358 training return: tensor(507.0191, device='cuda:0')
episode: 359 training return: tensor(432.4238, device='cuda:0')
epoch: 90 test_true_pfm: 5595.547495477294 sim_pfm: 468.3170569234741
episode: 360 training return: tensor(328.8734, device='cuda:0')
episode: 361 training return: tensor(404.8528, device='cuda:0')
episode: 362 training return: tensor(285.9215, device='cuda:0')
episode: 363 training return: tensor(468.9661, device='cuda:0')
epoch: 91 test_true_pfm: 5663.559464451076 sim_pfm: 442.7849670640232
episode: 364 training return: tensor(338.3586, device='cuda:0')
episode: 365 training return: tensor(339.0507, device='cuda:0')
episode: 366 training return: tensor(406.9410, device='cuda:0')
episode: 367 training return: tensor(218.4240, device='cuda:0')
epoch: 92 test_true_pfm: 5648.467195676746 sim_pfm: 424.3338586720638
episode: 368 training return: tensor(443.5499, device='cuda:0')
episode: 369 training return: tensor(254.1787, device='cuda:0')
episode: 370 training return: tensor(437.9390, device='cuda:0')
episode: 371 training return: tensor(453.6152, device='cuda:0')
epoch: 93 test_true_pfm: 5691.999309672597 sim_pfm: 446.9501785817362
episode: 372 training return: tensor(241.1561, device='cuda:0')
episode: 373 training return: tensor(433.4133, device='cuda:0')
episode: 374 training return: tensor(455.2773, device='cuda:0')
episode: 375 training return: tensor(362.4637, device='cuda:0')
epoch: 94 test_true_pfm: 5569.8728928318715 sim_pfm: 442.1109123475617
episode: 376 training return: tensor(382.6988, device='cuda:0')
episode: 377 training return: tensor(208.3310, device='cuda:0')
episode: 378 training return: tensor(391.5685, device='cuda:0')
episode: 379 training return: tensor(403.2255, device='cuda:0')
epoch: 95 test_true_pfm: 5637.030983228717 sim_pfm: 500.3723438831512
episode: 380 training return: tensor(370.9821, device='cuda:0')
episode: 381 training return: tensor(422.1278, device='cuda:0')
episode: 382 training return: tensor(418.9575, device='cuda:0')
episode: 383 training return: tensor(377.8759, device='cuda:0')
epoch: 96 test_true_pfm: 5681.537980928876 sim_pfm: 470.8363674482098
episode: 384 training return: tensor(498.9902, device='cuda:0')
episode: 385 training return: tensor(455.7505, device='cuda:0')
episode: 386 training return: tensor(288.4899, device='cuda:0')
episode: 387 training return: tensor(434.0583, device='cuda:0')
epoch: 97 test_true_pfm: 5689.042191632457 sim_pfm: 456.20156805230846
episode: 388 training return: tensor(243.9012, device='cuda:0')
episode: 389 training return: tensor(373.1330, device='cuda:0')
episode: 390 training return: tensor(403.3081, device='cuda:0')
episode: 391 training return: tensor(329.9623, device='cuda:0')
epoch: 98 test_true_pfm: 5638.189667077503 sim_pfm: 454.1524495257375
episode: 392 training return: tensor(419.6490, device='cuda:0')
episode: 393 training return: tensor(421.5036, device='cuda:0')
episode: 394 training return: tensor(290.9677, device='cuda:0')
episode: 395 training return: tensor(457.1186, device='cuda:0')
epoch: 99 test_true_pfm: 5644.0070616792555 sim_pfm: 490.2498142741776
episode: 396 training return: tensor(405.4927, device='cuda:0')
episode: 397 training return: tensor(232.6357, device='cuda:0')
episode: 398 training return: tensor(535.2787, device='cuda:0')
episode: 399 training return: tensor(370.2377, device='cuda:0')
epoch: 100 test_true_pfm: 5674.39945195756 sim_pfm: 516.4783962380412
episode: 400 training return: tensor(441.1768, device='cuda:0')
episode: 401 training return: tensor(386.7198, device='cuda:0')
episode: 402 training return: tensor(299.0404, device='cuda:0')
episode: 403 training return: tensor(348.7924, device='cuda:0')
epoch: 101 test_true_pfm: 5652.804849677957 sim_pfm: 451.05736413186725
episode: 404 training return: tensor(373.6245, device='cuda:0')
episode: 405 training return: tensor(413.4930, device='cuda:0')
episode: 406 training return: tensor(399.4572, device='cuda:0')
episode: 407 training return: tensor(366.7846, device='cuda:0')
epoch: 102 test_true_pfm: 5717.809448209341 sim_pfm: 531.445210071785
episode: 408 training return: tensor(411.7122, device='cuda:0')
episode: 409 training return: tensor(387.2816, device='cuda:0')
episode: 410 training return: tensor(364.4083, device='cuda:0')
episode: 411 training return: tensor(494.1934, device='cuda:0')
epoch: 103 test_true_pfm: 5567.39000982649 sim_pfm: 430.44872128719
episode: 412 training return: tensor(333.5574, device='cuda:0')
episode: 413 training return: tensor(319.1362, device='cuda:0')
episode: 414 training return: tensor(375.1420, device='cuda:0')
episode: 415 training return: tensor(380.1889, device='cuda:0')
epoch: 104 test_true_pfm: 5672.738958892297 sim_pfm: 440.22951753623784
episode: 416 training return: tensor(323.5900, device='cuda:0')
episode: 417 training return: tensor(444.5327, device='cuda:0')
episode: 418 training return: tensor(430.2106, device='cuda:0')
episode: 419 training return: tensor(278.6573, device='cuda:0')
epoch: 105 test_true_pfm: 5696.2464711016955 sim_pfm: 515.1215695226565
episode: 420 training return: tensor(405.9684, device='cuda:0')
episode: 421 training return: tensor(490.9918, device='cuda:0')
episode: 422 training return: tensor(460.3611, device='cuda:0')
episode: 423 training return: tensor(436.8098, device='cuda:0')
epoch: 106 test_true_pfm: 5641.191492883172 sim_pfm: 454.8290689934899
episode: 424 training return: tensor(215.7169, device='cuda:0')
episode: 425 training return: tensor(398.6983, device='cuda:0')
episode: 426 training return: tensor(312.4577, device='cuda:0')
episode: 427 training return: tensor(372.0373, device='cuda:0')
epoch: 107 test_true_pfm: 5678.609647770837 sim_pfm: 473.2217938084311
episode: 428 training return: tensor(343.2954, device='cuda:0')
episode: 429 training return: tensor(425.4961, device='cuda:0')
episode: 430 training return: tensor(288.2755, device='cuda:0')
episode: 431 training return: tensor(430.5009, device='cuda:0')
epoch: 108 test_true_pfm: 5672.787704312941 sim_pfm: 509.01545586943394
episode: 432 training return: tensor(474.5714, device='cuda:0')
episode: 433 training return: tensor(329.1590, device='cuda:0')
episode: 434 training return: tensor(415.1736, device='cuda:0')
episode: 435 training return: tensor(506.6120, device='cuda:0')
epoch: 109 test_true_pfm: 5728.882480878806 sim_pfm: 463.51395874334656
episode: 436 training return: tensor(191.4159, device='cuda:0')
episode: 437 training return: tensor(311.1245, device='cuda:0')
episode: 438 training return: tensor(481.0416, device='cuda:0')
episode: 439 training return: tensor(459.4236, device='cuda:0')
epoch: 110 test_true_pfm: 5725.832326710969 sim_pfm: 516.5259083954152
episode: 440 training return: tensor(302.5172, device='cuda:0')
episode: 441 training return: tensor(333.0767, device='cuda:0')
episode: 442 training return: tensor(364.5735, device='cuda:0')
episode: 443 training return: tensor(345.6953, device='cuda:0')
epoch: 111 test_true_pfm: 5625.172869623944 sim_pfm: 517.854054977574
episode: 444 training return: tensor(384.1347, device='cuda:0')
episode: 445 training return: tensor(337.6752, device='cuda:0')
episode: 446 training return: tensor(352.5740, device='cuda:0')
episode: 447 training return: tensor(350.7649, device='cuda:0')
epoch: 112 test_true_pfm: 5712.05382644413 sim_pfm: 524.500668266012
episode: 448 training return: tensor(290.0067, device='cuda:0')
episode: 449 training return: tensor(-551.7188, device='cuda:0')
episode: 450 training return: tensor(413.7609, device='cuda:0')
episode: 451 training return: tensor(299.3342, device='cuda:0')
epoch: 113 test_true_pfm: 5746.525077091657 sim_pfm: 505.313143458198
episode: 452 training return: tensor(380.6218, device='cuda:0')
episode: 453 training return: tensor(507.5120, device='cuda:0')
episode: 454 training return: tensor(254.9668, device='cuda:0')
episode: 455 training return: tensor(299.1652, device='cuda:0')
epoch: 114 test_true_pfm: 5716.218871061287 sim_pfm: 428.83270867502625
episode: 456 training return: tensor(255.5292, device='cuda:0')
episode: 457 training return: tensor(403.1627, device='cuda:0')
episode: 458 training return: tensor(347.9088, device='cuda:0')
episode: 459 training return: tensor(490.2910, device='cuda:0')
epoch: 115 test_true_pfm: 5699.25058276089 sim_pfm: 522.1968479518933
episode: 460 training return: tensor(410.3321, device='cuda:0')
episode: 461 training return: tensor(351.5786, device='cuda:0')
episode: 462 training return: tensor(309.5182, device='cuda:0')
episode: 463 training return: tensor(367.0620, device='cuda:0')
epoch: 116 test_true_pfm: 5718.894400089204 sim_pfm: 511.6717291253153
episode: 464 training return: tensor(372.6118, device='cuda:0')
episode: 465 training return: tensor(476.7398, device='cuda:0')
episode: 466 training return: tensor(331.6901, device='cuda:0')
episode: 467 training return: tensor(300.6976, device='cuda:0')
epoch: 117 test_true_pfm: 5661.100807115909 sim_pfm: 514.2545374159721
episode: 468 training return: tensor(306.2789, device='cuda:0')
episode: 469 training return: tensor(496.6955, device='cuda:0')
episode: 470 training return: tensor(363.6730, device='cuda:0')
episode: 471 training return: tensor(396.0026, device='cuda:0')
epoch: 118 test_true_pfm: 5674.652602512174 sim_pfm: 509.2862430022603
episode: 472 training return: tensor(312.9484, device='cuda:0')
episode: 473 training return: tensor(496.5104, device='cuda:0')
episode: 474 training return: tensor(410.3713, device='cuda:0')
episode: 475 training return: tensor(246.5744, device='cuda:0')
epoch: 119 test_true_pfm: 5676.6635418185315 sim_pfm: 462.0052330211426
episode: 476 training return: tensor(425.3500, device='cuda:0')
episode: 477 training return: tensor(539.3807, device='cuda:0')
episode: 478 training return: tensor(272.9363, device='cuda:0')
episode: 479 training return: tensor(354.2401, device='cuda:0')
epoch: 120 test_true_pfm: 5711.828565351166 sim_pfm: 510.24517779351055
episode: 480 training return: tensor(462.2834, device='cuda:0')
episode: 481 training return: tensor(367.0434, device='cuda:0')
episode: 482 training return: tensor(363.2868, device='cuda:0')
episode: 483 training return: tensor(627.6415, device='cuda:0')
epoch: 121 test_true_pfm: 5738.978199235701 sim_pfm: 585.5155421850795
episode: 484 training return: tensor(315.1246, device='cuda:0')
episode: 485 training return: tensor(454.9177, device='cuda:0')
episode: 486 training return: tensor(300.6999, device='cuda:0')
episode: 487 training return: tensor(412.5764, device='cuda:0')
epoch: 122 test_true_pfm: 5602.125309972671 sim_pfm: 440.4964024616056
episode: 488 training return: tensor(385.4182, device='cuda:0')
episode: 489 training return: tensor(444.2026, device='cuda:0')
episode: 490 training return: tensor(235.8351, device='cuda:0')
episode: 491 training return: tensor(508.6194, device='cuda:0')
epoch: 123 test_true_pfm: 5726.069473097142 sim_pfm: 457.7530333795585
episode: 492 training return: tensor(498.1608, device='cuda:0')
episode: 493 training return: tensor(349.8419, device='cuda:0')
episode: 494 training return: tensor(363.2611, device='cuda:0')
episode: 495 training return: tensor(340.5181, device='cuda:0')
epoch: 124 test_true_pfm: 5693.012216179384 sim_pfm: 502.6377324509667
episode: 496 training return: tensor(419.8274, device='cuda:0')
episode: 497 training return: tensor(338.3056, device='cuda:0')
episode: 498 training return: tensor(315.7862, device='cuda:0')
episode: 499 training return: tensor(388.2385, device='cuda:0')
epoch: 125 test_true_pfm: 5781.677530955611 sim_pfm: 449.2551578577065
episode: 500 training return: tensor(349.5258, device='cuda:0')
episode: 501 training return: tensor(275.6874, device='cuda:0')
episode: 502 training return: tensor(365.9659, device='cuda:0')
episode: 503 training return: tensor(292.9417, device='cuda:0')
epoch: 126 test_true_pfm: 5711.269099496309 sim_pfm: 446.84330930688884
episode: 504 training return: tensor(495.6186, device='cuda:0')
episode: 505 training return: tensor(436.0768, device='cuda:0')
episode: 506 training return: tensor(502.6568, device='cuda:0')
episode: 507 training return: tensor(418.6126, device='cuda:0')
epoch: 127 test_true_pfm: 5712.930053575245 sim_pfm: 556.3089031627557
episode: 508 training return: tensor(377.5012, device='cuda:0')
episode: 509 training return: tensor(342.8889, device='cuda:0')
episode: 510 training return: tensor(402.1212, device='cuda:0')
episode: 511 training return: tensor(516.9200, device='cuda:0')
epoch: 128 test_true_pfm: 5635.150638438246 sim_pfm: 489.2458057688394
episode: 512 training return: tensor(494.6483, device='cuda:0')
episode: 513 training return: tensor(415.6836, device='cuda:0')
episode: 514 training return: tensor(366.3260, device='cuda:0')
episode: 515 training return: tensor(418.4047, device='cuda:0')
epoch: 129 test_true_pfm: 5664.072687883277 sim_pfm: 497.7379344486205
episode: 516 training return: tensor(390.3969, device='cuda:0')
episode: 517 training return: tensor(293.8958, device='cuda:0')
episode: 518 training return: tensor(407.2876, device='cuda:0')
episode: 519 training return: tensor(409.5379, device='cuda:0')
epoch: 130 test_true_pfm: 5757.594175196517 sim_pfm: 527.6813135546399
episode: 520 training return: tensor(345.5548, device='cuda:0')
episode: 521 training return: tensor(420.4528, device='cuda:0')
episode: 522 training return: tensor(314.2361, device='cuda:0')
episode: 523 training return: tensor(519.5397, device='cuda:0')
epoch: 131 test_true_pfm: 5705.00871853702 sim_pfm: 573.3244178696186
episode: 524 training return: tensor(437.4893, device='cuda:0')
episode: 525 training return: tensor(293.3594, device='cuda:0')
episode: 526 training return: tensor(394.3827, device='cuda:0')
episode: 527 training return: tensor(379.2274, device='cuda:0')
epoch: 132 test_true_pfm: 5762.5075840619975 sim_pfm: 570.8135323254683
episode: 528 training return: tensor(289.3423, device='cuda:0')
episode: 529 training return: tensor(426.6339, device='cuda:0')
episode: 530 training return: tensor(315.2739, device='cuda:0')
episode: 531 training return: tensor(474.5079, device='cuda:0')
epoch: 133 test_true_pfm: 5674.624794965934 sim_pfm: 564.5401021004654
episode: 532 training return: tensor(354.0640, device='cuda:0')
episode: 533 training return: tensor(329.5511, device='cuda:0')
episode: 534 training return: tensor(350.8877, device='cuda:0')
episode: 535 training return: tensor(436.0251, device='cuda:0')
epoch: 134 test_true_pfm: 5660.336947197582 sim_pfm: 487.78837203027797
episode: 536 training return: tensor(349.5089, device='cuda:0')
episode: 537 training return: tensor(379.2005, device='cuda:0')
episode: 538 training return: tensor(362.7903, device='cuda:0')
episode: 539 training return: tensor(383.0866, device='cuda:0')
epoch: 135 test_true_pfm: 5686.232242423245 sim_pfm: 510.6809031688802
episode: 540 training return: tensor(397.7439, device='cuda:0')
episode: 541 training return: tensor(305.2082, device='cuda:0')
episode: 542 training return: tensor(554.7925, device='cuda:0')
episode: 543 training return: tensor(430.8403, device='cuda:0')
epoch: 136 test_true_pfm: 5741.537926486887 sim_pfm: 557.7339334938442
episode: 544 training return: tensor(329.9288, device='cuda:0')
episode: 545 training return: tensor(502.4181, device='cuda:0')
episode: 546 training return: tensor(328.1299, device='cuda:0')
episode: 547 training return: tensor(375.4371, device='cuda:0')
epoch: 137 test_true_pfm: 5750.548624316284 sim_pfm: 553.6505026550343
episode: 548 training return: tensor(348.1775, device='cuda:0')
episode: 549 training return: tensor(247.9715, device='cuda:0')
episode: 550 training return: tensor(418.4130, device='cuda:0')
episode: 551 training return: tensor(392.5414, device='cuda:0')
epoch: 138 test_true_pfm: 5675.369946650938 sim_pfm: 450.45949919546064
episode: 552 training return: tensor(421.0572, device='cuda:0')
episode: 553 training return: tensor(502.5208, device='cuda:0')
episode: 554 training return: tensor(333.9209, device='cuda:0')
episode: 555 training return: tensor(385.0830, device='cuda:0')
epoch: 139 test_true_pfm: 5758.563631487199 sim_pfm: 503.049201202346
episode: 556 training return: tensor(359.9495, device='cuda:0')
episode: 557 training return: tensor(301.6973, device='cuda:0')
episode: 558 training return: tensor(399.1155, device='cuda:0')
episode: 559 training return: tensor(439.1893, device='cuda:0')
epoch: 140 test_true_pfm: 5707.2548419735795 sim_pfm: 529.6602921232407
episode: 560 training return: tensor(278.7658, device='cuda:0')
episode: 561 training return: tensor(285.5450, device='cuda:0')
episode: 562 training return: tensor(481.1243, device='cuda:0')
episode: 563 training return: tensor(396.0586, device='cuda:0')
epoch: 141 test_true_pfm: 5656.873227502874 sim_pfm: 537.8529694265453
episode: 564 training return: tensor(509.1421, device='cuda:0')
episode: 565 training return: tensor(290.9954, device='cuda:0')
episode: 566 training return: tensor(333.0734, device='cuda:0')
episode: 567 training return: tensor(267.6582, device='cuda:0')
epoch: 142 test_true_pfm: 5827.447761222284 sim_pfm: 559.286644472566
episode: 568 training return: tensor(397.9149, device='cuda:0')
episode: 569 training return: tensor(361.3416, device='cuda:0')
episode: 570 training return: tensor(521.0499, device='cuda:0')
episode: 571 training return: tensor(300.1862, device='cuda:0')
epoch: 143 test_true_pfm: 5800.6842993137225 sim_pfm: 626.8415492698356
episode: 572 training return: tensor(529.0311, device='cuda:0')
episode: 573 training return: tensor(587.6279, device='cuda:0')
episode: 574 training return: tensor(327.6562, device='cuda:0')
episode: 575 training return: tensor(468.2838, device='cuda:0')
epoch: 144 test_true_pfm: 5750.809932505329 sim_pfm: 480.37746859677526
episode: 576 training return: tensor(403.5494, device='cuda:0')
episode: 577 training return: tensor(334.4254, device='cuda:0')
episode: 578 training return: tensor(427.0502, device='cuda:0')
episode: 579 training return: tensor(487.0096, device='cuda:0')
epoch: 145 test_true_pfm: 5713.55996605069 sim_pfm: 549.4739936528495
episode: 580 training return: tensor(284.8498, device='cuda:0')
episode: 581 training return: tensor(371.8832, device='cuda:0')
episode: 582 training return: tensor(420.2087, device='cuda:0')
episode: 583 training return: tensor(400.5637, device='cuda:0')
epoch: 146 test_true_pfm: 5768.529945208993 sim_pfm: 525.104683746545
episode: 584 training return: tensor(441.0695, device='cuda:0')
episode: 585 training return: tensor(421.0941, device='cuda:0')
episode: 586 training return: tensor(234.0883, device='cuda:0')
episode: 587 training return: tensor(385.0534, device='cuda:0')
epoch: 147 test_true_pfm: 5638.145724316288 sim_pfm: 518.483512858899
episode: 588 training return: tensor(458.4788, device='cuda:0')
episode: 589 training return: tensor(431.5320, device='cuda:0')
episode: 590 training return: tensor(248.4790, device='cuda:0')
episode: 591 training return: tensor(363.4001, device='cuda:0')
epoch: 148 test_true_pfm: 5730.055101696203 sim_pfm: 583.8326858057213
episode: 592 training return: tensor(376.4961, device='cuda:0')
episode: 593 training return: tensor(389.1004, device='cuda:0')
episode: 594 training return: tensor(430.0197, device='cuda:0')
episode: 595 training return: tensor(317.7945, device='cuda:0')
epoch: 149 test_true_pfm: 5705.687438462203 sim_pfm: 574.1827102755196
episode: 596 training return: tensor(439.8796, device='cuda:0')
episode: 597 training return: tensor(363.8600, device='cuda:0')
episode: 598 training return: tensor(308.4581, device='cuda:0')
episode: 599 training return: tensor(509.1721, device='cuda:0')
epoch: 150 test_true_pfm: 5649.673366176076 sim_pfm: 558.3891268152898
